Merge tag 'ieee802154-for-net-2021-11-24' of git://git.kernel.org/pub/scm/linux/kerne...
authorJakub Kicinski <kuba@kernel.org>
Thu, 25 Nov 2021 01:02:43 +0000 (17:02 -0800)
committerJakub Kicinski <kuba@kernel.org>
Thu, 25 Nov 2021 01:02:44 +0000 (17:02 -0800)
Stefan Schmidt says:

====================
pull-request: ieee802154 for net 2021-11-24

A fix from Alexander which has been brought up various times found by
automated checkers. Make sure values are in u32 range.

* tag 'ieee802154-for-net-2021-11-24' of git://git.kernel.org/pub/scm/linux/kernel/git/sschmidt/wpan:
  net: ieee802154: handle iftypes as u32
====================

Link: https://lore.kernel.org/r/20211124150934.3670248-1-stefan@datenfreihafen.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
825 files changed:
Documentation/ABI/testing/sysfs-fs-f2fs
Documentation/admin-guide/sysctl/kernel.rst
Documentation/arm/marvell.rst
Documentation/bpf/index.rst
Documentation/devicetree/bindings/arm/sti.yaml
Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
Documentation/devicetree/bindings/arm/stm32/stm32.yaml
Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
Documentation/devicetree/bindings/clock/sifive/fu740-prci.yaml
Documentation/devicetree/bindings/clock/silabs,si5351.txt
Documentation/devicetree/bindings/clock/socionext,uniphier-clock.yaml
Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
Documentation/devicetree/bindings/display/bridge/snps,dw-mipi-dsi.yaml
Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt [deleted file]
Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/display/ingenic,ipu.yaml
Documentation/devicetree/bindings/display/ingenic,lcd.yaml
Documentation/devicetree/bindings/display/panel/orisetech,otm8009a.yaml
Documentation/devicetree/bindings/display/panel/raydium,rm68200.yaml
Documentation/devicetree/bindings/display/st,stm32-dsi.yaml
Documentation/devicetree/bindings/display/st,stm32-ltdc.yaml
Documentation/devicetree/bindings/dma/ingenic,dma.yaml
Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
Documentation/devicetree/bindings/gpio/gpio-xlp.txt [deleted file]
Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
Documentation/devicetree/bindings/i2c/i2c-imx.yaml
Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt [deleted file]
Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
Documentation/devicetree/bindings/i2c/st,stm32-i2c.yaml
Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
Documentation/devicetree/bindings/iio/adc/st,stm32-dfsdm-adc.yaml
Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
Documentation/devicetree/bindings/interrupt-controller/st,stm32-exti.yaml
Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
Documentation/devicetree/bindings/media/qcom,sc7280-venus.yaml
Documentation/devicetree/bindings/media/st,stm32-cec.yaml
Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
Documentation/devicetree/bindings/mfd/st,stmfx.yaml
Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
Documentation/devicetree/bindings/mips/ingenic/ingenic,cpu.yaml
Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
Documentation/devicetree/bindings/net/ingenic,mac.yaml
Documentation/devicetree/bindings/net/snps,dwmac.yaml
Documentation/devicetree/bindings/net/stm32-dwmac.yaml
Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
Documentation/devicetree/bindings/regulator/st,stm32mp1-pwr-reg.yaml
Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
Documentation/devicetree/bindings/rng/ingenic,trng.yaml
Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
Documentation/devicetree/bindings/serial/ingenic,uart.yaml
Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
Documentation/devicetree/bindings/sound/ingenic,aic.yaml
Documentation/devicetree/bindings/sound/ingenic,codec.yaml
Documentation/devicetree/bindings/sound/st,stm32-i2s.yaml
Documentation/devicetree/bindings/sound/st,stm32-sai.yaml
Documentation/devicetree/bindings/sound/st,stm32-spdifrx.yaml
Documentation/devicetree/bindings/spi/ingenic,spi.yaml
Documentation/devicetree/bindings/spi/spi-xlp.txt [deleted file]
Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
Documentation/devicetree/bindings/usb/ingenic,musb.yaml
Documentation/devicetree/bindings/usb/st,stusb160x.yaml
Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
Documentation/doc-guide/sphinx.rst
Documentation/filesystems/autofs.rst
Documentation/filesystems/f2fs.rst
Documentation/networking/ipvs-sysctl.rst
Documentation/process/changes.rst
Documentation/process/submitting-patches.rst
Documentation/trace/ftrace.rst
Documentation/translations/it_IT/doc-guide/sphinx.rst
Documentation/translations/it_IT/process/changes.rst
Documentation/translations/zh_CN/doc-guide/sphinx.rst
Documentation/translations/zh_CN/process/management-style.rst
Documentation/virt/kvm/api.rst
Documentation/x86/xstate.rst
MAINTAINERS
Makefile
arch/arm/Makefile
arch/arm/mm/mmu.c
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/arm.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/hyp/nvhe/setup.c
arch/arm64/kvm/hyp/nvhe/sys_regs.c
arch/mips/Kbuild.platforms
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/bcm63xx/clk.c
arch/mips/boot/compressed/.gitignore [deleted file]
arch/mips/boot/compressed/Makefile
arch/mips/boot/compressed/ashldi3.c [new file with mode: 0644]
arch/mips/boot/compressed/bswapdi.c [new file with mode: 0644]
arch/mips/boot/compressed/bswapsi.c [new file with mode: 0644]
arch/mips/boot/compressed/uart-ath79.c [new file with mode: 0644]
arch/mips/boot/dts/ingenic/jz4725b.dtsi
arch/mips/boot/dts/ingenic/jz4740.dtsi
arch/mips/boot/dts/ingenic/jz4770.dtsi
arch/mips/boot/dts/ingenic/jz4780.dtsi
arch/mips/boot/dts/ingenic/x1000.dtsi
arch/mips/boot/dts/ingenic/x1830.dtsi
arch/mips/configs/bmips_stb_defconfig
arch/mips/dec/setup.c
arch/mips/generic/yamon-dt.c
arch/mips/include/asm/traps.h
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/mips/kernel/traps.c
arch/mips/kvm/mips.c
arch/mips/lantiq/clk.c
arch/mips/sgi-ip22/ip22-berr.c
arch/mips/sgi-ip22/ip28-berr.c
arch/mips/sgi-ip27/ip27-berr.c
arch/mips/sgi-ip32/ip32-berr.c
arch/mips/sibyte/swarm/setup.c
arch/mips/txx9/generic/setup_tx4927.c
arch/mips/txx9/generic/setup_tx4938.c
arch/mips/txx9/generic/setup_tx4939.c
arch/mips/vdso/Makefile
arch/parisc/configs/generic-32bit_defconfig
arch/parisc/include/asm/assembly.h
arch/parisc/include/asm/jump_label.h
arch/parisc/include/asm/pgtable.h
arch/parisc/include/asm/rt_sigframe.h
arch/parisc/kernel/cache.c
arch/parisc/kernel/entry.S
arch/parisc/kernel/signal.c
arch/parisc/kernel/signal32.h
arch/parisc/kernel/stacktrace.c
arch/parisc/kernel/syscalls/syscall.tbl
arch/parisc/kernel/vmlinux.lds.S
arch/powerpc/kernel/watchdog.c
arch/powerpc/kvm/powerpc.c
arch/riscv/Kconfig
arch/riscv/Makefile
arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi
arch/riscv/boot/dts/sifive/fu540-c000.dtsi
arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts
arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
arch/riscv/configs/32-bit.config [new file with mode: 0644]
arch/riscv/configs/64-bit.config [new file with mode: 0644]
arch/riscv/configs/defconfig
arch/riscv/include/asm/page.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/vdso.h
arch/riscv/include/asm/vdso/gettimeofday.h
arch/riscv/kernel/head.S
arch/riscv/kernel/reset.c
arch/riscv/kernel/vdso.c
arch/riscv/kernel/vdso/vdso.lds.S
arch/riscv/kernel/vmlinux-xip.lds.S
arch/riscv/kvm/vcpu.c
arch/riscv/kvm/vcpu_sbi.c
arch/riscv/kvm/vm.c
arch/riscv/mm/context.c
arch/riscv/mm/init.c
arch/s390/include/asm/pci.h
arch/s390/kernel/perf_cpum_cf.c
arch/s390/kvm/kvm-s390.c
arch/s390/pci/pci.c
arch/s390/pci/pci_event.c
arch/s390/pci/pci_insn.c
arch/s390/pci/pci_irq.c
arch/sh/Kconfig
arch/sh/Kconfig.debug
arch/sh/boards/mach-landisk/irq.c
arch/sh/boot/Makefile
arch/sh/boot/compressed/.gitignore
arch/sh/boot/compressed/Makefile
arch/sh/boot/compressed/ashiftrt.S [new file with mode: 0644]
arch/sh/boot/compressed/ashldi3.c [new file with mode: 0644]
arch/sh/boot/compressed/ashlsi3.S [new file with mode: 0644]
arch/sh/boot/compressed/ashrsi3.S [new file with mode: 0644]
arch/sh/boot/compressed/lshrsi3.S [new file with mode: 0644]
arch/sh/include/asm/checksum_32.h
arch/sh/include/asm/irq.h
arch/sh/include/asm/sfp-machine.h
arch/sh/include/asm/uaccess.h
arch/sh/kernel/cpu/sh4a/smp-shx3.c
arch/sh/kernel/crash_dump.c
arch/sh/kernel/traps_32.c
arch/sh/math-emu/math.c
arch/sh/mm/nommu.c
arch/x86/events/intel/core.c
arch/x86/events/intel/lbr.c
arch/x86/hyperv/hv_init.c
arch/x86/include/asm/fpu/xcr.h
arch/x86/include/asm/fpu/xstate.h
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/set_memory.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/static_call.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/kernel/cpu/cpuid-deps.c
arch/x86/kernel/cpu/mce/intel.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/fpu/xstate.h
arch/x86/kernel/kvm.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/static_call.c
arch/x86/kernel/vm86_32.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/pmu.h
arch/x86/kvm/svm/avic.c
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/kvm/xen.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/pat/set_memory.c
arch/x86/xen/smp_pv.c
block/blk-core.c
block/blk-ia-ranges.c
block/blk-mq-sched.c
block/blk-mq.c
block/blk-mq.h
block/blk-zoned.c
block/genhd.c
block/ioctl.c
crypto/zstd.c
drivers/base/arch_topology.c
drivers/bus/brcmstb_gisb.c
drivers/clk/actions/owl-factor.c
drivers/clk/clk-ast2600.c
drivers/clk/clk-composite.c
drivers/clk/clk-si5351.c
drivers/clk/clk-si5351.h
drivers/clk/clk-versaclock5.c
drivers/clk/imx/clk.h
drivers/clk/ingenic/cgu.c
drivers/clk/ingenic/jz4725b-cgu.c
drivers/clk/ingenic/jz4740-cgu.c
drivers/clk/ingenic/jz4760-cgu.c
drivers/clk/ingenic/jz4770-cgu.c
drivers/clk/ingenic/jz4780-cgu.c
drivers/clk/ingenic/x1000-cgu.c
drivers/clk/ingenic/x1830-cgu.c
drivers/clk/mediatek/clk-mt8195-imp_iic_wrap.c
drivers/clk/qcom/gcc-msm8996.c
drivers/clk/rockchip/Kconfig
drivers/clk/rockchip/clk-rk3399.c
drivers/clk/rockchip/clk-rk3568.c
drivers/clk/uniphier/clk-uniphier-core.c
drivers/clk/uniphier/clk-uniphier-sys.c
drivers/clk/uniphier/clk-uniphier.h
drivers/clk/versatile/clk-icst.c
drivers/hv/hv_balloon.c
drivers/irqchip/irq-csky-mpintc.c
drivers/irqchip/irq-sifive-plic.c
drivers/net/amt.c
drivers/net/dsa/qca8k.c
drivers/net/ethernet/aquantia/atlantic/aq_ring.c
drivers/net/ethernet/asix/ax88796c_main.h
drivers/net/ethernet/asix/ax88796c_spi.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
drivers/net/ethernet/dec/tulip/de4x5.c
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
drivers/net/ethernet/intel/e100.c
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/iavf/iavf.h
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/marvell/mvmdio.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
drivers/net/ethernet/marvell/prestera/prestera_switchdev.c
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
drivers/net/ethernet/mellanox/mlx5/core/cq.c
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/netronome/nfp/nfp_net.h
drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
drivers/net/ethernet/ni/nixge.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac.h
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/hamradio/mkiss.c
drivers/net/ipa/ipa_cmd.c
drivers/net/ipa/ipa_cmd.h
drivers/net/ipa/ipa_endpoint.c
drivers/net/ipa/ipa_main.c
drivers/net/ipa/ipa_modem.c
drivers/net/ipa/ipa_smp2p.c
drivers/net/ipa/ipa_smp2p.h
drivers/net/slip/slip.h
drivers/net/usb/r8152.c
drivers/net/usb/smsc95xx.c
drivers/of/irq.c
drivers/of/platform.c
drivers/pci/controller/Kconfig
drivers/pci/hotplug/s390_pci_hpc.c
drivers/pci/msi.c
drivers/pci/pci.c
drivers/pci/quirks.c
drivers/ptp/ptp_ocp.c
drivers/s390/char/tape_std.c
drivers/s390/cio/css.c
drivers/sh/maple/maple.c
drivers/video/console/sticon.c
drivers/virtio/virtio_mem.c
fs/9p/vfs_addr.c
fs/9p/vfs_file.c
fs/afs/dir.c
fs/afs/dir_edit.c
fs/afs/file.c
fs/afs/internal.h
fs/afs/write.c
fs/attr.c
fs/btrfs/async-thread.c
fs/btrfs/disk-io.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/zstd.c
fs/ceph/addr.c
fs/ceph/cache.c
fs/ceph/caps.c
fs/ceph/debugfs.c
fs/ceph/export.c
fs/ceph/file.c
fs/ceph/inode.c
fs/ceph/locks.c
fs/ceph/mds_client.c
fs/ceph/mdsmap.c
fs/ceph/metric.c
fs/ceph/metric.h
fs/ceph/super.c
fs/ceph/super.h
fs/cifs/cifs_debug.c
fs/cifs/cifs_dfs_ref.c
fs/cifs/cifs_fs_sb.h
fs/cifs/cifsglob.h
fs/cifs/cifsproto.h
fs/cifs/connect.c
fs/cifs/dfs_cache.c
fs/cifs/file.c
fs/cifs/fs_context.c
fs/cifs/fs_context.h
fs/cifs/fscache.c
fs/cifs/misc.c
fs/cifs/ntlmssp.h
fs/cifs/sess.c
fs/cifs/smb2inode.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/transport.c
fs/erofs/zdata.c
fs/erofs/zdata.h
fs/erofs/zpvec.h
fs/f2fs/checkpoint.c
fs/f2fs/compress.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/node.h
fs/f2fs/recovery.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/sysfs.c
fs/f2fs/verity.c
fs/f2fs/xattr.c
fs/gfs2/bmap.c
fs/gfs2/file.c
fs/gfs2/glock.c
fs/gfs2/super.c
fs/io-wq.c
fs/ksmbd/Kconfig
fs/ksmbd/auth.c
fs/ksmbd/connection.c
fs/ksmbd/ksmbd_work.c
fs/ksmbd/ksmbd_work.h
fs/ksmbd/oplock.c
fs/ksmbd/oplock.h
fs/ksmbd/server.c
fs/ksmbd/smb2misc.c
fs/ksmbd/smb2ops.c
fs/ksmbd/smb2pdu.c
fs/ksmbd/smb2pdu.h
fs/ksmbd/smb_common.c
fs/ksmbd/smb_common.h
fs/ksmbd/transport_rdma.c
fs/ksmbd/vfs.c
fs/ksmbd/vfs.h
fs/netfs/read_helper.c
fs/nfsd/nfs4xdr.c
fs/pstore/platform.c
fs/squashfs/zstd_wrapper.c
fs/udf/dir.c
fs/udf/namei.c
fs/udf/super.c
fs/xfs/libxfs/xfs_ag.c
fs/xfs/libxfs/xfs_ag.h
fs/xfs/libxfs/xfs_btree.c
fs/xfs/libxfs/xfs_da_btree.c
include/dt-bindings/clock/ingenic,jz4725b-cgu.h [moved from include/dt-bindings/clock/jz4725b-cgu.h with 100% similarity]
include/dt-bindings/clock/ingenic,jz4740-cgu.h [moved from include/dt-bindings/clock/jz4740-cgu.h with 100% similarity]
include/dt-bindings/clock/ingenic,jz4760-cgu.h [moved from include/dt-bindings/clock/jz4760-cgu.h with 100% similarity]
include/dt-bindings/clock/ingenic,jz4770-cgu.h [moved from include/dt-bindings/clock/jz4770-cgu.h with 100% similarity]
include/dt-bindings/clock/ingenic,jz4780-cgu.h [moved from include/dt-bindings/clock/jz4780-cgu.h with 100% similarity]
include/dt-bindings/clock/ingenic,x1000-cgu.h [moved from include/dt-bindings/clock/x1000-cgu.h with 100% similarity]
include/dt-bindings/clock/ingenic,x1830-cgu.h [moved from include/dt-bindings/clock/x1830-cgu.h with 100% similarity]
include/linux/bpf.h
include/linux/ceph/ceph_fs.h
include/linux/ceph/osd_client.h
include/linux/efi.h
include/linux/genhd.h
include/linux/kernel.h
include/linux/kvm_host.h
include/linux/kvm_types.h
include/linux/mlx5/eswitch.h
include/linux/mm_types.h
include/linux/msi.h
include/linux/netfs.h
include/linux/pagemap.h
include/linux/pci.h
include/linux/percpu.h
include/linux/posix-timers.h
include/linux/printk.h
include/linux/trace_events.h
include/linux/vermagic.h
include/linux/virtio_net.h
include/linux/zstd.h
include/linux/zstd_errors.h [new file with mode: 0644]
include/linux/zstd_lib.h [new file with mode: 0644]
include/net/ip6_fib.h
include/net/ipv6_stubs.h
include/net/nfc/nci_core.h
include/net/page_pool.h
include/trace/events/afs.h
include/trace/events/f2fs.h
include/uapi/linux/kvm.h
include/uapi/linux/virtio_mem.h
init/Kconfig
init/Makefile
kernel/Kconfig.preempt
kernel/bpf/cgroup.c
kernel/bpf/helpers.c
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/events/core.c
kernel/fork.c
kernel/irq/msi.c
kernel/printk/printk.c
kernel/sched/autogroup.c
kernel/sched/core.c
kernel/sched/fair.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/time/posix-cpu-timers.c
kernel/trace/bpf_trace.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_osnoise.c
lib/decompress_unzstd.c
lib/nmi_backtrace.c
lib/zstd/Makefile
lib/zstd/bitstream.h [deleted file]
lib/zstd/common/bitstream.h [new file with mode: 0644]
lib/zstd/common/compiler.h [new file with mode: 0644]
lib/zstd/common/cpu.h [new file with mode: 0644]
lib/zstd/common/debug.c [new file with mode: 0644]
lib/zstd/common/debug.h [new file with mode: 0644]
lib/zstd/common/entropy_common.c [new file with mode: 0644]
lib/zstd/common/error_private.c [new file with mode: 0644]
lib/zstd/common/error_private.h [new file with mode: 0644]
lib/zstd/common/fse.h [new file with mode: 0644]
lib/zstd/common/fse_decompress.c [new file with mode: 0644]
lib/zstd/common/huf.h [new file with mode: 0644]
lib/zstd/common/mem.h [new file with mode: 0644]
lib/zstd/common/zstd_common.c [new file with mode: 0644]
lib/zstd/common/zstd_deps.h [new file with mode: 0644]
lib/zstd/common/zstd_internal.h [new file with mode: 0644]
lib/zstd/compress.c [deleted file]
lib/zstd/compress/fse_compress.c [new file with mode: 0644]
lib/zstd/compress/hist.c [new file with mode: 0644]
lib/zstd/compress/hist.h [new file with mode: 0644]
lib/zstd/compress/huf_compress.c [new file with mode: 0644]
lib/zstd/compress/zstd_compress.c [new file with mode: 0644]
lib/zstd/compress/zstd_compress_internal.h [new file with mode: 0644]
lib/zstd/compress/zstd_compress_literals.c [new file with mode: 0644]
lib/zstd/compress/zstd_compress_literals.h [new file with mode: 0644]
lib/zstd/compress/zstd_compress_sequences.c [new file with mode: 0644]
lib/zstd/compress/zstd_compress_sequences.h [new file with mode: 0644]
lib/zstd/compress/zstd_compress_superblock.c [new file with mode: 0644]
lib/zstd/compress/zstd_compress_superblock.h [new file with mode: 0644]
lib/zstd/compress/zstd_cwksp.h [new file with mode: 0644]
lib/zstd/compress/zstd_double_fast.c [new file with mode: 0644]
lib/zstd/compress/zstd_double_fast.h [new file with mode: 0644]
lib/zstd/compress/zstd_fast.c [new file with mode: 0644]
lib/zstd/compress/zstd_fast.h [new file with mode: 0644]
lib/zstd/compress/zstd_lazy.c [new file with mode: 0644]
lib/zstd/compress/zstd_lazy.h [new file with mode: 0644]
lib/zstd/compress/zstd_ldm.c [new file with mode: 0644]
lib/zstd/compress/zstd_ldm.h [new file with mode: 0644]
lib/zstd/compress/zstd_ldm_geartab.h [new file with mode: 0644]
lib/zstd/compress/zstd_opt.c [new file with mode: 0644]
lib/zstd/compress/zstd_opt.h [new file with mode: 0644]
lib/zstd/decompress.c [deleted file]
lib/zstd/decompress/huf_decompress.c [new file with mode: 0644]
lib/zstd/decompress/zstd_ddict.c [new file with mode: 0644]
lib/zstd/decompress/zstd_ddict.h [new file with mode: 0644]
lib/zstd/decompress/zstd_decompress.c [new file with mode: 0644]
lib/zstd/decompress/zstd_decompress_block.c [new file with mode: 0644]
lib/zstd/decompress/zstd_decompress_block.h [new file with mode: 0644]
lib/zstd/decompress/zstd_decompress_internal.h [new file with mode: 0644]
lib/zstd/decompress_sources.h [new file with mode: 0644]
lib/zstd/entropy_common.c [deleted file]
lib/zstd/error_private.h [deleted file]
lib/zstd/fse.h [deleted file]
lib/zstd/fse_compress.c [deleted file]
lib/zstd/fse_decompress.c [deleted file]
lib/zstd/huf.h [deleted file]
lib/zstd/huf_compress.c [deleted file]
lib/zstd/huf_decompress.c [deleted file]
lib/zstd/mem.h [deleted file]
lib/zstd/zstd_common.c [deleted file]
lib/zstd/zstd_compress_module.c [new file with mode: 0644]
lib/zstd/zstd_decompress_module.c [new file with mode: 0644]
lib/zstd/zstd_internal.h [deleted file]
lib/zstd/zstd_opt.h [deleted file]
mm/memory-failure.c
mm/page-writeback.c
mm/shmem.c
mm/userfaultfd.c
net/ceph/mon_client.c
net/ceph/osd_client.c
net/core/devlink.c
net/core/filter.c
net/core/neighbour.c
net/core/page_pool.c
net/ipv4/bpf_tcp_ca.c
net/ipv4/nexthop.c
net/ipv4/udp.c
net/ipv6/af_inet6.c
net/ipv6/esp6.c
net/ipv6/ip6_output.c
net/ipv6/route.c
net/mac80211/cfg.c
net/mac80211/iface.c
net/mac80211/led.h
net/mac80211/rx.c
net/mac80211/tx.c
net/mac80211/util.c
net/mac80211/wme.c
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/ncsi/ncsi-cmd.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_flow_table_offload.c
net/netfilter/nft_payload.c
net/netfilter/xt_IDLETIMER.c
net/nfc/core.c
net/nfc/nci/core.c
net/sched/act_mirred.c
net/smc/af_smc.c
net/smc/smc_close.c
net/tipc/crypto.c
net/unix/af_unix.c
net/vmw_vsock/virtio_transport.c
net/wireless/nl80211.c
net/wireless/nl80211.h
net/wireless/util.c
net/xdp/xsk_buff_pool.c
samples/bpf/hbm_kern.h
samples/bpf/xdp_redirect_cpu_user.c
samples/bpf/xdp_sample_user.c
scripts/coccinelle/misc/do_div.cocci [new file with mode: 0644]
scripts/remove-stale-files
tools/arch/x86/include/asm/msr-index.h
tools/arch/x86/include/uapi/asm/prctl.h
tools/bpf/runqslower/Makefile
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/if_link.h
tools/include/uapi/linux/prctl.h
tools/include/uapi/sound/asound.h
tools/lib/bpf/bpf_gen_internal.h
tools/lib/bpf/gen_loader.c
tools/lib/bpf/libbpf.c
tools/objtool/check.c
tools/perf/Documentation/perf-record.txt
tools/perf/Makefile.perf
tools/perf/arch/arm/include/arch-tests.h
tools/perf/arch/arm/tests/arch-tests.c
tools/perf/arch/arm/tests/vectors-page.c
tools/perf/arch/arm64/include/arch-tests.h
tools/perf/arch/arm64/tests/arch-tests.c
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/powerpc/include/arch-tests.h
tools/perf/arch/powerpc/tests/arch-tests.c
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/arch/x86/include/arch-tests.h
tools/perf/arch/x86/tests/arch-tests.c
tools/perf/arch/x86/tests/bp-modify.c
tools/perf/arch/x86/tests/insn-x86.c
tools/perf/arch/x86/tests/intel-cqm.c
tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
tools/perf/arch/x86/tests/rdpmc.c
tools/perf/arch/x86/tests/sample-parsing.c
tools/perf/bench/futex-lock-pi.c
tools/perf/bench/futex-requeue.c
tools/perf/bench/futex-wake-parallel.c
tools/perf/bench/futex-wake.c
tools/perf/builtin-trace.c
tools/perf/design.txt
tools/perf/pmu-events/arch/powerpc/power10/metrics.json [new file with mode: 0644]
tools/perf/tests/api-io.c
tools/perf/tests/attr.c
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/bitmap.c
tools/perf/tests/bp_account.c
tools/perf/tests/bp_signal.c
tools/perf/tests/bp_signal_overflow.c
tools/perf/tests/bpf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/clang.c
tools/perf/tests/code-reading.c
tools/perf/tests/cpumap.c
tools/perf/tests/demangle-java-test.c
tools/perf/tests/demangle-ocaml-test.c
tools/perf/tests/dlfilter-test.c
tools/perf/tests/dso-data.c
tools/perf/tests/dwarf-unwind.c
tools/perf/tests/event-times.c
tools/perf/tests/event_update.c
tools/perf/tests/evsel-roundtrip-name.c
tools/perf/tests/evsel-tp-sched.c
tools/perf/tests/expand-cgroup.c
tools/perf/tests/expr.c
tools/perf/tests/fdarray.c
tools/perf/tests/genelf.c
tools/perf/tests/hists_cumulate.c
tools/perf/tests/hists_filter.c
tools/perf/tests/hists_link.c
tools/perf/tests/hists_output.c
tools/perf/tests/is_printable_array.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/kmod-path.c
tools/perf/tests/llvm.c
tools/perf/tests/maps.c
tools/perf/tests/mem.c
tools/perf/tests/mem2node.c
tools/perf/tests/mmap-basic.c
tools/perf/tests/mmap-thread-lookup.c
tools/perf/tests/openat-syscall-all-cpus.c
tools/perf/tests/openat-syscall-tp-fields.c
tools/perf/tests/openat-syscall.c
tools/perf/tests/parse-events.c
tools/perf/tests/parse-metric.c
tools/perf/tests/parse-no-sample-id-all.c
tools/perf/tests/pe-file-parsing.c
tools/perf/tests/perf-hooks.c
tools/perf/tests/perf-record.c
tools/perf/tests/perf-time-to-tsc.c
tools/perf/tests/pfm.c
tools/perf/tests/pmu-events.c
tools/perf/tests/pmu.c
tools/perf/tests/python-use.c
tools/perf/tests/sample-parsing.c
tools/perf/tests/sdt.c
tools/perf/tests/shell/record+zstd_comp_decomp.sh
tools/perf/tests/shell/stat_all_pmu.sh
tools/perf/tests/shell/stat_bpf_counters.sh
tools/perf/tests/shell/test_arm_spe.sh [new file with mode: 0755]
tools/perf/tests/stat.c
tools/perf/tests/sw-clock.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/task-exit.c
tools/perf/tests/tests.h
tools/perf/tests/thread-map.c
tools/perf/tests/thread-maps-share.c
tools/perf/tests/time-utils-test.c
tools/perf/tests/topology.c
tools/perf/tests/unit_number__scnprintf.c
tools/perf/tests/vmlinux-kallsyms.c
tools/perf/tests/wp.c
tools/perf/trace/beauty/beauty.h
tools/perf/trace/beauty/sockaddr.c
tools/perf/trace/beauty/sockaddr.sh [new file with mode: 0755]
tools/perf/trace/beauty/socket.c
tools/perf/trace/beauty/socket.sh
tools/perf/trace/beauty/socket_ipproto.sh [deleted file]
tools/perf/util/annotate.c
tools/perf/util/annotate.h
tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
tools/perf/util/arm-spe.c
tools/perf/util/bpf-event.c
tools/perf/util/c++/clang-c.h
tools/perf/util/c++/clang-test.cpp
tools/perf/util/cputopo.c
tools/perf/util/cputopo.h
tools/perf/util/cs-etm.c
tools/perf/util/env.c
tools/perf/util/env.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/expr.c
tools/perf/util/expr.h
tools/perf/util/expr.l
tools/perf/util/expr.y
tools/perf/util/header.c
tools/perf/util/stat-shadow.c
tools/perf/util/symbol.c
tools/perf/util/symbol.h
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/prog_tests/helper_restricted.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_helper_restricted.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_verifier.c
tools/testing/selftests/bpf/verifier/helper_restricted.c [new file with mode: 0644]
tools/testing/selftests/bpf/verifier/map_in_map.c
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/access_tracking_perf_test.c
tools/testing/selftests/kvm/demand_paging_test.c
tools/testing/selftests/kvm/dirty_log_perf_test.c
tools/testing/selftests/kvm/dirty_log_test.c
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/perf_test_util.h
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/include/x86_64/svm_util.h
tools/testing/selftests/kvm/kvm_page_table_test.c
tools/testing/selftests/kvm/lib/elf.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/perf_test_util.c
tools/testing/selftests/kvm/lib/test_util.c
tools/testing/selftests/kvm/lib/x86_64/svm.c
tools/testing/selftests/kvm/memslot_modification_stress_test.c
tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/forwarding/config
tools/testing/selftests/net/forwarding/tc_actions.sh
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/conntrack_vrf.sh [new file with mode: 0755]
tools/testing/selftests/netfilter/nft_nat.sh
tools/testing/selftests/netfilter/nft_queue.sh
tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
virt/kvm/kvm_main.c

index f627e70..b268e3e 100644 (file)
@@ -512,3 +512,19 @@ Date:              July 2021
 Contact:       "Daeho Jeong" <daehojeong@google.com>
 Description:   You can control the multiplier value of bdi device readahead window size
                between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
+
+What:          /sys/fs/f2fs/<disk>/max_fragment_chunk
+Date:          August 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   With "mode=fragment:block" mount options, we can scatter block allocation.
+               f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
+               in the length of 1..<max_fragment_hole> by turns. This value can be set
+               between 1..512 and the default value is 4.
+
+What:          /sys/fs/f2fs/<disk>/max_fragment_hole
+Date:          August 2021
+Contact:       "Daeho Jeong" <daehojeong@google.com>
+Description:   With "mode=fragment:block" mount options, we can scatter block allocation.
+               f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
+               in the length of 1..<max_fragment_hole> by turns. This value can be set
+               between 1..512 and the default value is 4.
index 4261620..0e486f4 100644 (file)
@@ -1099,7 +1099,7 @@ task_delayacct
 ===============
 
 Enables/disables task delay accounting (see
-:doc:`accounting/delay-accounting.rst`). Enabling this feature incurs
+Documentation/accounting/delay-accounting.rst. Enabling this feature incurs
 a small amount of overhead in the scheduler but is useful for debugging
 and performance tuning. It is required by some tools such as iotop.
 
index 8323c79..9485a5a 100644 (file)
@@ -104,6 +104,8 @@ Discovery family
 
                 Not supported by the Linux kernel.
 
+  Homepage:
+        https://web.archive.org/web/20110924171043/http://www.marvell.com/embedded-processors/discovery-innovation/
   Core:
        Feroceon 88fr571-vd ARMv5 compatible
 
@@ -120,6 +122,7 @@ EBU Armada family
         - 88F6707
         - 88F6W11
 
+    - Product infos:   https://web.archive.org/web/20141002083258/http://www.marvell.com/embedded-processors/armada-370/
     - Product Brief:   https://web.archive.org/web/20121115063038/http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
     - Hardware Spec:   https://web.archive.org/web/20140617183747/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
     - Functional Spec: https://web.archive.org/web/20140617183701/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
@@ -127,9 +130,29 @@ EBU Armada family
   Core:
        Sheeva ARMv7 compatible PJ4B
 
+  Armada XP Flavors:
+        - MV78230
+        - MV78260
+        - MV78460
+
+    NOTE:
+       not to be confused with the non-SMP 78xx0 SoCs
+
+    - Product infos:   https://web.archive.org/web/20150101215721/http://www.marvell.com/embedded-processors/armada-xp/
+    - Product Brief:   https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+    - Functional Spec: https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+    - Hardware Specs:
+        - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+        - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+        - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+  Core:
+       Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
+
   Armada 375 Flavors:
        - 88F6720
 
+    - Product infos: https://web.archive.org/web/20140108032402/http://www.marvell.com/embedded-processors/armada-375/
     - Product Brief: https://web.archive.org/web/20131216023516/http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
 
   Core:
@@ -162,29 +185,6 @@ EBU Armada family
   Core:
        ARM Cortex-A9
 
-  Armada XP Flavors:
-        - MV78230
-        - MV78260
-        - MV78460
-
-    NOTE:
-       not to be confused with the non-SMP 78xx0 SoCs
-
-    Product Brief:
-       https://web.archive.org/web/20121021173528/http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
-
-    Functional Spec:
-       https://web.archive.org/web/20180829171131/http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
-
-    - Hardware Specs:
-
-        - https://web.archive.org/web/20141127013651/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
-        - https://web.archive.org/web/20141222000224/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
-        - https://web.archive.org/web/20141222000230/http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
-
-  Core:
-       Sheeva ARMv7 compatible Dual-core or Quad-core PJ4B-MP
-
   Linux kernel mach directory:
        arch/arm/mach-mvebu
   Linux kernel plat directory:
@@ -436,7 +436,7 @@ Berlin family (Multimedia Solutions)
   - Flavors:
        - 88DE3010, Armada 1000 (no Linux support)
                - Core:         Marvell PJ1 (ARMv5TE), Dual-core
-               - Product Brief:        http://www.marvell.com.cn/digital-entertainment/assets/armada_1000_pb.pdf
+               - Product Brief:        https://web.archive.org/web/20131103162620/http://www.marvell.com/digital-entertainment/assets/armada_1000_pb.pdf
        - 88DE3005, Armada 1500 Mini
                - Design name:  BG2CD
                - Core:         ARM Cortex-A9, PL310 L2CC
index 37f273a..610450f 100644 (file)
@@ -15,7 +15,7 @@ that goes into great technical depth about the BPF Architecture.
 libbpf
 ======
 
-Documentation/bpf/libbpf/libbpf.rst is a userspace library for loading and interacting with bpf programs.
+Documentation/bpf/libbpf/index.rst is a userspace library for loading and interacting with bpf programs.
 
 BPF Type Format (BTF)
 =====================
index b1f28d1..a41cd87 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ST STi Platforms Device Tree Bindings
 
 maintainers:
-  - Patrice Chotard <patrice.chotard@st.com>
+  - Patrice Chotard <patrice.chotard@foss.st.com>
 
 properties:
   $nodename:
index 8e711bd..ecb28e9 100644 (file)
@@ -7,8 +7,8 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 title: STMicroelectronics STM32 ML-AHB interconnect bindings
 
 maintainers:
-  - Fabien Dessenne <fabien.dessenne@st.com>
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 description: |
   These bindings describe the STM32 SoCs ML-AHB interconnect bus which connects
index 149afb5..6f846d6 100644 (file)
@@ -7,8 +7,8 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 title: STMicroelectronics STM32 Platforms System Controller bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
-  - Christophe Roullier <christophe.roullier@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
+  - Christophe Roullier <christophe.roullier@foss.st.com>
 
 properties:
   compatible:
index 9ac7da0..bcaf7be 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Platforms Device Tree Bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
 
 properties:
   $nodename:
index 6e80dbc..aa1df03 100644 (file)
@@ -104,7 +104,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     cgu: clock-controller@10000000 {
       compatible = "ingenic,jz4770-cgu", "simple-mfd";
       reg = <0x10000000 0x100>;
index e17143c..252085a 100644 (file)
@@ -42,6 +42,9 @@ properties:
   "#clock-cells":
     const: 1
 
+  "#reset-cells":
+    const: 1
+
 required:
   - compatible
   - reg
@@ -57,4 +60,5 @@ examples:
       reg = <0x10000000 0x1000>;
       clocks = <&hfclk>, <&rtcclk>;
       #clock-cells = <1>;
+      #reset-cells = <1>;
     };
index 8fe6f80..bfda6af 100644 (file)
@@ -2,7 +2,7 @@ Binding for Silicon Labs Si5351a/b/c programmable i2c clock generator.
 
 Reference
 [1] Si5351A/B/C Data Sheet
-    https://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
+    https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/data-sheets/Si5351-B.pdf
 
 The Si5351a/b/c are programmable i2c clock generators with up to 8 output
 clocks. Si5351a also has a reduced pin-count package (MSOP10) where only
index c3930ed..9a0cc73 100644 (file)
@@ -23,6 +23,7 @@ properties:
           - socionext,uniphier-ld11-clock
           - socionext,uniphier-ld20-clock
           - socionext,uniphier-pxs3-clock
+          - socionext,uniphier-nx1-clock
       - description: Media I/O (MIO) clock, SD clock
         enum:
           - socionext,uniphier-ld4-mio-clock
@@ -33,6 +34,7 @@ properties:
           - socionext,uniphier-ld11-mio-clock
           - socionext,uniphier-ld20-sd-clock
           - socionext,uniphier-pxs3-sd-clock
+          - socionext,uniphier-nx1-sd-clock
       - description: Peripheral clock
         enum:
           - socionext,uniphier-ld4-peri-clock
@@ -43,6 +45,10 @@ properties:
           - socionext,uniphier-ld11-peri-clock
           - socionext,uniphier-ld20-peri-clock
           - socionext,uniphier-pxs3-peri-clock
+          - socionext,uniphier-nx1-peri-clock
+      - description: SoC-glue clock
+        enum:
+          - socionext,uniphier-pro4-sg-clock
 
   "#clock-cells":
     const: 1
index 8b1ecb2..a0ae486 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Reset Clock Controller Binding
 
 maintainers:
-  - Gabriel Fernandez <gabriel.fernandez@st.com>
+  - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
 
 description: |
   The RCC IP is both a reset and a clock controller.
index cee624c..b72e485 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 CRC bindings
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
index a457455..ed23bf9 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 CRYP bindings
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
index 6dd658f..10ba947 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 HASH bindings
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
index 3c3e51a..11fd68a 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Synopsys DesignWare MIPI DSI host controller
 
 maintainers:
-  - Philippe CORNU <philippe.cornu@st.com>
+  - Philippe CORNU <philippe.cornu@foss.st.com>
 
 description: |
   This document defines device tree properties for the Synopsys DesignWare MIPI
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
deleted file mode 100644 (file)
index 583c5e9..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-Toshiba TC358767 eDP bridge bindings
-
-Required properties:
- - compatible: "toshiba,tc358767"
- - reg: i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
- - clock-names: should be "ref"
- - clocks: OF device-tree clock specification for refclk input. The reference
-   clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
-
-Optional properties:
- - shutdown-gpios: OF device-tree gpio specification for SD pin
-                   (active high shutdown input)
- - reset-gpios: OF device-tree gpio specification for RSTX pin
-                (active low system reset)
- - toshiba,hpd-pin: TC358767 GPIO pin number to which HPD is connected to (0 or 1)
- - ports: the ports node can contain video interface port nodes to connect
-   to a DPI/DSI source and to an eDP/DP sink according to [1][2]:
-    - port@0: DSI input port
-    - port@1: DPI input port
-    - port@2: eDP/DP output port
-
-[1]: Documentation/devicetree/bindings/graph.txt
-[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
-
-Example:
-       edp-bridge@68 {
-               compatible = "toshiba,tc358767";
-               reg = <0x68>;
-               shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
-               reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
-               clock-names = "ref";
-               clocks = <&edp_refclk>;
-
-               ports {
-                       #address-cells = <1>;
-                       #size-cells = <0>;
-
-                       port@1 {
-                               reg = <1>;
-
-                               bridge_in: endpoint {
-                                       remote-endpoint = <&dpi_out>;
-                               };
-                       };
-
-                       port@2 {
-                               reg = <2>;
-
-                               bridge_out: endpoint {
-                                       remote-endpoint = <&panel_in>;
-                               };
-                       };
-               };
-       };
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
new file mode 100644 (file)
index 0000000..f1541cc
--- /dev/null
@@ -0,0 +1,158 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/display/bridge/toshiba,tc358767.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba TC358767 eDP bridge bindings
+
+maintainers:
+  - Andrey Gusakov <andrey.gusakov@cogentembedded.com>
+
+description: The TC358767 is bridge device which converts DSI/DPI to eDP/DP
+
+properties:
+  compatible:
+    const: toshiba,tc358767
+
+  reg:
+    enum:
+      - 0x68
+      - 0x0f
+    description: |
+        i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
+
+  clock-names:
+    const: "ref"
+
+  clocks:
+    maxItems: 1
+    description: |
+        OF device-tree clock specification for refclk input. The reference.
+        clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
+
+  shutdown-gpios:
+    maxItems: 1
+    description: |
+        OF device-tree gpio specification for SD pin(active high shutdown input)
+
+  reset-gpios:
+    maxItems: 1
+    description: |
+        OF device-tree gpio specification for RSTX pin(active low system reset)
+
+  toshiba,hpd-pin:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum:
+      - 0
+      - 1
+    description: TC358767 GPIO pin number to which HPD is connected to (0 or 1)
+
+  ports:
+    $ref: /schemas/graph.yaml#/properties/ports
+
+    properties:
+      port@0:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: |
+            DSI input port. The remote endpoint phandle should be a
+            reference to a valid DSI output endpoint node
+
+      port@1:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: |
+            DPI input port. The remote endpoint phandle should be a
+            reference to a valid DPI output endpoint node
+
+      port@2:
+        $ref: /schemas/graph.yaml#/properties/port
+        description: |
+            eDP/DP output port. The remote endpoint phandle should be a
+            reference to a valid eDP panel input endpoint node. This port is
+            optional, treated as DP panel if not defined
+
+    oneOf:
+      - required:
+          - port@0
+      - required:
+          - port@1
+
+
+required:
+  - compatible
+  - reg
+  - clock-names
+  - clocks
+  - ports
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    /* DPI input and eDP output */
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        edp-bridge@68 {
+            compatible = "toshiba,tc358767";
+            reg = <0x68>;
+            shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+            reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+            clock-names = "ref";
+            clocks = <&edp_refclk>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@1 {
+                    reg = <1>;
+
+                    bridge_in_0: endpoint {
+                        remote-endpoint = <&dpi_out>;
+                    };
+                };
+
+                port@2 {
+                    reg = <2>;
+
+                    bridge_out: endpoint {
+                        remote-endpoint = <&panel_in>;
+                    };
+                };
+            };
+        };
+    };
+  - |
+    /* DPI input and DP output */
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        edp-bridge@68 {
+            compatible = "toshiba,tc358767";
+            reg = <0x68>;
+            shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+            reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+            clock-names = "ref";
+            clocks = <&edp_refclk>;
+
+            ports {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                port@1 {
+                    reg = <1>;
+
+                    bridge_in_1: endpoint {
+                        remote-endpoint = <&dpi_out>;
+                    };
+                };
+            };
+        };
+    };
index e679f48..3f93def 100644 (file)
@@ -45,7 +45,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     ipu@13080000 {
       compatible = "ingenic,jz4770-ipu", "ingenic,jz4760-ipu";
       reg = <0x13080000 0x800>;
index 50d2b0a..0049010 100644 (file)
@@ -88,7 +88,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     lcd-controller@13050000 {
       compatible = "ingenic,jz4740-lcd";
       reg = <0x13050000 0x1000>;
@@ -107,7 +107,7 @@ examples:
     };
 
   - |
-    #include <dt-bindings/clock/jz4725b-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
     lcd-controller@13050000 {
       compatible = "ingenic,jz4725b-lcd";
       reg = <0x13050000 0x1000>;
index 4b6dda6..17cbd0a 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Orise Tech OTM8009A 3.97" 480x800 TFT LCD panel (MIPI-DSI video mode)
 
 maintainers:
-  - Philippe CORNU <philippe.cornu@st.com>
+  - Philippe CORNU <philippe.cornu@foss.st.com>
 
 description: |
              The Orise Tech OTM8009A is a 3.97" 480x800 TFT LCD panel connected using
index 3947779..e8ce231 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Raydium Semiconductor Corporation RM68200 5.5" 720p MIPI-DSI TFT LCD panel
 
 maintainers:
-  - Philippe CORNU <philippe.cornu@st.com>
+  - Philippe CORNU <philippe.cornu@foss.st.com>
 
 description: |
   The Raydium Semiconductor Corporation RM68200 is a 5.5" 720x1280 TFT LCD
index ed310bb..ce1ef93 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 DSI host controller
 
 maintainers:
-  - Philippe Cornu <philippe.cornu@st.com>
-  - Yannick Fertre <yannick.fertre@st.com>
+  - Philippe Cornu <philippe.cornu@foss.st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
 
 description:
   The STMicroelectronics STM32 DSI controller uses the Synopsys DesignWare MIPI-DSI host controller.
index 4ae3d75..01e2da2 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 lcd-tft display controller
 
 maintainers:
-  - Philippe Cornu <philippe.cornu@st.com>
-  - Yannick Fertre <yannick.fertre@st.com>
+  - Philippe Cornu <philippe.cornu@foss.st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
 
 properties:
   compatible:
index ac4d594..dc059d6 100644 (file)
@@ -68,7 +68,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     dma: dma-controller@13420000 {
       compatible = "ingenic,jz4780-dma";
       reg = <0x13420000 0x400>, <0x13421000 0x40>;
index 4bf676f..55faab6 100644 (file)
@@ -50,7 +50,7 @@ description: |
 
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 allOf:
   - $ref: "dma-controller.yaml#"
index c8d2b51..f751796 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 DMA MUX (DMA request router) bindings
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 allOf:
   - $ref: "dma-router.yaml#"
index c30be84..87b4afd 100644 (file)
@@ -50,7 +50,7 @@ description: |
        if no HW ack signal is used by the MDMA client
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 allOf:
   - $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/gpio/gpio-xlp.txt b/Documentation/devicetree/bindings/gpio/gpio-xlp.txt
deleted file mode 100644 (file)
index 47fc649..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-Netlogic XLP Family GPIO
-========================
-
-This GPIO driver is used for following Netlogic XLP SoCs:
-       XLP832, XLP316, XLP208, XLP980, XLP532
-This GPIO driver is also compatible with GPIO controller found on
-Broadcom Vulcan ARM64.
-
-Required properties:
--------------------
-
-- compatible: Should be one of the following:
-  - "netlogic,xlp832-gpio": For Netlogic XLP832
-  - "netlogic,xlp316-gpio": For Netlogic XLP316
-  - "netlogic,xlp208-gpio": For Netlogic XLP208
-  - "netlogic,xlp980-gpio": For Netlogic XLP980
-  - "netlogic,xlp532-gpio": For Netlogic XLP532
-  - "brcm,vulcan-gpio": For Broadcom Vulcan ARM64
-- reg: Physical base address and length of the controller's registers.
-- #gpio-cells: Should be two. The first cell is the pin number and the second
-  cell is used to specify optional parameters (currently unused).
-- gpio-controller: Marks the device node as a GPIO controller.
-- nr-gpios: Number of GPIO pins supported by the controller.
-- interrupt-cells: Should be two. The first cell is the GPIO Number. The
-  second cell is used to specify flags. The following subset of flags is
-  supported:
-  - trigger type:
-       1 = low to high edge triggered.
-       2 = high to low edge triggered.
-       4 = active high level-sensitive.
-       8 = active low level-sensitive.
-- interrupts: Interrupt number for this device.
-- interrupt-controller: Identifies the node as an interrupt controller.
-
-Example:
-
-       gpio: xlp_gpio@34000 {
-               compatible = "netlogic,xlp316-gpio";
-               reg = <0 0x34100 0x1000
-                      0 0x35100 0x1000>;
-               #gpio-cells = <2>;
-               gpio-controller;
-               nr-gpios = <57>;
-
-               #interrupt-cells = <2>;
-               interrupt-parent = <&pic>;
-               interrupts = <39>;
-               interrupt-controller;
-       };
index 47cf9c8..b18c616 100644 (file)
@@ -7,8 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Hardware Spinlock bindings
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
-  - Fabien Dessenne <fabien.dessenne@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
 
 properties:
   "#hwlock-cells":
index 3592d49..c167958 100644 (file)
@@ -57,7 +57,9 @@ properties:
     const: ipg
 
   clock-frequency:
-    enum: [ 100000, 400000 ]
+    minimum: 1
+    default: 100000
+    maximum: 400000
 
   dmas:
     items:
diff --git a/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt b/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt
deleted file mode 100644 (file)
index f818ef5..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Device tree configuration for the I2C controller on the XLP9xx/5xx SoC
-
-Required properties:
-- compatible      : should be "netlogic,xlp980-i2c"
-- reg             : bus address start and address range size of device
-- interrupts      : interrupt number
-
-Optional properties:
-- clock-frequency : frequency of bus clock in Hz
-                    Defaults to 100 KHz when the property is not specified
-
-Example:
-
-i2c0: i2c@113100 {
-       compatible = "netlogic,xlp980-i2c";
-       #address-cells = <1>;
-       #size-cells = <0>;
-       reg = <0 0x113100 0x100>;
-       clock-frequency = <400000>;
-       interrupts = <30>;
-       interrupt-parent = <&pic>;
-};
index e1e65eb..febde6c 100644 (file)
@@ -60,7 +60,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/dma/jz4780-dma.h>
     #include <dt-bindings/interrupt-controller/irq.h>
     i2c@10054000 {
index d747f49..c07289a 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: I2C controller embedded in STMicroelectronics STM32 I2C platform
 
 maintainers:
-  - Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+  - Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
 
 allOf:
   - $ref: /schemas/i2c/i2c-controller.yaml#
index 3eb7aa8..698beb8 100644 (file)
@@ -74,7 +74,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     #include <dt-bindings/iio/adc/ingenic,adc.h>
 
     adc@10070000 {
index a390343..2287697 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Device-Tree bindings for sigma delta modulator
 
 maintainers:
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 properties:
   compatible:
index ec0450d..4d60745 100644 (file)
@@ -19,7 +19,7 @@ description: |
   Each STM32 ADC block can have up to 3 ADC instances.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
index 733351d..7c260f2 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 DFSDM ADC device driver
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description: |
   STM32 DFSDM ADC is a sigma delta analog-to-digital converter dedicated to
index 393f700..6adeda4 100644 (file)
@@ -15,7 +15,7 @@ description: |
   current.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
index 6d3e68e..d19c881 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STM32 External Interrupt Controller Device Tree Bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
-  - Ludovic Barre <ludovic.barre@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
+  - Ludovic Barre <ludovic.barre@foss.st.com>
 
 properties:
   compatible:
index b15da9b..8eb4bf5 100644 (file)
@@ -13,8 +13,8 @@ description:
   channels (N) can be read from a dedicated register.
 
 maintainers:
-  - Fabien Dessenne <fabien.dessenne@st.com>
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 properties:
   compatible:
index fa54c56..e287468 100644 (file)
@@ -30,7 +30,6 @@ properties:
 
   power-domain-names:
     minItems: 2
-    maxItems: 3
     items:
       - const: venus
       - const: vcodec0
index d75019c..77144cc 100644 (file)
@@ -7,8 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 CEC bindings
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
-  - Yannick Fertre <yannick.fertre@st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
 
 properties:
   compatible:
index 41e1d0c..9c1262a 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI) binding
 
 maintainers:
-  - Hugues Fruchet <hugues.fruchet@st.com>
+  - Hugues Fruchet <hugues.fruchet@foss.st.com>
 
 properties:
   compatible:
index fe0ce19..24f9e19 100644 (file)
@@ -84,7 +84,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/gpio/gpio.h>
     nemc: memory-controller@13410000 {
       compatible = "ingenic,jz4780-nemc";
index cba7420..6b516d3 100644 (file)
@@ -19,7 +19,7 @@ description: |
   Select. The FMC2 performs only one access at a time to an external device.
 
 maintainers:
-  - Christophe Kerello <christophe.kerello@st.com>
+  - Christophe Kerello <christophe.kerello@foss.st.com>
 
 properties:
   compatible:
index 8bcea8d..ec7f019 100644 (file)
@@ -17,7 +17,7 @@ description: |
      - simple counter from IN1 input signal.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
index dace353..10b330d 100644 (file)
@@ -17,8 +17,7 @@ description: |
       programmable prescaler.
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 properties:
   compatible:
index 19e9afb..b2a4e4a 100644 (file)
@@ -12,7 +12,7 @@ description: ST Multi-Function eXpander (STMFX) is a slave controller using I2C
                through VDD) and resistive touchscreen controller.
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 properties:
   compatible:
index 305123e..426658a 100644 (file)
@@ -9,7 +9,7 @@ title: STMicroelectonics STPMIC1 Power Management IC bindings
 description: STMicroelectronics STPMIC1 Power Management IC
 
 maintainers:
-  - pascal Paillet <p.paillet@st.com>
+  - pascal Paillet <p.paillet@foss.st.com>
 
 properties:
   compatible:
index 6df1a94..b7e7fa7 100644 (file)
@@ -44,7 +44,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 
     cpus {
         #address-cells = <1>;
index 546480f..01d5c6d 100644 (file)
@@ -61,7 +61,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/dma/jz4780-dma.h>
     mmc0: mmc@13450000 {
       compatible = "ingenic,jz4780-mmc";
index 89aa3ce..9de8ef6 100644 (file)
@@ -55,7 +55,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     memory-controller@13410000 {
       compatible = "ingenic,jz4780-nemc";
       reg = <0x13410000 0x10000>;
index 29c5ef2..eab8ea3 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings
 
 maintainers:
-  - Christophe Kerello <christophe.kerello@st.com>
+  - Christophe Kerello <christophe.kerello@foss.st.com>
 
 properties:
   compatible:
index d08a881..8e52b2e 100644 (file)
@@ -58,7 +58,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/x1000-cgu.h>
+    #include <dt-bindings/clock/ingenic,x1000-cgu.h>
 
     mac: ethernet@134b0000 {
         compatible = "ingenic,x1000-mac";
index 282d774..7ae70dc 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Synopsys DesignWare MAC Device Tree Bindings
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
   - Giuseppe Cavallaro <peppe.cavallaro@st.com>
   - Jose Abreu <joabreu@synopsys.com>
 
index d3f05d5..577f4e2 100644 (file)
@@ -8,8 +8,8 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 title: STMicroelectronics STM32 / MCU DWMAC glue layer controller
 
 maintainers:
-  - Alexandre Torgue <alexandre.torgue@st.com>
-  - Christophe Roullier <christophe.roullier@st.com>
+  - Alexandre Torgue <alexandre.torgue@foss.st.com>
+  - Christophe Roullier <christophe.roullier@foss.st.com>
 
 description:
   This file documents platform glue layer for stmmac.
index 1485d3f..bf84768 100644 (file)
@@ -33,7 +33,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 
     efuse@134100d0 {
         compatible = "ingenic,jz4780-efuse";
index 0b80ce2..a48c8fa 100644 (file)
@@ -13,7 +13,7 @@ description: |
   internal vref (VREFIN_CAL), unique device ID...
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 allOf:
   - $ref: "nvmem.yaml#"
index 0fd93d7..5cab216 100644 (file)
@@ -46,7 +46,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     otg_phy: usb-phy@3c {
       compatible = "ingenic,jz4770-phy";
       reg = <0x3c 0x10>;
index 2251283..267b695 100644 (file)
@@ -24,7 +24,7 @@ description:
   |_ UTMI switch_______|          OTG controller
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 properties:
   compatible:
index dfee6d3..ac88e01 100644 (file)
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STM32 GPIO and Pin Mux/Config controller
 
 maintainers:
-  - Alexandre TORGUE <alexandre.torgue@st.com>
+  - Alexandre TORGUE <alexandre.torgue@foss.st.com>
 
 description: |
   STMicroelectronics's STM32 MCUs intregrate a GPIO and Pin mux/config hardware
index 9f1c703..df0191b 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 booster for ADC analog input switches bindings
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 description: |
   Some STM32 devices embed a 3.3V booster supplied by Vdda, that can be used
index 3cd4a25..836d415 100644 (file)
@@ -12,7 +12,7 @@ description: |
   components through the dedicated VREF+ pin.
 
 maintainers:
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 allOf:
   - $ref: "regulator.yaml#"
index e6322bc..bd07b9c 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STM32MP1 PWR voltage regulators
 
 maintainers:
-  - Pascal Paillet <p.paillet@st.com>
+  - Pascal Paillet <p.paillet@foss.st.com>
 
 properties:
   compatible:
index d0aa91b..aaaaaba 100644 (file)
@@ -58,7 +58,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
 
     vpu: video-decoder@132a0000 {
       compatible = "ingenic,jz4770-vpu-rproc";
index 1e62256..b587c97 100644 (file)
@@ -11,8 +11,8 @@ description:
   boots firmwares on the ST32MP family chipset.
 
 maintainers:
-  - Fabien Dessenne <fabien.dessenne@st.com>
-  - Arnaud Pouliquen <arnaud.pouliquen@st.com>
+  - Fabien Dessenne <fabien.dessenne@foss.st.com>
+  - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 
 properties:
   compatible:
index 808f247..044d9a0 100644 (file)
@@ -32,7 +32,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/x1830-cgu.h>
+    #include <dt-bindings/clock/ingenic,x1830-cgu.h>
 
     dtrng: trng@10072000 {
         compatible = "ingenic,x1830-dtrng";
index 82bb2e9..9a6e4ea 100644 (file)
@@ -11,7 +11,7 @@ description: |
   IP and is fully separated from other crypto functions.
 
 maintainers:
-  - Lionel Debieve <lionel.debieve@st.com>
+  - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
index 60e93e8..b235b24 100644 (file)
@@ -72,7 +72,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     rtc_dev: rtc@10003000 {
       compatible = "ingenic,jz4740-rtc";
       reg = <0x10003000 0x40>;
index 5456604..2359f54 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Real Time Clock Bindings
 
 maintainers:
-  - Gabriel Fernandez <gabriel.fernandez@st.com>
+  - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
 
 properties:
   compatible:
index b432d4d..9ca7a18 100644 (file)
@@ -71,7 +71,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4780-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4780-cgu.h>
     #include <dt-bindings/dma/jz4780-dma.h>
     #include <dt-bindings/gpio/gpio.h>
     serial@10032000 {
index f50f4ca..333dc42 100644 (file)
@@ -5,7 +5,7 @@ $id: http://devicetree.org/schemas/serial/st,stm32-uart.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 maintainers:
-  - Erwan Le Ray <erwan.leray@st.com>
+  - Erwan Le Ray <erwan.leray@foss.st.com>
 
 title: STMicroelectronics STM32 USART bindings
 
index 0d87e2c..963a871 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: CS42L51 audio codec DT bindings
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 properties:
   compatible:
index cdc0fda..d607325 100644 (file)
@@ -71,7 +71,7 @@ required:
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     aic: audio-controller@10020000 {
       compatible = "ingenic,jz4740-i2s";
       reg = <0x10020000 0x38>;
index 97d5f38..48aae54 100644 (file)
@@ -48,7 +48,7 @@ required:
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     codec: audio-codec@10020080 {
       compatible = "ingenic,jz4740-codec";
       reg = <0x10020080 0x8>;
index 6feb5a0..d3966ae 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 SPI/I2S Controller
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description:
   The SPI/I2S block supports I2S/PCM protocols when configured on I2S mode.
index f971324..1538d11 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Serial Audio Interface (SAI)
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description:
   The SAI interface (Serial Audio Interface) offers a wide set of audio
index b7f7dc4..837e830 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 S/PDIF receiver (SPDIFRX)
 
 maintainers:
-  - Olivier Moysan <olivier.moysan@st.com>
+  - Olivier Moysan <olivier.moysan@foss.st.com>
 
 description: |
   The SPDIFRX peripheral, is designed to receive an S/PDIF flow compliant with
index cf56cc4..5b1c7a2 100644 (file)
@@ -55,7 +55,7 @@ unevaluatedProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     spi@10043000 {
       compatible = "ingenic,jz4770-spi", "ingenic,jz4750-spi";
       reg = <0x10043000 0x1c>;
diff --git a/Documentation/devicetree/bindings/spi/spi-xlp.txt b/Documentation/devicetree/bindings/spi/spi-xlp.txt
deleted file mode 100644 (file)
index f4925ec..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-SPI Master controller for Netlogic XLP MIPS64 SOCs
-==================================================
-
-Currently this SPI controller driver is supported for the following
-Netlogic XLP SoCs:
-       XLP832, XLP316, XLP208, XLP980, XLP532
-
-Required properties:
-- compatible           : Should be "netlogic,xlp832-spi".
-- #address-cells       : Number of cells required to define a chip select address
-                         on the SPI bus.
-- #size-cells          : Should be zero.
-- reg                  : Should contain register location and length.
-- clocks               : Phandle of the spi clock
-- interrupts           : Interrupt number used by this controller.
-
-SPI slave nodes must be children of the SPI master node and can contain
-properties described in Documentation/devicetree/bindings/spi/spi-bus.txt.
-
-Example:
-
-       spi: xlp_spi@3a100 {
-               compatible = "netlogic,xlp832-spi";
-               #address-cells = <1>;
-               #size-cells = <0>;
-               reg = <0 0x3a100 0x100>;
-               clocks = <&spi_clk>;
-               interrupts = <34>;
-               interrupt-parent = <&pic>;
-
-               spi_nor@1 {
-                       compatible = "spansion,s25sl12801";
-                       #address-cells = <1>;
-                       #size-cells = <1>;
-                       reg = <1>;      /* Chip Select */
-                       spi-max-frequency = <40000000>;
-               };
-};
index 983c4e5..6ec6f55 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Quad Serial Peripheral Interface (QSPI) bindings
 
 maintainers:
-  - Christophe Kerello <christophe.kerello@st.com>
-  - Patrice Chotard <patrice.chotard@st.com>
+  - Christophe Kerello <christophe.kerello@foss.st.com>
+  - Patrice Chotard <patrice.chotard@foss.st.com>
 
 allOf:
   - $ref: "spi-controller.yaml#"
index 2d9af4c..3d64bed 100644 (file)
@@ -13,8 +13,8 @@ description: |
   from 4 to 32-bit data size.
 
 maintainers:
-  - Erwan Leray <erwan.leray@st.com>
-  - Fabrice Gasnier <fabrice.gasnier@st.com>
+  - Erwan Leray <erwan.leray@foss.st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 
 allOf:
   - $ref: "spi-controller.yaml#"
index c0f59c5..bee41cf 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 digital thermal sensor (DTS) binding
 
 maintainers:
-  - David Hernandez Sanchez <david.hernandezsanchez@st.com>
+  - Pascal Paillet <p.paillet@foss.st.com>
 
 properties:
   compatible:
index df3eb76..98648bf 100644 (file)
@@ -46,7 +46,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/x1000-cgu.h>
+    #include <dt-bindings/clock/ingenic,x1000-cgu.h>
 
     ost: timer@12000000 {
         compatible = "ingenic,x1000-ost";
index 8165df4..7fb37ea 100644 (file)
@@ -237,7 +237,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4770-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4770-cgu.h>
     #include <dt-bindings/clock/ingenic,tcu.h>
     tcu: timer@10002000 {
       compatible = "ingenic,jz4770-tcu", "ingenic,jz4760-tcu", "simple-mfd";
index 176aa3c..937aa8a 100644 (file)
@@ -7,7 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers bindings
 
 maintainers:
-  - Benjamin Gaignard <benjamin.gaignard@st.com>
+  - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
+  - Patrice Chotard <patrice.chotard@foss.st.com>
 
 properties:
   compatible:
index f506225..5921235 100644 (file)
@@ -58,7 +58,7 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/jz4740-cgu.h>
+    #include <dt-bindings/clock/ingenic,jz4740-cgu.h>
     usb_phy: usb-phy {
       compatible = "usb-nop-xceiv";
       #phy-cells = <0>;
index 9a51efa..ead1571 100644 (file)
@@ -7,7 +7,7 @@ $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 title: STMicroelectronics STUSB160x Type-C controller bindings
 
 maintainers:
-  - Amelie Delaunay <amelie.delaunay@st.com>
+  - Amelie Delaunay <amelie.delaunay@foss.st.com>
 
 properties:
   compatible:
index 44cad94..43afa24 100644 (file)
@@ -40,14 +40,12 @@ properties:
 
   clocks:
     minItems: 1
-    maxItems: 2
     items:
       - description: High-frequency oscillator input, divided internally
       - description: Low-frequency oscillator input, only found on some variants
 
   clock-names:
     minItems: 1
-    maxItems: 2
     items:
       - const: hosc
       - const: losc
index 481bf91..3973644 100644 (file)
@@ -7,8 +7,8 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: STMicroelectronics STM32 Independent WatchDoG (IWDG) bindings
 
 maintainers:
-  - Yannick Fertre <yannick.fertre@st.com>
-  - Christophe Roullier <christophe.roullier@st.com>
+  - Yannick Fertre <yannick.fertre@foss.st.com>
+  - Christophe Roullier <christophe.roullier@foss.st.com>
 
 allOf:
   - $ref: "watchdog.yaml#"
index ec3e71f..e445cb1 100644 (file)
@@ -27,7 +27,7 @@ Sphinx Install
 ==============
 
 The ReST markups currently used by the Documentation/ files are meant to be
-built with ``Sphinx`` version 1.3 or higher.
+built with ``Sphinx`` version 1.7 or higher.
 
 There's a script that checks for the Sphinx requirements. Please see
 :ref:`sphinx-pre-install` for further details.
@@ -43,10 +43,6 @@ or ``virtualenv``, depending on how your distribution packaged Python 3.
 
 .. note::
 
-   #) Sphinx versions below 1.5 don't work properly with Python's
-      docutils version 0.13.1 or higher. So, if you're willing to use
-      those versions, you should run ``pip install 'docutils==0.12'``.
-
    #) It is recommended to use the RTD theme for html output. Depending
       on the Sphinx version, it should be installed separately,
       with ``pip install sphinx_rtd_theme``.
@@ -55,13 +51,13 @@ or ``virtualenv``, depending on how your distribution packaged Python 3.
       those expressions are written using LaTeX notation. It needs texlive
       installed with amsfonts and amsmath in order to evaluate them.
 
-In summary, if you want to install Sphinx version 1.7.9, you should do::
+In summary, if you want to install Sphinx version 2.4.4, you should do::
 
-       $ virtualenv sphinx_1.7.9
-       $ . sphinx_1.7.9/bin/activate
-       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_2.4.4
+       $ . sphinx_2.4.4/bin/activate
+       (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
 
-After running ``. sphinx_1.7.9/bin/activate``, the prompt will change,
+After running ``. sphinx_2.4.4/bin/activate``, the prompt will change,
 in order to indicate that you're using the new environment. If you
 open a new shell, you need to rerun this command to enter again at
 the virtual environment before building the documentation.
@@ -81,7 +77,7 @@ output.
 PDF and LaTeX builds
 --------------------
 
-Such builds are currently supported only with Sphinx versions 1.4 and higher.
+Such builds are currently supported only with Sphinx versions 2.4 and higher.
 
 For PDF and LaTeX output, you'll also need ``XeLaTeX`` version 3.14159265.
 
@@ -104,8 +100,8 @@ command line options for your distro::
        You should run:
 
                sudo dnf install -y texlive-luatex85
-               /usr/bin/virtualenv sphinx_1.7.9
-               . sphinx_1.7.9/bin/activate
+               /usr/bin/virtualenv sphinx_2.4.4
+               . sphinx_2.4.4/bin/activate
                pip install -r Documentation/sphinx/requirements.txt
 
        Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
index 681c6a4..4f49027 100644 (file)
@@ -35,7 +35,7 @@ This document describes only the kernel module and the interactions
 required with any user-space program.  Subsequent text refers to this
 as the "automount daemon" or simply "the daemon".
 
-"autofs" is a Linux kernel module with provides the "autofs"
+"autofs" is a Linux kernel module which provides the "autofs"
 filesystem type.  Several "autofs" filesystems can be mounted and they
 can each be managed separately, or all managed by the same daemon.
 
index 6f3c6e9..d7b8469 100644 (file)
@@ -197,10 +197,29 @@ fault_type=%d              Support configuring fault injection type, should be
                         FAULT_DISCARD            0x000002000
                         FAULT_WRITE_IO           0x000004000
                         FAULT_SLAB_ALLOC         0x000008000
+                        FAULT_DQUOT_INIT         0x000010000
                         ===================      ===========
 mode=%s                         Control block allocation mode which supports "adaptive"
                         and "lfs". In "lfs" mode, there should be no random
                         writes towards main area.
+                        "fragment:segment" and "fragment:block" are newly added here.
+                        These are developer options for experiments to simulate filesystem
+                        fragmentation/after-GC situation itself. The developers use these
+                        modes to understand filesystem fragmentation/after-GC condition well,
+                        and eventually get some insights to handle them better.
+                        In "fragment:segment", f2fs allocates a new segment in ramdom
+                        position. With this, we can simulate the after-GC condition.
+                        In "fragment:block", we can scatter block allocation with
+                        "max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
+                        We added some randomness to both chunk and hole size to make
+                        it close to realistic IO pattern. So, in this mode, f2fs will allocate
+                        1..<max_fragment_chunk> blocks in a chunk and make a hole in the
+                        length of 1..<max_fragment_hole> by turns. With this, the newly
+                        allocated blocks will be scattered throughout the whole partition.
+                        Note that "fragment:block" implicitly enables "fragment:segment"
+                        option for more randomness.
+                        Please, use these options for your experiments and we strongly
+                        recommend to re-format the filesystem after using these options.
 io_bits=%u              Set the bit size of write IO requests. It should be set
                         with "mode=lfs".
 usrquota                Enable plain user disk quota accounting.
index 95ef56d..387fda8 100644 (file)
@@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER
 
        0: disable any special handling on port reuse. The new
        connection will be delivered to the same real server that was
-       servicing the previous connection. This will effectively
-       disable expire_nodest_conn.
+       servicing the previous connection.
 
        bit 1: enable rescheduling of new connections when it is safe.
        That is, whenever expire_nodest_conn and for TCP sockets, when
index e35ab74..b398b85 100644 (file)
@@ -54,7 +54,7 @@ mcelog                 0.6              mcelog --version
 iptables               1.4.2            iptables -V
 openssl & libcrypto    1.0.0            openssl version
 bc                     1.06.95          bc --version
-Sphinx\ [#f1]_        1.3              sphinx-build --version
+Sphinx\ [#f1]_         1.7              sphinx-build --version
 ====================== ===============  ========================================
 
 .. [#f1] Sphinx is needed only to build the Kernel documentation
index a0cc969..da085d6 100644 (file)
@@ -22,8 +22,8 @@ use it, it will make your life as a kernel developer and in general much
 easier.
 
 Some subsystems and maintainer trees have additional information about
-their workflow and expectations, see :ref:`Documentation/process/maintainer
-handbooks <maintainer_handbooks_main>`.
+their workflow and expectations, see
+:ref:`Documentation/process/maintainer-handbooks.rst <maintainer_handbooks_main>`.
 
 Obtain a current source tree
 ----------------------------
index 4e5b26f..b3166c4 100644 (file)
@@ -2442,11 +2442,10 @@ Or this simple script!
   #!/bin/bash
 
   tracefs=`sed -ne 's/^tracefs \(.*\) tracefs.*/\1/p' /proc/mounts`
-  echo nop > $tracefs/tracing/current_tracer
-  echo 0 > $tracefs/tracing/tracing_on
-  echo $$ > $tracefs/tracing/set_ftrace_pid
-  echo function > $tracefs/tracing/current_tracer
-  echo 1 > $tracefs/tracing/tracing_on
+  echo 0 > $tracefs/tracing_on
+  echo $$ > $tracefs/set_ftrace_pid
+  echo function > $tracefs/current_tracer
+  echo 1 > $tracefs/tracing_on
   exec "$@"
 
 
index 0046d75..9762452 100644 (file)
@@ -35,7 +35,7 @@ Installazione Sphinx
 ====================
 
 I marcatori ReST utilizzati nei file in Documentation/ sono pensati per essere
-processati da ``Sphinx`` nella versione 1.3 o superiore.
+processati da ``Sphinx`` nella versione 1.7 o superiore.
 
 Esiste uno script che verifica i requisiti Sphinx. Per ulteriori dettagli
 consultate :ref:`it_sphinx-pre-install`.
@@ -53,11 +53,6 @@ pacchettizzato dalla vostra distribuzione.
 
 .. note::
 
-   #) Le versioni di Sphinx inferiori alla 1.5 non funzionano bene
-      con il pacchetto Python docutils versione 0.13.1 o superiore.
-      Se volete usare queste versioni, allora dovere eseguire
-      ``pip install 'docutils==0.12'``.
-
    #) Viene raccomandato l'uso del tema RTD per la documentazione in HTML.
       A seconda della versione di Sphinx, potrebbe essere necessaria
       l'installazione tramite il comando ``pip install sphinx_rtd_theme``.
@@ -67,13 +62,13 @@ pacchettizzato dalla vostra distribuzione.
       utilizzando LaTeX. Per una corretta interpretazione, è necessario aver
       installato texlive con i pacchetti amdfonts e amsmath.
 
-Riassumendo, se volete installare la versione 1.7.9 di Sphinx dovete eseguire::
+Riassumendo, se volete installare la versione 2.4.4 di Sphinx dovete eseguire::
 
-       $ virtualenv sphinx_1.7.9
-       $ . sphinx_1.7.9/bin/activate
-       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_2.4.4
+       $ . sphinx_2.4.4/bin/activate
+       (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
 
-Dopo aver eseguito ``. sphinx_1.7.9/bin/activate``, il prompt cambierà per
+Dopo aver eseguito ``. sphinx_2.4.4/bin/activate``, il prompt cambierà per
 indicare che state usando il nuovo ambiente. Se aprite un nuova sessione,
 prima di generare la documentazione, dovrete rieseguire questo comando per
 rientrare nell'ambiente virtuale.
@@ -94,7 +89,7 @@ Generazione in PDF e LaTeX
 --------------------------
 
 Al momento, la generazione di questi documenti è supportata solo dalle
-versioni di Sphinx superiori alla 1.4.
+versioni di Sphinx superiori alla 2.4.
 
 Per la generazione di PDF e LaTeX, avrete bisogno anche del pacchetto
 ``XeLaTeX`` nella versione 3.14159265
@@ -119,8 +114,8 @@ l'installazione::
        You should run:
 
                sudo dnf install -y texlive-luatex85
-               /usr/bin/virtualenv sphinx_1.7.9
-               . sphinx_1.7.9/bin/activate
+               /usr/bin/virtualenv sphinx_2.4.4
+               . sphinx_2.4.4/bin/activate
                pip install -r Documentation/sphinx/requirements.txt
 
        Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
index 87d0818..dc71933 100644 (file)
@@ -57,7 +57,7 @@ mcelog                 0.6                mcelog --version
 iptables               1.4.2              iptables -V
 openssl & libcrypto    1.0.0              openssl version
 bc                     1.06.95            bc --version
-Sphinx\ [#f1]_         1.3                sphinx-build --version
+Sphinx\ [#f1]_         1.7                sphinx-build --version
 ====================== =================  ========================================
 
 .. [#f1] Sphinx è necessario solo per produrre la documentazione del Kernel
index 951595c..23eac67 100644 (file)
@@ -26,7 +26,7 @@ reStructuredText文件可能包含包含来自源文件的结构化文档注释
 安装Sphinx
 ==========
 
-Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。
+Documentation/ 下的ReST文件现在使用sphinx1.7或更高版本构建。
 
 这有一个脚本可以检查Sphinx的依赖项。更多详细信息见
 :ref:`sphinx-pre-install_zh` 。
@@ -40,22 +40,19 @@ Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。
 
 .. note::
 
-   #) 低于1.5版本的Sphinx无法与Python的0.13.1或更高版本docutils一起正常工作。
-      如果您想使用这些版本,那么应该运行 ``pip install 'docutils==0.12'`` 。
-
    #) html输出建议使用RTD主题。根据Sphinx版本的不同,它应该用
       ``pip install sphinx_rtd_theme`` 单独安装。
 
    #) 一些ReST页面包含数学表达式。由于Sphinx的工作方式,这些表达式是使用 LaTeX
       编写的。它需要安装amsfonts和amsmath宏包,以便显示。
 
-总之,如您要安装Sphinx 1.7.9版本,应执行::
+总之,如您要安装Sphinx 2.4.4版本,应执行::
 
-       $ virtualenv sphinx_1.7.9
-       $ . sphinx_1.7.9/bin/activate
-       (sphinx_1.7.9) $ pip install -r Documentation/sphinx/requirements.txt
+       $ virtualenv sphinx_2.4.4
+       $ . sphinx_2.4.4/bin/activate
+       (sphinx_2.4.4) $ pip install -r Documentation/sphinx/requirements.txt
 
-在运行 ``. sphinx_1.7.9/bin/activate`` 之后,提示符将变化,以指示您正在使用新
+在运行 ``. sphinx_2.4.4/bin/activate`` 之后,提示符将变化,以指示您正在使用新
 环境。如果您打开了一个新的shell,那么在构建文档之前,您需要重新运行此命令以再
 次进入虚拟环境中。
 
@@ -71,7 +68,7 @@ Documentation/ 下的ReST文件现在使用sphinx1.3或更高版本构建。
 PDF和LaTeX构建
 --------------
 
-目前只有Sphinx 1.4及更高版本才支持这种构建。
+目前只有Sphinx 2.4及更高版本才支持这种构建。
 
 对于PDF和LaTeX输出,还需要 ``XeLaTeX`` 3.14159265版本。(译注:此版本号真实
 存在)
@@ -93,8 +90,8 @@ PDF和LaTeX构建
        You should run:
 
                sudo dnf install -y texlive-luatex85
-               /usr/bin/virtualenv sphinx_1.7.9
-               . sphinx_1.7.9/bin/activate
+               /usr/bin/virtualenv sphinx_2.4.4
+               . sphinx_2.4.4/bin/activate
                pip install -r Documentation/sphinx/requirements.txt
 
        Can't build as 1 mandatory dependency is missing at ./scripts/sphinx-pre-install line 468.
index c6a5bb2..8053ae4 100644 (file)
@@ -36,14 +36,14 @@ Linux内核管理风格
 每个人都认为管理者做决定,而且决策很重要。决定越大越痛苦,管理者就必须越高级。
 这很明显,但事实并非如此。
 
¸¸æ\88\8fç\9a\84å\90\8då­\97是 **避免** 做出决定。尤其是,如果有人告诉你“选择(a)或(b),
\9c\80é\87\8dè¦\81ç\9a\84是 **避免** 做出决定。尤其是,如果有人告诉你“选择(a)或(b),
 我们真的需要你来做决定”,你就是陷入麻烦的管理者。你管理的人比你更了解细节,
 所以如果他们来找你做技术决策,你完蛋了。你显然没有能力为他们做这个决定。
 
 (推论:如果你管理的人不比你更了解细节,你也会被搞砸,尽管原因完全不同。
 也就是说,你的工作是错的,他们应该管理你的才智)
 
\89\80以游æ\88\8fç\9a\84å\90\8då­\97是 **避免** 做出决定,至少是那些大而痛苦的决定。做一些小的
\89\80以æ\9c\80é\87\8dè¦\81ç\9a\84是 **避免** 做出决定,至少是那些大而痛苦的决定。做一些小的
 和非结果性的决定是很好的,并且使您看起来好像知道自己在做什么,所以内核管理者
 需要做的是将那些大的和痛苦的决定变成那些没有人真正关心的小事情。
 
index 3b093d6..aeeb071 100644 (file)
@@ -6911,6 +6911,20 @@ MAP_SHARED mmap will result in an -EINVAL return.
 When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
 perform a bulk copy of tags to/from the guest.
 
+7.29 KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM
+-------------------------------------
+
+Architectures: x86 SEV enabled
+Type: vm
+Parameters: args[0] is the fd of the source vm
+Returns: 0 on success
+
+This capability enables userspace to migrate the encryption context from the VM
+indicated by the fd to the VM this is called on.
+
+This is intended to support intra-host migration of VMs between userspace VMMs,
+upgrading the VMM process without interrupting the guest.
+
 8. Other capabilities.
 ======================
 
index 65de3f0..5cec7fb 100644 (file)
@@ -63,3 +63,12 @@ kernel sends SIGILL to the application. If the process has permission then
 the handler allocates a larger xstate buffer for the task so the large
 state can be context switched. In the unlikely cases that the allocation
 fails, the kernel sends SIGSEGV.
+
+Dynamic features in signal frames
+---------------------------------
+
+Dynamcally enabled features are not written to the signal frame upon signal
+entry if the feature is in its initial configuration.  This differs from
+non-dynamic features which are always written regardless of their
+configuration.  Signal handlers can examine the XSAVE buffer's XSTATE_BV
+field to determine if a features was written.
index 4c74516..2f1e783 100644 (file)
@@ -3733,7 +3733,7 @@ F:        drivers/scsi/bnx2i/
 BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER
 M:     Ariel Elior <aelior@marvell.com>
 M:     Sudarsana Kalluru <skalluru@marvell.com>
-M:     GR-everest-linux-l2@marvell.com
+M:     Manish Chopra <manishc@marvell.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/bnx2x/
@@ -4676,11 +4676,10 @@ COCCINELLE/Semantic Patches (SmPL)
 M:     Julia Lawall <Julia.Lawall@inria.fr>
 M:     Gilles Muller <Gilles.Muller@inria.fr>
 M:     Nicolas Palix <nicolas.palix@imag.fr>
-M:     Michal Marek <michal.lkml@markovi.net>
-L:     cocci@systeme.lip6.fr (moderated for non-subscribers)
+L:     cocci@inria.fr (moderated for non-subscribers)
 S:     Supported
-W:     http://coccinelle.lip6.fr/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
+W:     https://coccinelle.gitlabpages.inria.fr/website/
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jlawall/linux.git
 F:     Documentation/dev-tools/coccinelle.rst
 F:     scripts/coccicheck
 F:     scripts/coccinelle/
@@ -14413,7 +14412,9 @@ M:      Juergen Gross <jgross@suse.com>
 M:     Deep Shah <sdeep@vmware.com>
 M:     "VMware, Inc." <pv-drivers@vmware.com>
 L:     virtualization@lists.linux-foundation.org
+L:     x86@kernel.org
 S:     Supported
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
 F:     Documentation/virt/paravirt_ops.rst
 F:     arch/*/include/asm/paravirt*.h
 F:     arch/*/kernel/paravirt*
@@ -15592,7 +15593,7 @@ F:      drivers/scsi/qedi/
 
 QLOGIC QL4xxx ETHERNET DRIVER
 M:     Ariel Elior <aelior@marvell.com>
-M:     GR-everest-linux-l2@marvell.com
+M:     Manish Chopra <manishc@marvell.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/qlogic/qed/
@@ -18482,6 +18483,7 @@ F:      include/uapi/linux/pkt_sched.h
 F:     include/uapi/linux/tc_act/
 F:     include/uapi/linux/tc_ematch/
 F:     net/sched/
+F:     tools/testing/selftests/tc-testing
 
 TC90522 MEDIA DRIVER
 M:     Akihiro Tsukada <tskd08@gmail.com>
@@ -21079,6 +21081,18 @@ F:     Documentation/vm/zsmalloc.rst
 F:     include/linux/zsmalloc.h
 F:     mm/zsmalloc.c
 
+ZSTD
+M:     Nick Terrell <terrelln@fb.com>
+S:     Maintained
+B:     https://github.com/facebook/zstd/issues
+T:     git git://github.com/terrelln/linux.git
+F:     include/linux/zstd*
+F:     lib/zstd/
+F:     lib/decompress_unzstd.c
+F:     crypto/zstd.c
+N:     zstd
+K:     zstd
+
 ZSWAP COMPRESSED SWAP CACHING
 M:     Seth Jennings <sjenning@redhat.com>
 M:     Dan Streetman <ddstreet@ieee.org>
index d83d72c..9e12c14 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 15
+PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Trick or Treat
 
 # *DOCUMENTATION*
@@ -789,7 +789,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 KBUILD_CFLAGS += $(stackp-flags-y)
 
 KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y)
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
 
 ifdef CONFIG_CC_IS_CLANG
 KBUILD_CPPFLAGS += -Qunused-arguments
@@ -801,10 +801,6 @@ KBUILD_CFLAGS += -Wno-gnu
 KBUILD_CFLAGS += -mno-global-merge
 else
 
-# Warn about unmarked fall-throughs in switch statement.
-# Disabled for clang while comment to attribute conversion happens and
-# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed.
-KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=5,)
 # gcc inanely warns about local variables called 'main'
 KBUILD_CFLAGS += -Wno-main
 endif
index a522716..77172d5 100644 (file)
@@ -60,15 +60,15 @@ KBUILD_CFLAGS       += $(call cc-option,-fno-ipa-sra)
 # Note that GCC does not numerically define an architecture version
 # macro, but instead defines a whole series of macros which makes
 # testing for a specific architecture or later rather impossible.
-arch-$(CONFIG_CPU_32v7M)       =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m
-arch-$(CONFIG_CPU_32v7)                =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
-arch-$(CONFIG_CPU_32v6)                =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
+arch-$(CONFIG_CPU_32v7M)       =-D__LINUX_ARM_ARCH__=7 -march=armv7-m
+arch-$(CONFIG_CPU_32v7)                =-D__LINUX_ARM_ARCH__=7 -march=armv7-a
+arch-$(CONFIG_CPU_32v6)                =-D__LINUX_ARM_ARCH__=6 -march=armv6
 # Only override the compiler option if ARMv6. The ARMv6K extensions are
 # always available in ARMv7
 ifeq ($(CONFIG_CPU_32v6),y)
-arch-$(CONFIG_CPU_32v6K)       =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k)
+arch-$(CONFIG_CPU_32v6K)       =-D__LINUX_ARM_ARCH__=6 -march=armv6k
 endif
-arch-$(CONFIG_CPU_32v5)                =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
+arch-$(CONFIG_CPU_32v5)                =-D__LINUX_ARM_ARCH__=5 -march=armv5te
 arch-$(CONFIG_CPU_32v4T)       =-D__LINUX_ARM_ARCH__=4 -march=armv4t
 arch-$(CONFIG_CPU_32v4)                =-D__LINUX_ARM_ARCH__=4 -march=armv4
 arch-$(CONFIG_CPU_32v3)                =-D__LINUX_ARM_ARCH__=3 -march=armv3m
@@ -82,7 +82,7 @@ tune-$(CONFIG_CPU_ARM720T)    =-mtune=arm7tdmi
 tune-$(CONFIG_CPU_ARM740T)     =-mtune=arm7tdmi
 tune-$(CONFIG_CPU_ARM9TDMI)    =-mtune=arm9tdmi
 tune-$(CONFIG_CPU_ARM940T)     =-mtune=arm9tdmi
-tune-$(CONFIG_CPU_ARM946E)     =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi)
+tune-$(CONFIG_CPU_ARM946E)     =-mtune=arm9e
 tune-$(CONFIG_CPU_ARM920T)     =-mtune=arm9tdmi
 tune-$(CONFIG_CPU_ARM922T)     =-mtune=arm9tdmi
 tune-$(CONFIG_CPU_ARM925T)     =-mtune=arm9tdmi
@@ -90,11 +90,11 @@ tune-$(CONFIG_CPU_ARM926T)  =-mtune=arm9tdmi
 tune-$(CONFIG_CPU_FA526)       =-mtune=arm9tdmi
 tune-$(CONFIG_CPU_SA110)       =-mtune=strongarm110
 tune-$(CONFIG_CPU_SA1100)      =-mtune=strongarm1100
-tune-$(CONFIG_CPU_XSCALE)      =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_XSC3)                =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
-tune-$(CONFIG_CPU_FEROCEON)    =$(call cc-option,-mtune=marvell-f,-mtune=xscale)
-tune-$(CONFIG_CPU_V6)          =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
-tune-$(CONFIG_CPU_V6K)         =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm)
+tune-$(CONFIG_CPU_XSCALE)      =-mtune=xscale
+tune-$(CONFIG_CPU_XSC3)                =-mtune=xscale
+tune-$(CONFIG_CPU_FEROCEON)    =-mtune=xscale
+tune-$(CONFIG_CPU_V6)          =-mtune=arm1136j-s
+tune-$(CONFIG_CPU_V6K)         =-mtune=arm1136j-s
 
 # Evaluate tune cc-option calls now
 tune-y := $(tune-y)
index a4e0060..274e4f7 100644 (file)
@@ -390,9 +390,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
        BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) < FIXADDR_START);
        BUG_ON(idx >= __end_of_fixed_addresses);
 
-       /* we only support device mappings until pgprot_kernel has been set */
+       /* We support only device mappings before pgprot_kernel is set. */
        if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) &&
-                   pgprot_val(pgprot_kernel) == 0))
+                   pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0))
                return;
 
        if (pgprot_val(prot))
index a305ce2..d52a0b2 100644 (file)
@@ -68,6 +68,7 @@
 #define ESR_ELx_EC_MAX         (0x3F)
 
 #define ESR_ELx_EC_SHIFT       (26)
+#define ESR_ELx_EC_WIDTH       (6)
 #define ESR_ELx_EC_MASK                (UL(0x3F) << ESR_ELx_EC_SHIFT)
 #define ESR_ELx_EC(esr)                (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT)
 
index 4be8486..2a5f7f3 100644 (file)
@@ -584,7 +584,7 @@ struct kvm_vcpu_stat {
        u64 exits;
 };
 
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
+void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
index f5490af..e4727dc 100644 (file)
@@ -223,7 +223,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
-               r = num_online_cpus();
+               /*
+                * ARM64 treats KVM_CAP_NR_CPUS differently from all other
+                * architectures, as it does not always bound it to
+                * KVM_CAP_MAX_VCPUS. It should not matter much because
+                * this is just an advisory value.
+                */
+               r = min_t(unsigned int, num_online_cpus(),
+                         kvm_arm_default_max_vcpus());
                break;
        case KVM_CAP_MAX_VCPUS:
        case KVM_CAP_MAX_VCPU_ID:
@@ -1389,12 +1396,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
                return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
        }
        case KVM_ARM_PREFERRED_TARGET: {
-               int err;
                struct kvm_vcpu_init init;
 
-               err = kvm_vcpu_preferred_target(&init);
-               if (err)
-                       return err;
+               kvm_vcpu_preferred_target(&init);
 
                if (copy_to_user(argp, &init, sizeof(init)))
                        return -EFAULT;
index 5ce26be..e116c77 100644 (file)
@@ -869,13 +869,10 @@ u32 __attribute_const__ kvm_target_cpu(void)
        return KVM_ARM_TARGET_GENERIC_V8;
 }
 
-int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
+void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
        u32 target = kvm_target_cpu();
 
-       if (target < 0)
-               return -ENODEV;
-
        memset(init, 0, sizeof(*init));
 
        /*
@@ -885,8 +882,6 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
         * target type.
         */
        init->target = (__u32)target;
-
-       return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
index 9aa9b73..b6b6801 100644 (file)
@@ -44,7 +44,7 @@
 el1_sync:                              // Guest trapped into EL2
 
        mrs     x0, esr_el2
-       lsr     x0, x0, #ESR_ELx_EC_SHIFT
+       ubfx    x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
        cmp     x0, #ESR_ELx_EC_HVC64
        ccmp    x0, #ESR_ELx_EC_HVC32, #4, ne
        b.ne    el1_trap
index 0c6116d..3d613e7 100644 (file)
@@ -141,7 +141,7 @@ SYM_FUNC_END(__host_hvc)
 .L__vect_start\@:
        stp     x0, x1, [sp, #-16]!
        mrs     x0, esr_el2
-       lsr     x0, x0, #ESR_ELx_EC_SHIFT
+       ubfx    x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH
        cmp     x0, #ESR_ELx_EC_HVC64
        b.eq    __host_hvc
        b       __host_exit
index 862c7b5..578f717 100644 (file)
@@ -178,7 +178,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
 
        phys = kvm_pte_to_phys(pte);
        if (!addr_is_memory(phys))
-               return 0;
+               return -EINVAL;
 
        /*
         * Adjust the host stage-2 mappings to match the ownership attributes
@@ -207,8 +207,18 @@ static int finalize_host_mappings(void)
                .cb     = finalize_host_mappings_walker,
                .flags  = KVM_PGTABLE_WALK_LEAF,
        };
+       int i, ret;
+
+       for (i = 0; i < hyp_memblock_nr; i++) {
+               struct memblock_region *reg = &hyp_memory[i];
+               u64 start = (u64)hyp_phys_to_virt(reg->base);
+
+               ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker);
+               if (ret)
+                       return ret;
+       }
 
-       return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker);
+       return 0;
 }
 
 void __noreturn __pkvm_init_finalise(void)
index 3787ee6..792cf6e 100644 (file)
@@ -474,7 +474,7 @@ bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
        return true;
 }
 
-/**
+/*
  * Handler for protected VM restricted exceptions.
  *
  * Inject an undefined exception into the guest and return true to indicate that
index 2c57994..30193bc 100644 (file)
@@ -37,4 +37,4 @@ platform-$(CONFIG_MACH_TX49XX)                += txx9/
 platform-$(CONFIG_MACH_VR41XX)         += vr41xx/
 
 # include the platform specific files
-include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y))
+include $(patsubst %/, $(srctree)/arch/mips/%/Platform, $(platform-y))
index 8651074..de60ad1 100644 (file)
@@ -292,6 +292,8 @@ config BMIPS_GENERIC
        select USB_OHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
        select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
        select HARDIRQS_SW_RESEND
+       select HAVE_PCI
+       select PCI_DRIVERS_GENERIC
        help
          Build a generic DT-based kernel image that boots on select
          BCM33xx cable modem chips, BCM63xx DSL chips, and BCM7xxx set-top
@@ -333,6 +335,9 @@ config BCM63XX
        select SYS_SUPPORTS_32BIT_KERNEL
        select SYS_SUPPORTS_BIG_ENDIAN
        select SYS_HAS_EARLY_PRINTK
+       select SYS_HAS_CPU_BMIPS32_3300
+       select SYS_HAS_CPU_BMIPS4350
+       select SYS_HAS_CPU_BMIPS4380
        select SWAP_IO_SPACE
        select GPIOLIB
        select MIPS_L1_CACHE_SHIFT_4
index e036fc0..ace7f03 100644 (file)
@@ -253,7 +253,9 @@ endif
 #
 # Board-dependent options and extra files
 #
+ifdef need-compiler
 include $(srctree)/arch/mips/Kbuild.platforms
+endif
 
 ifdef CONFIG_PHYSICAL_START
 load-y                                 = $(CONFIG_PHYSICAL_START)
index 5a3e325..1c91064 100644 (file)
@@ -381,6 +381,12 @@ void clk_disable(struct clk *clk)
 
 EXPORT_SYMBOL(clk_disable);
 
+struct clk *clk_get_parent(struct clk *clk)
+{
+       return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
 unsigned long clk_get_rate(struct clk *clk)
 {
        if (!clk)
diff --git a/arch/mips/boot/compressed/.gitignore b/arch/mips/boot/compressed/.gitignore
deleted file mode 100644 (file)
index d358395..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ashldi3.c
-bswapsi.c
index 3548b3b..2861a05 100644 (file)
@@ -50,19 +50,9 @@ vmlinuzobjs-$(CONFIG_MIPS_ALCHEMY)              += $(obj)/uart-alchemy.o
 vmlinuzobjs-$(CONFIG_ATH79)                       += $(obj)/uart-ath79.o
 endif
 
-extra-y += uart-ath79.c
-$(obj)/uart-ath79.c: $(srctree)/arch/mips/ath79/early_printk.c
-       $(call cmd,shipped)
-
 vmlinuzobjs-$(CONFIG_KERNEL_XZ) += $(obj)/ashldi3.o
 
-extra-y += ashldi3.c
-$(obj)/ashldi3.c: $(obj)/%.c: $(srctree)/lib/%.c FORCE
-       $(call if_changed,shipped)
-
-extra-y += bswapsi.c
-$(obj)/bswapsi.c: $(obj)/%.c: $(srctree)/arch/mips/lib/%.c FORCE
-       $(call if_changed,shipped)
+vmlinuzobjs-$(CONFIG_KERNEL_ZSTD) += $(obj)/bswapdi.o
 
 targets := $(notdir $(vmlinuzobjs-y))
 
diff --git a/arch/mips/boot/compressed/ashldi3.c b/arch/mips/boot/compressed/ashldi3.c
new file mode 100644 (file)
index 0000000..f7bf6a7
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../../../lib/ashldi3.c"
diff --git a/arch/mips/boot/compressed/bswapdi.c b/arch/mips/boot/compressed/bswapdi.c
new file mode 100644 (file)
index 0000000..acb28ae
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/bswapdi.c"
diff --git a/arch/mips/boot/compressed/bswapsi.c b/arch/mips/boot/compressed/bswapsi.c
new file mode 100644 (file)
index 0000000..fdb9c64
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/bswapsi.c"
diff --git a/arch/mips/boot/compressed/uart-ath79.c b/arch/mips/boot/compressed/uart-ath79.c
new file mode 100644 (file)
index 0000000..d686820
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../ath79/early_printk.c"
index a1f0b71..0c6a5a4 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4725b-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
index c1afdfd..772542e 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4740-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
index 05c00b9..dfe7432 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4770-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 
 / {
index 28adc3d..b0a4e2e 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 #include <dt-bindings/clock/ingenic,tcu.h>
 #include <dt-bindings/dma/jz4780-dma.h>
 
index dec7909..8bd27ed 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <dt-bindings/clock/ingenic,tcu.h>
-#include <dt-bindings/clock/x1000-cgu.h>
+#include <dt-bindings/clock/ingenic,x1000-cgu.h>
 #include <dt-bindings/dma/x1000-dma.h>
 
 / {
index 215257f..2595df8 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <dt-bindings/clock/ingenic,tcu.h>
-#include <dt-bindings/clock/x1830-cgu.h>
+#include <dt-bindings/clock/ingenic,x1830-cgu.h>
 #include <dt-bindings/dma/x1830-dma.h>
 
 / {
index 625bd2d..5956fb9 100644 (file)
@@ -1,6 +1,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_NO_HZ=y
+CONFIG_HZ=1000
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
 # CONFIG_VM_EVENT_COUNTERS is not set
@@ -8,17 +9,34 @@ CONFIG_EXPERT=y
 CONFIG_BMIPS_GENERIC=y
 CONFIG_CPU_LITTLE_ENDIAN=y
 CONFIG_HIGHMEM=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=4
+CONFIG_CC_STACKPROTECTOR_STRONG=y
 # CONFIG_SECCOMP is not set
 CONFIG_MIPS_O32_FP64_SUPPORT=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+CONFIG_RD_XZ=y
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PCI=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIEASPM_POWERSAVE=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCIE_BRCMSTB=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
 CONFIG_CPU_FREQ_GOV_POWERSAVE=y
 CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_GOV_ONDEMAND=y
 CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
 CONFIG_BMIPS_CPUFREQ=y
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_NET=y
@@ -32,32 +50,99 @@ CONFIG_INET=y
 # CONFIG_INET_DIAG is not set
 CONFIG_CFG80211=y
 CONFIG_NL80211_TESTMODE=y
+CONFIG_WIRELESS=y
 CONFIG_MAC80211=y
+CONFIG_NL80211=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_STANDALONE is not set
 # CONFIG_PREVENT_FIRMWARE_BUILD is not set
+CONFIG_BRCMSTB_GISB_ARB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_FORCE_LOAD=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_UDP_DIAG=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_BIC=y
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+# CONFIG_IPV6 is not set
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_BRIDGE=y
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_NET_DSA=y
+CONFIG_NET_SWITCHDEV=y
+CONFIG_DMA_CMA=y
+CONFIG_CMA_ALIGNMENT=12
+CONFIG_SPI=y
+CONFIG_SPI_BRCMSTB=y
 CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
 CONFIG_MTD_CFI=y
+CONFIG_MTD_JEDECPROBE=y
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_CFI_STAA=y
+CONFIG_MTD_ROM=y
+CONFIG_MTD_ABSENT=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_M25P80=y
+CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_BRCMNAND=y
+CONFIG_MTD_SPI_NOR=y
+# CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is not set
+CONFIG_MTD_UBI=y
+CONFIG_MTD_UBI_GLUEBI=y
+CONFIG_PROC_DEVICETREE=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=8192
 # CONFIG_BLK_DEV is not set
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_SCSI_MULTI_LUN=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
+CONFIG_VLAN_8021Q=y
+CONFIG_MACVLAN=y
 CONFIG_BCMGENET=y
 CONFIG_USB_USBNET=y
-# CONFIG_INPUT is not set
+CONFIG_INPUT_EVDEV=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_UINPUT=y
 # CONFIG_SERIO is not set
-# CONFIG_VT is not set
+CONFIG_VT=y
+CONFIG_VT_HW_CONSOLE_BINDING=y
+# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_BRCMSTB=y
 CONFIG_POWER_RESET_SYSCON=y
 CONFIG_POWER_SUPPLY=y
 # CONFIG_HWMON is not set
@@ -69,22 +154,76 @@ CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_OHCI_HCD_PLATFORM=y
 CONFIG_USB_STORAGE=y
 CONFIG_SOC_BRCMSTB=y
+CONFIG_MMC=y
+CONFIG_MMC_BLOCK_MINORS=16
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 # CONFIG_DNOTIFY is not set
+CONFIG_PROC_KCORE=y
+CONFIG_CIFS=y
+CONFIG_JBD2_DEBUG=y
 CONFIG_FUSE_FS=y
+CONFIG_FHANDLE=y
+CONFIG_CGROUPS=y
+CONFIG_CUSE=y
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_ZISOFS=y
+CONFIG_UDF_FS=y
+CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_UBIFS_FS=y
+CONFIG_SQUASHFS=y
+CONFIG_SQUASHFS_LZO=y
+CONFIG_SQUASHFS_XZ=y
 CONFIG_NFS_FS=y
-CONFIG_CIFS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_NFS_V4_1=y
+CONFIG_NFS_V4_2=y
+CONFIG_ROOT_NFS=y
 CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
 CONFIG_NLS_ISO8859_1=y
-# CONFIG_CRYPTO_HW is not set
 CONFIG_PRINTK_TIME=y
+CONFIG_DYNAMIC_DEBUG=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_INFO_REDUCED is not set
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_DEBUG_USER=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="earlycon"
+# CONFIG_MIPS_CMDLINE_FROM_DTB is not set
+CONFIG_MIPS_CMDLINE_DTB_EXTEND=y
+# CONFIG_MIPS_CMDLINE_FROM_BOOTLOADER is not set
+# CONFIG_CRYPTO_HW is not set
+CONFIG_DT_BCM974XX=y
+CONFIG_FW_CFE=y
+CONFIG_ATA=y
+CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_BRCMSTB=y
+CONFIG_GENERIC_PHY=y
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_PHY_BRCM_USB=y
+CONFIG_PHY_BRCM_SATA=y
+CONFIG_PM_RUNTIME=y
+CONFIG_PM_DEBUG=y
+CONFIG_SYSVIPC=y
+CONFIG_FUNCTION_GRAPH_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_SCHED_TRACER=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_STACK_TRACER=y
index eaad0ed..a8a30bb 100644 (file)
@@ -117,21 +117,21 @@ static void __init dec_be_init(void)
 {
        switch (mips_machtype) {
        case MACH_DS23100:      /* DS2100/DS3100 Pmin/Pmax */
-               board_be_handler = dec_kn01_be_handler;
+               mips_set_be_handler(dec_kn01_be_handler);
                busirq_handler = dec_kn01_be_interrupt;
                busirq_flags |= IRQF_SHARED;
                dec_kn01_be_init();
                break;
        case MACH_DS5000_1XX:   /* DS5000/1xx 3min */
        case MACH_DS5000_XX:    /* DS5000/xx Maxine */
-               board_be_handler = dec_kn02xa_be_handler;
+               mips_set_be_handler(dec_kn02xa_be_handler);
                busirq_handler = dec_kn02xa_be_interrupt;
                dec_kn02xa_be_init();
                break;
        case MACH_DS5000_200:   /* DS5000/200 3max */
        case MACH_DS5000_2X0:   /* DS5000/240 3max+ */
        case MACH_DS5900:       /* DS5900 bigmax */
-               board_be_handler = dec_ecc_be_handler;
+               mips_set_be_handler(dec_ecc_be_handler);
                busirq_handler = dec_ecc_be_interrupt;
                dec_ecc_be_init();
                break;
index a3aa22c..a07a5ed 100644 (file)
@@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array(
 __init int yamon_dt_append_memory(void *fdt,
                                  const struct yamon_mem_region *regions)
 {
-       unsigned long phys_memsize, memsize;
+       unsigned long phys_memsize = 0, memsize;
        __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES];
        unsigned int mem_entries;
        int i, err, mem_off;
index b710e76..15cde63 100644 (file)
@@ -15,7 +15,7 @@
 #define MIPS_BE_FATAL  2               /* treat as an unrecoverable error */
 
 extern void (*board_be_init)(void);
-extern int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+void mips_set_be_handler(int (*handler)(struct pt_regs *reg, int is_fixup));
 
 extern void (*board_nmi_handler_setup)(void);
 extern void (*board_ejtag_handler_setup)(void);
index 70e32de..72d02d3 100644 (file)
 446    n32     landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    n32     process_mrelease                sys_process_mrelease
+449    n32     futex_waitv                     sys_futex_waitv
index 1ca7bc3..e2c481f 100644 (file)
 446    n64     landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    n64     process_mrelease                sys_process_mrelease
+449    n64     futex_waitv                     sys_futex_waitv
index a61c35e..3714c97 100644 (file)
 446    o32     landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    o32     process_mrelease                sys_process_mrelease
+449    o32     futex_waitv                     sys_futex_waitv
index 6f07362..d26b0fb 100644 (file)
@@ -103,13 +103,19 @@ extern asmlinkage void handle_reserved(void);
 extern void tlb_do_page_fault_0(void);
 
 void (*board_be_init)(void);
-int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
+static int (*board_be_handler)(struct pt_regs *regs, int is_fixup);
 void (*board_nmi_handler_setup)(void);
 void (*board_ejtag_handler_setup)(void);
 void (*board_bind_eic_interrupt)(int irq, int regset);
 void (*board_ebase_setup)(void);
 void(*board_cache_error_setup)(void);
 
+void mips_set_be_handler(int (*handler)(struct pt_regs *regs, int is_fixup))
+{
+       board_be_handler = handler;
+}
+EXPORT_SYMBOL_GPL(mips_set_be_handler);
+
 static void show_raw_backtrace(unsigned long reg29, const char *loglvl,
                               bool user)
 {
index 562aa87..aa20d07 100644 (file)
@@ -1067,7 +1067,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
-               r = num_online_cpus();
+               r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
                break;
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
index dd819e3..4916ccc 100644 (file)
@@ -158,6 +158,12 @@ void clk_deactivate(struct clk *clk)
 }
 EXPORT_SYMBOL(clk_deactivate);
 
+struct clk *clk_get_parent(struct clk *clk)
+{
+       return NULL;
+}
+EXPORT_SYMBOL(clk_get_parent);
+
 static inline u32 get_counter_resolution(void)
 {
        u32 res;
index dc0110a..afe8a61 100644 (file)
@@ -112,5 +112,5 @@ static int ip22_be_handler(struct pt_regs *regs, int is_fixup)
 
 void __init ip22_be_init(void)
 {
-       board_be_handler = ip22_be_handler;
+       mips_set_be_handler(ip22_be_handler);
 }
index c61362d..16ca470 100644 (file)
@@ -468,7 +468,7 @@ static int ip28_be_handler(struct pt_regs *regs, int is_fixup)
 
 void __init ip22_be_init(void)
 {
-       board_be_handler = ip28_be_handler;
+       mips_set_be_handler(ip28_be_handler);
 }
 
 int ip28_show_be_info(struct seq_file *m)
index 5a38ae6..923a63a 100644 (file)
@@ -85,7 +85,7 @@ void __init ip27_be_init(void)
        int cpu = LOCAL_HUB_L(PI_CPU_NUM);
        int cpuoff = cpu << 8;
 
-       board_be_handler = ip27_be_handler;
+       mips_set_be_handler(ip27_be_handler);
 
        LOCAL_HUB_S(PI_ERR_INT_PEND,
                    cpu ? PI_ERR_CLEAR_ALL_B : PI_ERR_CLEAR_ALL_A);
index c860f95..478b63b 100644 (file)
@@ -34,5 +34,5 @@ static int ip32_be_handler(struct pt_regs *regs, int is_fixup)
 
 void __init ip32_be_init(void)
 {
-       board_be_handler = ip32_be_handler;
+       mips_set_be_handler(ip32_be_handler);
 }
index f07b15d..72a31ee 100644 (file)
@@ -122,7 +122,7 @@ void __init plat_mem_setup(void)
 #error invalid SiByte board configuration
 #endif
 
-       board_be_handler = swarm_be_handler;
+       mips_set_be_handler(swarm_be_handler);
 
        if (xicor_probe())
                swarm_rtc_type = RTC_XICOR;
index 46e9c41..63f9725 100644 (file)
@@ -80,7 +80,7 @@ static int tx4927_be_handler(struct pt_regs *regs, int is_fixup)
 }
 static void __init tx4927_be_init(void)
 {
-       board_be_handler = tx4927_be_handler;
+       mips_set_be_handler(tx4927_be_handler);
 }
 
 static struct resource tx4927_sdram_resource[4];
index 17395d5..ba64654 100644 (file)
@@ -82,7 +82,7 @@ static int tx4938_be_handler(struct pt_regs *regs, int is_fixup)
 }
 static void __init tx4938_be_init(void)
 {
-       board_be_handler = tx4938_be_handler;
+       mips_set_be_handler(tx4938_be_handler);
 }
 
 static struct resource tx4938_sdram_resource[4];
index bf8a3cd..f5f59b7 100644 (file)
@@ -86,7 +86,7 @@ static int tx4939_be_handler(struct pt_regs *regs, int is_fixup)
 }
 static void __init tx4939_be_init(void)
 {
-       board_be_handler = tx4939_be_handler;
+       mips_set_be_handler(tx4939_be_handler);
 }
 
 static struct resource tx4939_sdram_resource[4];
index 1b2ea34..d65f55f 100644 (file)
@@ -57,7 +57,7 @@ endif
 
 # VDSO linker flags.
 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \
-       $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \
+       $(filter -E%,$(KBUILD_CFLAGS)) -shared \
        -G 0 --eh-frame-hdr --hash-style=sysv --build-id=sha1 -T
 
 CFLAGS_REMOVE_vdso.o = $(CC_FLAGS_FTRACE)
index d6fd8fa..53061cb 100644 (file)
@@ -231,6 +231,7 @@ CONFIG_CRYPTO_DEFLATE=y
 CONFIG_CRC_CCITT=m
 CONFIG_CRC_T10DIF=y
 CONFIG_FONTS=y
+CONFIG_PRINTK_TIME=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_MEMORY_INIT=y
index 7085df0..39e7985 100644 (file)
@@ -3,38 +3,19 @@
  * Copyright (C) 1999 Hewlett-Packard (Frank Rowand)
  * Copyright (C) 1999 Philipp Rumpf <prumpf@tux.org>
  * Copyright (C) 1999 SuSE GmbH
+ * Copyright (C) 2021 Helge Deller <deller@gmx.de>
  */
 
 #ifndef _PARISC_ASSEMBLY_H
 #define _PARISC_ASSEMBLY_H
 
-#define CALLEE_FLOAT_FRAME_SIZE        80
-
 #ifdef CONFIG_64BIT
-#define LDREG  ldd
-#define STREG  std
-#define LDREGX  ldd,s
-#define LDREGM ldd,mb
-#define STREGM std,ma
-#define SHRREG shrd
-#define SHLREG shld
-#define ANDCM   andcm,*
-#define        COND(x) * ## x
 #define RP_OFFSET      16
 #define FRAME_SIZE     128
 #define CALLEE_REG_FRAME_SIZE  144
 #define REG_SZ         8
 #define ASM_ULONG_INSN .dword
 #else  /* CONFIG_64BIT */
-#define LDREG  ldw
-#define STREG  stw
-#define LDREGX  ldwx,s
-#define LDREGM ldwm
-#define STREGM stwm
-#define SHRREG shr
-#define SHLREG shlw
-#define ANDCM   andcm
-#define COND(x)        x
 #define RP_OFFSET      20
 #define FRAME_SIZE     64
 #define CALLEE_REG_FRAME_SIZE  128
@@ -45,6 +26,7 @@
 /* Frame alignment for 32- and 64-bit */
 #define FRAME_ALIGN     64
 
+#define CALLEE_FLOAT_FRAME_SIZE        80
 #define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE)
 
 #ifdef CONFIG_PA20
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_64BIT
+#define LDREG  ldd
+#define STREG  std
+#define LDREGX  ldd,s
+#define LDREGM ldd,mb
+#define STREGM std,ma
+#define SHRREG shrd
+#define SHLREG shld
+#define ANDCM   andcm,*
+#define        COND(x) * ## x
+#else  /* CONFIG_64BIT */
+#define LDREG  ldw
+#define STREG  stw
+#define LDREGX  ldwx,s
+#define LDREGM ldwm
+#define STREGM stwm
+#define SHRREG shr
+#define SHLREG shlw
+#define ANDCM   andcm
+#define COND(x)        x
+#endif
+
+#ifdef CONFIG_64BIT
 /* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so
  * work around that for now... */
        .level 2.0w
index 7efb1aa..af2a598 100644 (file)
@@ -5,6 +5,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/stringify.h>
 #include <asm/assembly.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
index 7badd87..3e7cf88 100644 (file)
@@ -76,6 +76,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
        purge_tlb_end(flags);
 }
 
+extern void __update_cache(pte_t pte);
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -83,11 +85,14 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
 #define set_pte(pteptr, pteval)                        \
        do {                                    \
                *(pteptr) = (pteval);           \
-               barrier();                      \
+               mb();                           \
        } while(0)
 
 #define set_pte_at(mm, addr, pteptr, pteval)   \
        do {                                    \
+               if (pte_present(pteval) &&      \
+                   pte_user(pteval))           \
+                       __update_cache(pteval); \
                *(pteptr) = (pteval);           \
                purge_tlb_entries(mm, addr);    \
        } while (0)
@@ -303,6 +308,7 @@ extern unsigned long *empty_zero_page;
 
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x) (pte_val(x) & _PAGE_PRESENT)
+#define pte_user(x)    (pte_val(x) & _PAGE_USER)
 #define pte_clear(mm, addr, xp)  set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)    (pmd_val(x) & PxD_FLAG_MASK)
@@ -410,7 +416,7 @@ extern void paging_init (void);
 
 #define PG_dcache_dirty         PG_arch_1
 
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+#define update_mmu_cache(vms,addr,ptep) __update_cache(*ptep)
 
 /* Encode and de-code a swap entry */
 
index 4b9e3d7..2b3010a 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_PARISC_RT_SIGFRAME_H
 #define _ASM_PARISC_RT_SIGFRAME_H
 
-#define SIGRETURN_TRAMP 3
+#define SIGRETURN_TRAMP 4
 #define SIGRESTARTBLOCK_TRAMP 5 
 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
 
index c61827e..94150b9 100644 (file)
@@ -83,9 +83,9 @@ EXPORT_SYMBOL(flush_cache_all_local);
 #define pfn_va(pfn)    __va(PFN_PHYS(pfn))
 
 void
-update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+__update_cache(pte_t pte)
 {
-       unsigned long pfn = pte_pfn(*ptep);
+       unsigned long pfn = pte_pfn(pte);
        struct page *page;
 
        /* We don't have pte special.  As a result, we can be called with
index 57944d6..88c188a 100644 (file)
@@ -1805,7 +1805,7 @@ syscall_restore:
 
        /* Are we being ptraced? */
        LDREG   TASK_TI_FLAGS(%r1),%r19
-       ldi     _TIF_SYSCALL_TRACE_MASK,%r2
+       ldi     _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2
        and,COND(=)     %r19,%r2,%r0
        b,n     syscall_restore_rfi
 
index bbfe23c..46b1050 100644 (file)
@@ -288,21 +288,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
           already in userspace. The first words of tramp are used to
           save the previous sigrestartblock trampoline that might be
           on the stack. We start the sigreturn trampoline at 
-          SIGRESTARTBLOCK_TRAMP. */
+          SIGRESTARTBLOCK_TRAMP+X. */
        err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
                        &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
-       err |= __put_user(INSN_BLE_SR2_R0, 
+       err |= __put_user(INSN_LDI_R20, 
                        &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
-       err |= __put_user(INSN_LDI_R20,
+       err |= __put_user(INSN_BLE_SR2_R0, 
                        &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
+       err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
 
-       start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
-       end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
+       start = (unsigned long) &frame->tramp[0];
+       end = (unsigned long) &frame->tramp[TRAMP_SIZE];
        flush_user_dcache_range_asm(start, end);
        flush_user_icache_range_asm(start, end);
 
        /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
-        * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
+        * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
         * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
         */
        rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
index a5bdbb5..f166250 100644 (file)
@@ -36,7 +36,7 @@ struct compat_regfile {
         compat_int_t rf_sar;
 };
 
-#define COMPAT_SIGRETURN_TRAMP 3
+#define COMPAT_SIGRETURN_TRAMP 4
 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
                                COMPAT_SIGRESTARTBLOCK_TRAMP)
index 6b4ca91..023834e 100644 (file)
@@ -8,6 +8,7 @@
  *
  *  TODO: Userspace stacktrace (CONFIG_USER_STACKTRACE_SUPPORT)
  */
+#include <linux/kernel.h>
 #include <linux/stacktrace.h>
 
 #include <asm/unwind.h>
index bf751e0..358c000 100644 (file)
 446    common  landlock_restrict_self          sys_landlock_restrict_self
 # 447 reserved for memfd_secret
 448    common  process_mrelease                sys_process_mrelease
+449    common  futex_waitv                     sys_futex_waitv
index 2769eb9..3d208af 100644 (file)
@@ -57,6 +57,8 @@ SECTIONS
 {
        . = KERNEL_BINARY_TEXT_START;
 
+       _stext = .;     /* start of kernel text, includes init code & data */
+
        __init_begin = .;
        HEAD_TEXT_SECTION
        MLONGCALL_DISCARD(INIT_TEXT_SECTION(8))
@@ -80,7 +82,6 @@ SECTIONS
        /* freed after init ends here */
 
        _text = .;              /* Text and read-only data */
-       _stext = .;
        MLONGCALL_KEEP(INIT_TEXT_SECTION(8))
        .text ALIGN(PAGE_SIZE) : {
                TEXT_TEXT
index f9ea0e5..3fa6d24 100644 (file)
@@ -187,6 +187,12 @@ static void watchdog_smp_panic(int cpu, u64 tb)
        if (sysctl_hardlockup_all_cpu_backtrace)
                trigger_allbutself_cpu_backtrace();
 
+       /*
+        * Force flush any remote buffers that might be stuck in IRQ context
+        * and therefore could not run their irq_work.
+        */
+       printk_trigger_flush();
+
        if (hardlockup_panic)
                nmi_panic(NULL, "Hard LOCKUP");
 
index 35e9ccc..a72920f 100644 (file)
@@ -641,9 +641,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                 * implementations just count online CPUs.
                 */
                if (hv_enabled)
-                       r = num_present_cpus();
+                       r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS);
                else
-                       r = num_online_cpus();
+                       r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
                break;
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
index a34c531..821252b 100644 (file)
@@ -62,6 +62,7 @@ config RISCV
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
        select GENERIC_TIME_VSYSCALL if MMU && 64BIT
+       select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
        select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
index 7f19b78..5927c94 100644 (file)
@@ -136,3 +136,13 @@ zinstall: install-image = Image.gz
 install zinstall:
        $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
        $(boot)/$(install-image) System.map "$(INSTALL_PATH)"
+
+PHONY += rv32_randconfig
+rv32_randconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/32-bit.config \
+               -f $(srctree)/Makefile randconfig
+
+PHONY += rv64_randconfig
+rv64_randconfig:
+       $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/riscv/configs/64-bit.config \
+               -f $(srctree)/Makefile randconfig
index b254c60..fc1e586 100644 (file)
@@ -9,10 +9,8 @@
 #define RTCCLK_FREQ            1000000
 
 / {
-       #address-cells = <2>;
-       #size-cells = <2>;
        model = "Microchip PolarFire-SoC Icicle Kit";
-       compatible = "microchip,mpfs-icicle-kit";
+       compatible = "microchip,mpfs-icicle-kit", "microchip,mpfs";
 
        aliases {
                ethernet0 = &emac1;
@@ -35,9 +33,6 @@
                reg = <0x0 0x80000000 0x0 0x40000000>;
                clocks = <&clkcfg 26>;
        };
-
-       soc {
-       };
 };
 
 &serial0 {
        status = "okay";
 };
 
-&sdcard {
+&mmc {
        status = "okay";
+
+       bus-width = <4>;
+       disable-wp;
+       cap-sd-highspeed;
+       card-detect-delay = <200>;
+       sd-uhs-sdr12;
+       sd-uhs-sdr25;
+       sd-uhs-sdr50;
+       sd-uhs-sdr104;
 };
 
 &emac0 {
index 9d2fbbc..c9f6d20 100644 (file)
@@ -6,8 +6,8 @@
 / {
        #address-cells = <2>;
        #size-cells = <2>;
-       model = "Microchip MPFS Icicle Kit";
-       compatible = "microchip,mpfs-icicle-kit";
+       model = "Microchip PolarFire SoC";
+       compatible = "microchip,mpfs";
 
        chosen {
        };
                };
 
                clint@2000000 {
-                       compatible = "sifive,clint0";
+                       compatible = "sifive,fu540-c000-clint", "sifive,clint0";
                        reg = <0x0 0x2000000 0x0 0xC000>;
                        interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
                                                &cpu1_intc 3 &cpu1_intc 7
 
                plic: interrupt-controller@c000000 {
                        #interrupt-cells = <1>;
-                       compatible = "sifive,plic-1.0.0";
+                       compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
                        reg = <0x0 0xc000000 0x0 0x4000000>;
                        riscv,ndev = <186>;
                        interrupt-controller;
                        status = "disabled";
                };
 
-               emmc: mmc@20008000 {
-                       compatible = "cdns,sd4hc";
+               /* Common node entry for emmc/sd */
+               mmc: mmc@20008000 {
+                       compatible = "microchip,mpfs-sd4hc", "cdns,sd4hc";
                        reg = <0x0 0x20008000 0x0 0x1000>;
                        interrupt-parent = <&plic>;
                        interrupts = <88 89>;
-                       pinctrl-names = "default";
                        clocks = <&clkcfg 6>;
-                       bus-width = <4>;
-                       cap-mmc-highspeed;
-                       mmc-ddr-3_3v;
-                       max-frequency = <200000000>;
-                       non-removable;
-                       no-sd;
-                       no-sdio;
-                       voltage-ranges = <3300 3300>;
-                       status = "disabled";
-               };
-
-               sdcard: sdhc@20008000 {
-                       compatible = "cdns,sd4hc";
-                       reg = <0x0 0x20008000 0x0 0x1000>;
-                       interrupt-parent = <&plic>;
-                       interrupts = <88>;
-                       pinctrl-names = "default";
-                       clocks = <&clkcfg 6>;
-                       bus-width = <4>;
-                       disable-wp;
-                       cap-sd-highspeed;
-                       card-detect-delay = <200>;
-                       sd-uhs-sdr12;
-                       sd-uhs-sdr25;
-                       sd-uhs-sdr50;
-                       sd-uhs-sdr104;
                        max-frequency = <200000000>;
                        status = "disabled";
                };
index 7db8610..0655b5c 100644 (file)
                ranges;
                plic0: interrupt-controller@c000000 {
                        #interrupt-cells = <1>;
-                       compatible = "sifive,plic-1.0.0";
+                       compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
                        reg = <0x0 0xc000000 0x0 0x4000000>;
                        riscv,ndev = <53>;
                        interrupt-controller;
index 60846e8..ba304d4 100644 (file)
@@ -8,10 +8,9 @@
 #define RTCCLK_FREQ            1000000
 
 / {
-       #address-cells = <2>;
-       #size-cells = <2>;
        model = "SiFive HiFive Unleashed A00";
-       compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000";
+       compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000",
+                    "sifive,fu540";
 
        chosen {
                stdout-path = "serial0";
@@ -26,9 +25,6 @@
                reg = <0x0 0x80000000 0x2 0x00000000>;
        };
 
-       soc {
-       };
-
        hfclk: hfclk {
                #clock-cells = <0>;
                compatible = "fixed-clock";
@@ -63,7 +59,7 @@
 &qspi0 {
        status = "okay";
        flash@0 {
-               compatible = "issi,is25wp256", "jedec,spi-nor";
+               compatible = "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <50000000>;
                m25p,fast-read;
index 2e4ea84..4f66919 100644 (file)
@@ -8,8 +8,6 @@
 #define RTCCLK_FREQ            1000000
 
 / {
-       #address-cells = <2>;
-       #size-cells = <2>;
        model = "SiFive HiFive Unmatched A00";
        compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000",
                     "sifive,fu740";
@@ -27,9 +25,6 @@
                reg = <0x0 0x80000000 0x4 0x00000000>;
        };
 
-       soc {
-       };
-
        hfclk: hfclk {
                #clock-cells = <0>;
                compatible = "fixed-clock";
 &qspi0 {
        status = "okay";
        flash@0 {
-               compatible = "issi,is25wp256", "jedec,spi-nor";
+               compatible = "jedec,spi-nor";
                reg = <0>;
                spi-max-frequency = <50000000>;
                m25p,fast-read;
diff --git a/arch/riscv/configs/32-bit.config b/arch/riscv/configs/32-bit.config
new file mode 100644 (file)
index 0000000..43f4132
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV32I=y
+CONFIG_32BIT=y
diff --git a/arch/riscv/configs/64-bit.config b/arch/riscv/configs/64-bit.config
new file mode 100644 (file)
index 0000000..313edc5
--- /dev/null
@@ -0,0 +1,2 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_64BIT=y
index 4ebc803..c252fd5 100644 (file)
@@ -72,9 +72,10 @@ CONFIG_GPIOLIB=y
 CONFIG_GPIO_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_POWER_RESET=y
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
-CONFIG_DRM_VIRTIO_GPU=y
+CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_NOUVEAU=m
+CONFIG_DRM_VIRTIO_GPU=m
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
index 109c97e..b3e5ff0 100644 (file)
@@ -157,6 +157,8 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x);
 #define page_to_bus(page)      (page_to_phys(page))
 #define phys_to_page(paddr)    (pfn_to_page(phys_to_pfn(paddr)))
 
+#define sym_to_pfn(x)           __phys_to_pfn(__pa_symbol(x))
+
 #ifdef CONFIG_FLATMEM
 #define pfn_valid(pfn) \
        (((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
index 39b5503..bf204e7 100644 (file)
@@ -75,7 +75,8 @@
 #endif
 
 #ifdef CONFIG_XIP_KERNEL
-#define XIP_OFFSET             SZ_8M
+#define XIP_OFFSET             SZ_32M
+#define XIP_OFFSET_MASK                (SZ_32M - 1)
 #else
 #define XIP_OFFSET             0
 #endif
@@ -97,7 +98,8 @@
 #ifdef CONFIG_XIP_KERNEL
 #define XIP_FIXUP(addr) ({                                                     \
        uintptr_t __a = (uintptr_t)(addr);                                      \
-       (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ?  \
+       (__a >= CONFIG_XIP_PHYS_ADDR && \
+        __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ? \
                __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
                __a;                                                            \
        })
index 208e31b..bc6f75f 100644 (file)
@@ -8,30 +8,19 @@
 #ifndef _ASM_RISCV_VDSO_H
 #define _ASM_RISCV_VDSO_H
 
-
-/*
- * All systems with an MMU have a VDSO, but systems without an MMU don't
- * support shared libraries and therefor don't have one.
- */
-#ifdef CONFIG_MMU
-
-#include <linux/types.h>
 /*
  * All systems with an MMU have a VDSO, but systems without an MMU don't
  * support shared libraries and therefor don't have one.
  */
 #ifdef CONFIG_MMU
 
-#define __VVAR_PAGES    1
+#define __VVAR_PAGES    2
 
 #ifndef __ASSEMBLY__
 #include <generated/vdso-offsets.h>
 
 #define VDSO_SYMBOL(base, name)                                                        \
        (void __user *)((unsigned long)(base) + __vdso_##name##_offset)
-
-#endif /* CONFIG_MMU */
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* CONFIG_MMU */
index f839f16..77d9c2f 100644 (file)
@@ -76,6 +76,13 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
        return _vdso_data;
 }
 
+#ifdef CONFIG_TIME_NS
+static __always_inline
+const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
+{
+       return _timens_data;
+}
+#endif
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_VDSO_GETTIMEOFDAY_H */
index 25ec505..f52f01e 100644 (file)
        REG_L t0, _xip_fixup
        add \reg, \reg, t0
 .endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+       la t1, __data_loc
+       li t0, XIP_OFFSET_MASK
+       and t1, t1, t0
+       li t1, XIP_OFFSET
+       sub t0, t0, t1
+       sub \reg, \reg, t0
+.endm
 _xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
 #else
 .macro XIP_FIXUP_OFFSET reg
 .endm
+.macro XIP_FIXUP_FLASH_OFFSET reg
+.endm
 #endif /* CONFIG_XIP_KERNEL */
 
 __HEAD
@@ -267,6 +277,7 @@ pmp_done:
        la a3, hart_lottery
        mv a2, a3
        XIP_FIXUP_OFFSET a2
+       XIP_FIXUP_FLASH_OFFSET a3
        lw t1, (a3)
        amoswap.w t0, t1, (a2)
        /* first time here if hart_lottery in RAM is not set */
@@ -305,6 +316,7 @@ clear_bss_done:
        XIP_FIXUP_OFFSET sp
 #ifdef CONFIG_BUILTIN_DTB
        la a0, __dtb_start
+       XIP_FIXUP_OFFSET a0
 #else
        mv a0, s1
 #endif /* CONFIG_BUILTIN_DTB */
index ee5878d..9c842c4 100644 (file)
@@ -12,7 +12,7 @@ static void default_power_off(void)
                wait_for_interrupt();
 }
 
-void (*pm_power_off)(void) = default_power_off;
+void (*pm_power_off)(void) = NULL;
 EXPORT_SYMBOL(pm_power_off);
 
 void machine_restart(char *cmd)
@@ -23,10 +23,16 @@ void machine_restart(char *cmd)
 
 void machine_halt(void)
 {
-       pm_power_off();
+       if (pm_power_off != NULL)
+               pm_power_off();
+       else
+               default_power_off();
 }
 
 void machine_power_off(void)
 {
-       pm_power_off();
+       if (pm_power_off != NULL)
+               pm_power_off();
+       else
+               default_power_off();
 }
index b70956d..a9436a6 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <asm/page.h>
 #include <asm/vdso.h>
+#include <linux/time_namespace.h>
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL
 #include <vdso/datapage.h>
@@ -25,14 +26,12 @@ extern char vdso_start[], vdso_end[];
 
 enum vvar_pages {
        VVAR_DATA_PAGE_OFFSET,
+       VVAR_TIMENS_PAGE_OFFSET,
        VVAR_NR_PAGES,
 };
 
 #define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)
 
-static unsigned int vdso_pages __ro_after_init;
-static struct page **vdso_pagelist __ro_after_init;
-
 /*
  * The vDSO data page.
  */
@@ -42,83 +41,228 @@ static union {
 } vdso_data_store __page_aligned_data;
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
-static int __init vdso_init(void)
+struct __vdso_info {
+       const char *name;
+       const char *vdso_code_start;
+       const char *vdso_code_end;
+       unsigned long vdso_pages;
+       /* Data Mapping */
+       struct vm_special_mapping *dm;
+       /* Code Mapping */
+       struct vm_special_mapping *cm;
+};
+
+static struct __vdso_info vdso_info __ro_after_init = {
+       .name = "vdso",
+       .vdso_code_start = vdso_start,
+       .vdso_code_end = vdso_end,
+};
+
+static int vdso_mremap(const struct vm_special_mapping *sm,
+                      struct vm_area_struct *new_vma)
+{
+       current->mm->context.vdso = (void *)new_vma->vm_start;
+
+       return 0;
+}
+
+static int __init __vdso_init(void)
 {
        unsigned int i;
+       struct page **vdso_pagelist;
+       unsigned long pfn;
 
-       vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
-       vdso_pagelist =
-               kcalloc(vdso_pages + VVAR_NR_PAGES, sizeof(struct page *), GFP_KERNEL);
-       if (unlikely(vdso_pagelist == NULL)) {
-               pr_err("vdso: pagelist allocation failed\n");
-               return -ENOMEM;
+       if (memcmp(vdso_info.vdso_code_start, "\177ELF", 4)) {
+               pr_err("vDSO is not a valid ELF object!\n");
+               return -EINVAL;
        }
 
-       for (i = 0; i < vdso_pages; i++) {
-               struct page *pg;
+       vdso_info.vdso_pages = (
+               vdso_info.vdso_code_end -
+               vdso_info.vdso_code_start) >>
+               PAGE_SHIFT;
+
+       vdso_pagelist = kcalloc(vdso_info.vdso_pages,
+                               sizeof(struct page *),
+                               GFP_KERNEL);
+       if (vdso_pagelist == NULL)
+               return -ENOMEM;
+
+       /* Grab the vDSO code pages. */
+       pfn = sym_to_pfn(vdso_info.vdso_code_start);
+
+       for (i = 0; i < vdso_info.vdso_pages; i++)
+               vdso_pagelist[i] = pfn_to_page(pfn + i);
+
+       vdso_info.cm->pages = vdso_pagelist;
+
+       return 0;
+}
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+       return (struct vdso_data *)(vvar_page);
+}
+
+/*
+ * The vvar mapping contains data for a specific time namespace, so when a task
+ * changes namespace we must unmap its vvar data for the old namespace.
+ * Subsequent faults will map in data for the new namespace.
+ *
+ * For more details see timens_setup_vdso_data().
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+       struct mm_struct *mm = task->mm;
+       struct vm_area_struct *vma;
+
+       mmap_read_lock(mm);
 
-               pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
-               vdso_pagelist[i] = pg;
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               unsigned long size = vma->vm_end - vma->vm_start;
+
+               if (vma_is_special_mapping(vma, vdso_info.dm))
+                       zap_page_range(vma, vma->vm_start, size);
        }
-       vdso_pagelist[i] = virt_to_page(vdso_data);
 
+       mmap_read_unlock(mm);
        return 0;
 }
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       if (likely(vma->vm_mm == current->mm))
+               return current->nsproxy->time_ns->vvar_page;
+
+       /*
+        * VM_PFNMAP | VM_IO protect .fault() handler from being called
+        * through interfaces like /proc/$pid/mem or
+        * process_vm_{readv,writev}() as long as there's no .access()
+        * in special_mapping_vmops.
+        * For more details check_vma_flags() and __access_remote_vm()
+        */
+       WARN(1, "vvar_page accessed remotely");
+
+       return NULL;
+}
+#else
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+       return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+                            struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       struct page *timens_page = find_timens_vvar_page(vma);
+       unsigned long pfn;
+
+       switch (vmf->pgoff) {
+       case VVAR_DATA_PAGE_OFFSET:
+               if (timens_page)
+                       pfn = page_to_pfn(timens_page);
+               else
+                       pfn = sym_to_pfn(vdso_data);
+               break;
+#ifdef CONFIG_TIME_NS
+       case VVAR_TIMENS_PAGE_OFFSET:
+               /*
+                * If a task belongs to a time namespace then a namespace
+                * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+                * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+                * offset.
+                * See also the comment near timens_setup_vdso_data().
+                */
+               if (!timens_page)
+                       return VM_FAULT_SIGBUS;
+               pfn = sym_to_pfn(vdso_data);
+               break;
+#endif /* CONFIG_TIME_NS */
+       default:
+               return VM_FAULT_SIGBUS;
+       }
+
+       return vmf_insert_pfn(vma, vmf->address, pfn);
+}
+
+enum rv_vdso_map {
+       RV_VDSO_MAP_VVAR,
+       RV_VDSO_MAP_VDSO,
+};
+
+static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
+       [RV_VDSO_MAP_VVAR] = {
+               .name   = "[vvar]",
+               .fault = vvar_fault,
+       },
+       [RV_VDSO_MAP_VDSO] = {
+               .name   = "[vdso]",
+               .mremap = vdso_mremap,
+       },
+};
+
+static int __init vdso_init(void)
+{
+       vdso_info.dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR];
+       vdso_info.cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO];
+
+       return __vdso_init();
+}
 arch_initcall(vdso_init);
 
-int arch_setup_additional_pages(struct linux_binprm *bprm,
-       int uses_interp)
+static int __setup_additional_pages(struct mm_struct *mm,
+                                   struct linux_binprm *bprm,
+                                   int uses_interp)
 {
-       struct mm_struct *mm = current->mm;
-       unsigned long vdso_base, vdso_len;
-       int ret;
+       unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
+       void *ret;
 
        BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
 
-       vdso_len = (vdso_pages + VVAR_NR_PAGES) << PAGE_SHIFT;
+       vdso_text_len = vdso_info.vdso_pages << PAGE_SHIFT;
+       /* Be sure to map the data page */
+       vdso_mapping_len = vdso_text_len + VVAR_SIZE;
 
-       if (mmap_write_lock_killable(mm))
-               return -EINTR;
-
-       vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+       vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
        if (IS_ERR_VALUE(vdso_base)) {
-               ret = vdso_base;
-               goto end;
+               ret = ERR_PTR(vdso_base);
+               goto up_fail;
        }
 
-       mm->context.vdso = NULL;
-       ret = install_special_mapping(mm, vdso_base, VVAR_SIZE,
-               (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-       if (unlikely(ret))
-               goto end;
+       ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
+               (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info.dm);
+       if (IS_ERR(ret))
+               goto up_fail;
 
+       vdso_base += VVAR_SIZE;
+       mm->context.vdso = (void *)vdso_base;
        ret =
-          install_special_mapping(mm, vdso_base + VVAR_SIZE,
-               vdso_pages << PAGE_SHIFT,
+          _install_special_mapping(mm, vdso_base, vdso_text_len,
                (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
-               vdso_pagelist);
+               vdso_info.cm);
 
-       if (unlikely(ret))
-               goto end;
+       if (IS_ERR(ret))
+               goto up_fail;
 
-       /*
-        * Put vDSO base into mm struct. We need to do this before calling
-        * install_special_mapping or the perf counter mmap tracking code
-        * will fail to recognise it as a vDSO (since arch_vma_name fails).
-        */
-       mm->context.vdso = (void *)vdso_base + VVAR_SIZE;
+       return 0;
 
-end:
-       mmap_write_unlock(mm);
-       return ret;
+up_fail:
+       mm->context.vdso = NULL;
+       return PTR_ERR(ret);
 }
 
-const char *arch_vma_name(struct vm_area_struct *vma)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
-       if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
-               return "[vdso]";
-       if (vma->vm_mm && (vma->vm_start ==
-                          (long)vma->vm_mm->context.vdso - VVAR_SIZE))
-               return "[vdso_data]";
-       return NULL;
+       struct mm_struct *mm = current->mm;
+       int ret;
+
+       if (mmap_write_lock_killable(mm))
+               return -EINTR;
+
+       ret = __setup_additional_pages(mm, bprm, uses_interp);
+       mmap_write_unlock(mm);
+
+       return ret;
 }
index e9111f7..01d94aa 100644 (file)
@@ -10,6 +10,9 @@ OUTPUT_ARCH(riscv)
 SECTIONS
 {
        PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
+#ifdef CONFIG_TIME_NS
+       PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
+#endif
        . = SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
index 9c9f350..f5ed082 100644 (file)
@@ -64,8 +64,11 @@ SECTIONS
 /*
  * From this point, stuff is considered writable and will be copied to RAM
  */
-       __data_loc = ALIGN(16);         /* location in file */
-       . = LOAD_OFFSET + XIP_OFFSET;   /* location in memory */
+       __data_loc = ALIGN(PAGE_SIZE);          /* location in file */
+       . = KERNEL_LINK_ADDR + XIP_OFFSET;      /* location in memory */
+
+#undef LOAD_OFFSET
+#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK))
 
        _sdata = .;                     /* Start of data section */
        _data = .;
@@ -96,7 +99,6 @@ SECTIONS
                KEEP(*(__soc_builtin_dtb_table))
                __soc_builtin_dtb_table_end = .;
        }
-       PERCPU_SECTION(L1_CACHE_BYTES)
 
        . = ALIGN(8);
        .alternative : {
@@ -122,6 +124,8 @@ SECTIONS
 
        BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
 
+       PERCPU_SECTION(L1_CACHE_BYTES)
+
        .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
                *(.rel.dyn*)
        }
index e3d3aed..fb84619 100644 (file)
@@ -740,7 +740,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
                 * Ensure we set mode to IN_GUEST_MODE after we disable
                 * interrupts and before the final VCPU requests check.
                 * See the comment in kvm_vcpu_exiting_guest_mode() and
-                * Documentation/virtual/kvm/vcpu-requests.rst
+                * Documentation/virt/kvm/vcpu-requests.rst
                 */
                vcpu->mode = IN_GUEST_MODE;
 
index eb3c045..3b0e703 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/**
+/*
  * Copyright (c) 2019 Western Digital Corporation or its affiliates.
  *
  * Authors:
index 26399df..fb18af3 100644 (file)
@@ -74,7 +74,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
-               r = num_online_cpus();
+               r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
                break;
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
index ee3459c..ea54cc0 100644 (file)
@@ -233,8 +233,10 @@ static int __init asids_init(void)
        local_flush_tlb_all();
 
        /* Pre-compute ASID details */
-       num_asids = 1 << asid_bits;
-       asid_mask = num_asids - 1;
+       if (asid_bits) {
+               num_asids = 1 << asid_bits;
+               asid_mask = num_asids - 1;
+       }
 
        /*
         * Use ASID allocator only if number of HW ASIDs are
@@ -255,7 +257,7 @@ static int __init asids_init(void)
                pr_info("ASID allocator using %lu bits (%lu entries)\n",
                        asid_bits, num_asids);
        } else {
-               pr_info("ASID allocator disabled\n");
+               pr_info("ASID allocator disabled (%lu bits)\n", asid_bits);
        }
 
        return 0;
index c0cddf0..24b2b80 100644 (file)
@@ -41,7 +41,7 @@ phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);
 
 #ifdef CONFIG_XIP_KERNEL
-extern char _xiprom[], _exiprom[];
+extern char _xiprom[], _exiprom[], __data_loc;
 #endif
 
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
@@ -454,10 +454,9 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 /* called from head.S with MMU off */
 asmlinkage void __init __copy_data(void)
 {
-       void *from = (void *)(&_sdata);
-       void *end = (void *)(&_end);
+       void *from = (void *)(&__data_loc);
        void *to = (void *)CONFIG_PHYS_RAM_BASE;
-       size_t sz = (size_t)(end - from + 1);
+       size_t sz = (size_t)((uintptr_t)(&_end) - (uintptr_t)(&_sdata));
 
        memcpy(to, from, sz);
 }
index 6b3c366..90824be 100644 (file)
@@ -210,9 +210,11 @@ int zpci_deconfigure_device(struct zpci_dev *zdev);
 void zpci_device_reserved(struct zpci_dev *zdev);
 bool zpci_is_device_configured(struct zpci_dev *zdev);
 
+int zpci_hot_reset_device(struct zpci_dev *zdev);
 int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 void zpci_remove_reserved_devices(void);
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
 
 /* CLP */
 int clp_setup_writeback_mio(void);
@@ -294,8 +296,10 @@ void zpci_debug_exit(void);
 void zpci_debug_init_device(struct zpci_dev *, const char *);
 void zpci_debug_exit_device(struct zpci_dev *);
 
-/* Error reporting */
+/* Error handling */
 int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+int zpci_clear_error_state(struct zpci_dev *zdev);
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev);
 
 #ifdef CONFIG_NUMA
 
index 6f431fa..ee8707a 100644 (file)
@@ -687,8 +687,10 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
                                                      false);
                        if (cfdiag_diffctr(cpuhw, event->hw.config_base))
                                cfdiag_push_sample(event, cpuhw);
-               } else
+               } else if (cpuhw->flags & PMU_F_RESERVED) {
+                       /* Only update when PMU not hotplugged off */
                        hw_perf_event_update(event);
+               }
                hwc->state |= PERF_HES_UPTODATE;
        }
 }
index c6257f6..14a18ba 100644 (file)
@@ -585,6 +585,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                        r = KVM_MAX_VCPUS;
                else if (sclp.has_esca && sclp.has_64bscao)
                        r = KVM_S390_ESCA_CPU_SLOTS;
+               if (ext == KVM_CAP_NR_VCPUS)
+                       r = min_t(unsigned int, num_online_cpus(), r);
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
index 872d772..2f9b78f 100644 (file)
@@ -481,6 +481,34 @@ static void zpci_free_iomap(struct zpci_dev *zdev, int entry)
        spin_unlock(&zpci_iomap_lock);
 }
 
+static void zpci_do_update_iomap_fh(struct zpci_dev *zdev, u32 fh)
+{
+       int bar, idx;
+
+       spin_lock(&zpci_iomap_lock);
+       for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) {
+               if (!zdev->bars[bar].size)
+                       continue;
+               idx = zdev->bars[bar].map_idx;
+               if (!zpci_iomap_start[idx].count)
+                       continue;
+               WRITE_ONCE(zpci_iomap_start[idx].fh, zdev->fh);
+       }
+       spin_unlock(&zpci_iomap_lock);
+}
+
+void zpci_update_fh(struct zpci_dev *zdev, u32 fh)
+{
+       if (!fh || zdev->fh == fh)
+               return;
+
+       zdev->fh = fh;
+       if (zpci_use_mio(zdev))
+               return;
+       if (zdev->has_resources && zdev_enabled(zdev))
+               zpci_do_update_iomap_fh(zdev, fh);
+}
+
 static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
                                    unsigned long size, unsigned long flags)
 {
@@ -668,7 +696,7 @@ int zpci_enable_device(struct zpci_dev *zdev)
        if (clp_enable_fh(zdev, &fh, ZPCI_NR_DMA_SPACES))
                rc = -EIO;
        else
-               zdev->fh = fh;
+               zpci_update_fh(zdev, fh);
        return rc;
 }
 
@@ -679,14 +707,14 @@ int zpci_disable_device(struct zpci_dev *zdev)
 
        cc = clp_disable_fh(zdev, &fh);
        if (!cc) {
-               zdev->fh = fh;
+               zpci_update_fh(zdev, fh);
        } else if (cc == CLP_RC_SETPCIFN_ALRDY) {
                pr_info("Disabling PCI function %08x had no effect as it was already disabled\n",
                        zdev->fid);
                /* Function is already disabled - update handle */
                rc = clp_refresh_fh(zdev->fid, &fh);
                if (!rc) {
-                       zdev->fh = fh;
+                       zpci_update_fh(zdev, fh);
                        rc = -EINVAL;
                }
        } else {
@@ -696,6 +724,65 @@ int zpci_disable_device(struct zpci_dev *zdev)
 }
 
 /**
+ * zpci_hot_reset_device - perform a reset of the given zPCI function
+ * @zdev: the slot which should be reset
+ *
+ * Performs a low level reset of the zPCI function. The reset is low level in
+ * the sense that the zPCI function can be reset without detaching it from the
+ * common PCI subsystem. The reset may be performed while under control of
+ * either DMA or IOMMU APIs in which case the existing DMA/IOMMU translation
+ * table is reinstated at the end of the reset.
+ *
+ * After the reset the functions internal state is reset to an initial state
+ * equivalent to its state during boot when first probing a driver.
+ * Consequently after reset the PCI function requires re-initialization via the
+ * common PCI code including re-enabling IRQs via pci_alloc_irq_vectors()
+ * and enabling the function via e.g.pci_enablde_device_flags().The caller
+ * must guard against concurrent reset attempts.
+ *
+ * In most cases this function should not be called directly but through
+ * pci_reset_function() or pci_reset_bus() which handle the save/restore and
+ * locking.
+ *
+ * Return: 0 on success and an error value otherwise
+ */
+int zpci_hot_reset_device(struct zpci_dev *zdev)
+{
+       int rc;
+
+       zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
+       if (zdev_enabled(zdev)) {
+               /* Disables device access, DMAs and IRQs (reset state) */
+               rc = zpci_disable_device(zdev);
+               /*
+                * Due to a z/VM vs LPAR inconsistency in the error state the
+                * FH may indicate an enabled device but disable says the
+                * device is already disabled don't treat it as an error here.
+                */
+               if (rc == -EINVAL)
+                       rc = 0;
+               if (rc)
+                       return rc;
+       }
+
+       rc = zpci_enable_device(zdev);
+       if (rc)
+               return rc;
+
+       if (zdev->dma_table)
+               rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+                                       (u64)zdev->dma_table);
+       else
+               rc = zpci_dma_init_device(zdev);
+       if (rc) {
+               zpci_disable_device(zdev);
+               return rc;
+       }
+
+       return 0;
+}
+
+/**
  * zpci_create_device() - Create a new zpci_dev and add it to the zbus
  * @fid: Function ID of the device to be created
  * @fh: Current Function Handle of the device to be created
@@ -776,7 +863,7 @@ int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
 {
        int rc;
 
-       zdev->fh = fh;
+       zpci_update_fh(zdev, fh);
        /* the PCI function will be scanned once function 0 appears */
        if (!zdev->zbus->bus)
                return 0;
@@ -903,6 +990,59 @@ int zpci_report_error(struct pci_dev *pdev,
 }
 EXPORT_SYMBOL(zpci_report_error);
 
+/**
+ * zpci_clear_error_state() - Clears the zPCI error state of the device
+ * @zdev: The zdev for which the zPCI error state should be reset
+ *
+ * Clear the zPCI error state of the device. If clearing the zPCI error state
+ * fails the device is left in the error state. In this case it may make sense
+ * to call zpci_io_perm_failure() on the associated pdev if it exists.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_clear_error_state(struct zpci_dev *zdev)
+{
+       u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_ERROR);
+       struct zpci_fib fib = {0};
+       u8 status;
+       int cc;
+
+       cc = zpci_mod_fc(req, &fib, &status);
+       if (cc) {
+               zpci_dbg(3, "ces fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/**
+ * zpci_reset_load_store_blocked() - Re-enables L/S from error state
+ * @zdev: The zdev for which to unblock load/store access
+ *
+ * Re-enables load/store access for a PCI function in the error state while
+ * keeping DMA blocked. In this state drivers can poke MMIO space to determine
+ * if error recovery is possible while catching any rogue DMA access from the
+ * device.
+ *
+ * Returns: 0 on success, -EIO otherwise
+ */
+int zpci_reset_load_store_blocked(struct zpci_dev *zdev)
+{
+       u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_RESET_BLOCK);
+       struct zpci_fib fib = {0};
+       u8 status;
+       int cc;
+
+       cc = zpci_mod_fc(req, &fib, &status);
+       if (cc) {
+               zpci_dbg(3, "rls fid:%x, cc:%d, status:%x\n", zdev->fid, cc, status);
+               return -EIO;
+       }
+
+       return 0;
+}
+
 static int zpci_mem_init(void)
 {
        BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) ||
index 6a5bfa9..2e3e5b2 100644 (file)
@@ -47,18 +47,223 @@ struct zpci_ccdf_avail {
        u16 pec;                        /* PCI event code */
 } __packed;
 
+static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
+{
+       switch (ers_res) {
+       case PCI_ERS_RESULT_CAN_RECOVER:
+       case PCI_ERS_RESULT_RECOVERED:
+       case PCI_ERS_RESULT_NEED_RESET:
+               return false;
+       default:
+               return true;
+       }
+}
+
+static bool is_passed_through(struct zpci_dev *zdev)
+{
+       return zdev->s390_domain;
+}
+
+static bool is_driver_supported(struct pci_driver *driver)
+{
+       if (!driver || !driver->err_handler)
+               return false;
+       if (!driver->err_handler->error_detected)
+               return false;
+       if (!driver->err_handler->slot_reset)
+               return false;
+       if (!driver->err_handler->resume)
+               return false;
+       return true;
+}
+
+static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
+                                                        struct pci_driver *driver)
+{
+       pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+       ers_res = driver->err_handler->error_detected(pdev,  pdev->error_state);
+       if (ers_result_indicates_abort(ers_res))
+               pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
+       else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+               pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+
+       return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
+                                                       struct pci_driver *driver)
+{
+       pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+       struct zpci_dev *zdev = to_zpci(pdev);
+       int rc;
+
+       pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
+       rc = zpci_reset_load_store_blocked(zdev);
+       if (rc) {
+               pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
+               /* Let's try a full reset instead */
+               return PCI_ERS_RESULT_NEED_RESET;
+       }
+
+       if (driver->err_handler->mmio_enabled) {
+               ers_res = driver->err_handler->mmio_enabled(pdev);
+               if (ers_result_indicates_abort(ers_res)) {
+                       pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
+                               pci_name(pdev));
+                       return ers_res;
+               } else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
+                       pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
+                       return ers_res;
+               }
+       }
+
+       pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
+       rc = zpci_clear_error_state(zdev);
+       if (!rc) {
+               pdev->error_state = pci_channel_io_normal;
+       } else {
+               pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
+               /* Let's try a full reset instead */
+               return PCI_ERS_RESULT_NEED_RESET;
+       }
+
+       return ers_res;
+}
+
+static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
+                                           struct pci_driver *driver)
+{
+       pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+
+       pr_info("%s: Initiating reset\n", pci_name(pdev));
+       if (zpci_hot_reset_device(to_zpci(pdev))) {
+               pr_err("%s: The reset request failed\n", pci_name(pdev));
+               return ers_res;
+       }
+       pdev->error_state = pci_channel_io_normal;
+       ers_res = driver->err_handler->slot_reset(pdev);
+       if (ers_result_indicates_abort(ers_res)) {
+               pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
+               return ers_res;
+       }
+
+       return ers_res;
+}
+
+/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
+ * @pdev: PCI function to recover currently in the error state
+ *
+ * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst.
+ * With the simplification that recovery always happens per function
+ * and the platform determines which functions are affected for
+ * multi-function devices.
+ */
+static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
+{
+       pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
+       struct pci_driver *driver;
+
+       /*
+        * Ensure that the PCI function is not removed concurrently, no driver
+        * is unbound or probed and that userspace can't access its
+        * configuration space while we perform recovery.
+        */
+       pci_dev_lock(pdev);
+       if (pdev->error_state == pci_channel_io_perm_failure) {
+               ers_res = PCI_ERS_RESULT_DISCONNECT;
+               goto out_unlock;
+       }
+       pdev->error_state = pci_channel_io_frozen;
+
+       if (is_passed_through(to_zpci(pdev))) {
+               pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
+                       pci_name(pdev));
+               goto out_unlock;
+       }
+
+       driver = to_pci_driver(pdev->dev.driver);
+       if (!is_driver_supported(driver)) {
+               if (!driver)
+                       pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
+                               pci_name(pdev));
+               else
+                       pr_info("%s: The %s driver bound to the device does not support error recovery\n",
+                               pci_name(pdev),
+                               driver->name);
+               goto out_unlock;
+       }
+
+       ers_res = zpci_event_notify_error_detected(pdev, driver);
+       if (ers_result_indicates_abort(ers_res))
+               goto out_unlock;
+
+       if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) {
+               ers_res = zpci_event_do_error_state_clear(pdev, driver);
+               if (ers_result_indicates_abort(ers_res))
+                       goto out_unlock;
+       }
+
+       if (ers_res == PCI_ERS_RESULT_NEED_RESET)
+               ers_res = zpci_event_do_reset(pdev, driver);
+
+       if (ers_res != PCI_ERS_RESULT_RECOVERED) {
+               pr_err("%s: Automatic recovery failed; operator intervention is required\n",
+                      pci_name(pdev));
+               goto out_unlock;
+       }
+
+       pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
+       if (driver->err_handler->resume)
+               driver->err_handler->resume(pdev);
+out_unlock:
+       pci_dev_unlock(pdev);
+
+       return ers_res;
+}
+
+/* zpci_event_io_failure - Report PCI channel failure state to driver
+ * @pdev: PCI function for which to report
+ * @es: PCI channel failure state to report
+ */
+static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
+{
+       struct pci_driver *driver;
+
+       pci_dev_lock(pdev);
+       pdev->error_state = es;
+       /**
+        * While vfio-pci's error_detected callback notifies user-space QEMU
+        * reacts to this by freezing the guest. In an s390 environment PCI
+        * errors are rarely fatal so this is overkill. Instead in the future
+        * we will inject the error event and let the guest recover the device
+        * itself.
+        */
+       if (is_passed_through(to_zpci(pdev)))
+               goto out;
+       driver = to_pci_driver(pdev->dev.driver);
+       if (driver && driver->err_handler && driver->err_handler->error_detected)
+               driver->err_handler->error_detected(pdev, pdev->error_state);
+out:
+       pci_dev_unlock(pdev);
+}
+
 static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
 {
        struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
        struct pci_dev *pdev = NULL;
+       pci_ers_result_t ers_res;
 
        zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
                 ccdf->fid, ccdf->fh, ccdf->pec);
        zpci_err("error CCDF:\n");
        zpci_err_hex(ccdf, sizeof(*ccdf));
 
-       if (zdev)
-               pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+       if (zdev) {
+               zpci_update_fh(zdev, ccdf->fh);
+               if (zdev->zbus->bus)
+                       pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
+       }
 
        pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
               pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);
@@ -66,7 +271,20 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
        if (!pdev)
                return;
 
-       pdev->error_state = pci_channel_io_perm_failure;
+       switch (ccdf->pec) {
+       case 0x003a: /* Service Action or Error Recovery Successful */
+               ers_res = zpci_event_attempt_error_recovery(pdev);
+               if (ers_res != PCI_ERS_RESULT_RECOVERED)
+                       zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
+               break;
+       default:
+               /*
+                * Mark as frozen not permanently failed because the device
+                * could be subsequently recovered by the platform.
+                */
+               zpci_event_io_failure(pdev, pci_channel_io_frozen);
+               break;
+       }
        pci_dev_put(pdev);
 }
 
@@ -78,7 +296,7 @@ void zpci_event_error(void *data)
 
 static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
 {
-       zdev->fh = fh;
+       zpci_update_fh(zdev, fh);
        /* Give the driver a hint that the function is
         * already unusable.
         */
@@ -121,7 +339,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
                if (!zdev)
                        zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
                else
-                       zdev->fh = ccdf->fh;
+                       zpci_update_fh(zdev, ccdf->fh);
                break;
        case 0x0303: /* Deconfiguration requested */
                if (zdev) {
@@ -130,7 +348,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
                         */
                        if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
                                break;
-                       zdev->fh = ccdf->fh;
+                       zpci_update_fh(zdev, ccdf->fh);
                        zpci_deconfigure_device(zdev);
                }
                break;
index 2e43996..28d863a 100644 (file)
@@ -163,7 +163,7 @@ static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
                               unsigned long len)
 {
        struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
-       u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+       u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
 
        return __zpci_load(data, req, ZPCI_OFFSET(addr));
 }
@@ -244,7 +244,7 @@ static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
                                unsigned long len)
 {
        struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
-       u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+       u64 req = ZPCI_CREATE_REQ(READ_ONCE(entry->fh), entry->bar, len);
 
        return __zpci_store(data, req, ZPCI_OFFSET(addr));
 }
index 3823e15..954bb7a 100644 (file)
@@ -387,6 +387,15 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
                airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
 }
 
+void arch_restore_msi_irqs(struct pci_dev *pdev)
+{
+       struct zpci_dev *zdev = to_zpci(pdev);
+
+       if (!zdev->irqs_registered)
+               zpci_set_irq(zdev);
+       default_restore_msi_irqs(pdev);
+}
+
 static struct airq_struct zpci_airq = {
        .handler = zpci_floating_irq_handler,
        .isc = PCI_ISC,
index 6904f4b..70afb30 100644 (file)
@@ -56,7 +56,6 @@ config SUPERH
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_FORCED_THREADING
-       select MAY_HAVE_SPARSE_IRQ
        select MODULES_USE_ELF_RELA
        select NEED_SG_DMA_LENGTH
        select NO_DMA if !MMU && !DMA_COHERENT
index 958f790..10290e5 100644 (file)
@@ -54,6 +54,7 @@ config DUMP_CODE
 
 config DWARF_UNWINDER
        bool "Enable the DWARF unwinder for stacktraces"
+       depends on DEBUG_KERNEL
        select FRAME_POINTER
        default n
        help
index 29b8b1f..0b672b8 100644 (file)
@@ -26,8 +26,8 @@ enum {
        PCI_INTD, /* PCI int D */
        ATA,      /* ATA */
        FATA,     /* CF */
-       POWER,    /* Power swtich */
-       BUTTON,   /* Button swtich */
+       POWER,    /* Power switch */
+       BUTTON,   /* Button switch */
 };
 
 /* Vectors for LANDISK */
index c081e7e..5c123f5 100644 (file)
@@ -27,8 +27,8 @@ suffix-$(CONFIG_KERNEL_XZ)    := xz
 suffix-$(CONFIG_KERNEL_LZO)    := lzo
 
 targets := zImage vmlinux.srec romImage uImage uImage.srec uImage.gz \
-          uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin
-extra-y += vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
+          uImage.bz2 uImage.lzma uImage.xz uImage.lzo uImage.bin \
+          vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
           vmlinux.bin.xz vmlinux.bin.lzo
 subdir- := compressed romimage
 
index 37aa530..cd16663 100644 (file)
@@ -1,7 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-ashiftrt.S
-ashldi3.c
-ashlsi3.S
-ashrsi3.S
-lshrsi3.S
 vmlinux.bin.*
index 589d2d8..cf3174d 100644 (file)
@@ -5,12 +5,18 @@
 # create a compressed vmlinux image from the original vmlinux
 #
 
-targets                := vmlinux vmlinux.bin vmlinux.bin.gz \
-                  vmlinux.bin.bz2 vmlinux.bin.lzma \
-                  vmlinux.bin.xz vmlinux.bin.lzo \
-                  head_32.o misc.o piggy.o
+OBJECTS := head_32.o misc.o cache.o piggy.o \
+           ashiftrt.o ashldi3.o ashrsi3.o ashlsi3.o lshrsi3.o
+
+# These were previously generated files. When you are building the kernel
+# with O=, make sure to remove the stale files in the output tree. Otherwise,
+# the build system wrongly compiles the stale ones.
+ifdef building_out_of_srctree
+$(shell rm -f $(addprefix $(obj)/, ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S))
+endif
 
-OBJECTS = $(obj)/head_32.o $(obj)/misc.o $(obj)/cache.o
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
+           vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo $(OBJECTS)
 
 GCOV_PROFILE := n
 
@@ -33,21 +39,9 @@ ccflags-remove-$(CONFIG_MCOUNT) += -pg
 LDFLAGS_vmlinux := --oformat $(ld-bfd) -Ttext $(IMAGE_OFFSET) -e startup \
                   -T $(obj)/../../kernel/vmlinux.lds
 
-#
-# Pull in the necessary libgcc bits from the in-kernel implementation.
-#
-lib1funcs-y    := ashiftrt.S ashldi3.c ashrsi3.S ashlsi3.S lshrsi3.S
-lib1funcs-obj   := \
-       $(addsuffix .o, $(basename $(addprefix $(obj)/, $(lib1funcs-y))))
-
-lib1funcs-dir          := $(srctree)/arch/$(SRCARCH)/lib
-
-KBUILD_CFLAGS += -I$(lib1funcs-dir) -DDISABLE_BRANCH_PROFILING
-
-$(addprefix $(obj)/,$(lib1funcs-y)): $(obj)/%: $(lib1funcs-dir)/% FORCE
-       $(call cmd,shipped)
+KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 
-$(obj)/vmlinux: $(OBJECTS) $(obj)/piggy.o $(lib1funcs-obj) FORCE
+$(obj)/vmlinux: $(addprefix $(obj)/, $(OBJECTS)) FORCE
        $(call if_changed,ld)
 
 $(obj)/vmlinux.bin: vmlinux FORCE
diff --git a/arch/sh/boot/compressed/ashiftrt.S b/arch/sh/boot/compressed/ashiftrt.S
new file mode 100644 (file)
index 0000000..0f3b291
--- /dev/null
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashiftrt.S"
diff --git a/arch/sh/boot/compressed/ashldi3.c b/arch/sh/boot/compressed/ashldi3.c
new file mode 100644 (file)
index 0000000..7cebd64
--- /dev/null
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "../../lib/ashldi3.c"
diff --git a/arch/sh/boot/compressed/ashlsi3.S b/arch/sh/boot/compressed/ashlsi3.S
new file mode 100644 (file)
index 0000000..e354262
--- /dev/null
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashlsi3.S"
diff --git a/arch/sh/boot/compressed/ashrsi3.S b/arch/sh/boot/compressed/ashrsi3.S
new file mode 100644 (file)
index 0000000..e564be9
--- /dev/null
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/ashrsi3.S"
diff --git a/arch/sh/boot/compressed/lshrsi3.S b/arch/sh/boot/compressed/lshrsi3.S
new file mode 100644 (file)
index 0000000..5a8281b
--- /dev/null
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "../../lib/lshrsi3.S"
index 1a391e3..a6501b8 100644 (file)
@@ -84,7 +84,8 @@ static inline __sum16 csum_fold(__wsum sum)
  */
 static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
 {
-       unsigned int sum, __dummy0, __dummy1;
+       __wsum sum;
+       unsigned int __dummy0, __dummy1;
 
        __asm__ __volatile__(
                "mov.l  @%1+, %0\n\t"
@@ -197,6 +198,6 @@ static inline __wsum csum_and_copy_to_user(const void *src,
 {
        if (!access_ok(dst, len))
                return 0;
-       return csum_partial_copy_generic((__force const void *)src, dst, len);
+       return csum_partial_copy_generic(src, (__force void *)dst, len);
 }
 #endif /* __ASM_SH_CHECKSUM_H */
index 839551c..1c49235 100644 (file)
@@ -6,17 +6,6 @@
 #include <asm/machvec.h>
 
 /*
- * Only legacy non-sparseirq platforms have to set a reasonably sane
- * value here. sparseirq platforms allocate their irq_descs on the fly,
- * so will expand automatically based on the number of registered IRQs.
- */
-#ifdef CONFIG_SPARSE_IRQ
-# define NR_IRQS               8
-#else
-# define NR_IRQS               512
-#endif
-
-/*
  * This is a special IRQ number for indicating that no IRQ has been
  * triggered and to simply ignore the IRQ dispatch. This is a special
  * case that can happen with IRQ auto-distribution when multiple CPUs
index cbc7cf8..2d24234 100644 (file)
 #ifndef _SFP_MACHINE_H
 #define _SFP_MACHINE_H
 
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
 #define _FP_W_TYPE_SIZE                32
 #define _FP_W_TYPE             unsigned long
 #define _FP_WS_TYPE            signed long
index 73f3b48..8867bb0 100644 (file)
@@ -68,7 +68,7 @@ struct __large_struct { unsigned long buf[100]; };
 ({                                                                     \
        long __gu_err = -EFAULT;                                        \
        unsigned long __gu_val = 0;                                     \
-       const __typeof__(*(ptr)) *__gu_addr = (ptr);                    \
+       const __typeof__(*(ptr)) __user *__gu_addr = (ptr);                     \
        if (likely(access_ok(__gu_addr, (size))))               \
                __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
        (x) = (__force __typeof__(*(ptr)))__gu_val;                     \
@@ -124,7 +124,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
  * Clear the area and return remaining number of bytes
  * (on failure.  Usually it's 0.)
  */
-__kernel_size_t __clear_user(void *addr, __kernel_size_t size);
+__kernel_size_t __clear_user(void __user *addr, __kernel_size_t size);
 
 #define clear_user(addr,n)                                             \
 ({                                                                     \
index f8a2bec..1261dc7 100644 (file)
@@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus)
        BUILD_BUG_ON(SMP_MSG_NR >= 8);
 
        for (i = 0; i < SMP_MSG_NR; i++)
-               request_irq(104 + i, ipi_interrupt_handler,
-                           IRQF_PERCPU, "IPI", (void *)(long)i);
+               if (request_irq(104 + i, ipi_interrupt_handler,
+                           IRQF_PERCPU, "IPI", (void *)(long)i))
+                       pr_err("Failed to request irq %d\n", i);
 
        for (i = 0; i < max_cpus; i++)
                set_cpu_present(i, true);
index a908612..5b41b59 100644 (file)
@@ -26,7 +26,7 @@
 ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
                                size_t csize, unsigned long offset, int userbuf)
 {
-       void  *vaddr;
+       void  __iomem *vaddr;
 
        if (!csize)
                return 0;
@@ -34,7 +34,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
        vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
 
        if (userbuf) {
-               if (copy_to_user(buf, (vaddr + offset), csize)) {
+               if (copy_to_user((void __user *)buf, (vaddr + offset), csize)) {
                        iounmap(vaddr);
                        return -EFAULT;
                }
index b62ad0b..b3c715b 100644 (file)
@@ -490,7 +490,7 @@ asmlinkage void do_address_error(struct pt_regs *regs,
                inc_unaligned_user_access();
 
                oldfs = force_uaccess_begin();
-               if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1),
+               if (copy_from_user(&instruction, (insn_size_t __user *)(regs->pc & ~1),
                                   sizeof(instruction))) {
                        force_uaccess_end(oldfs);
                        goto uspace_segv;
@@ -614,7 +614,7 @@ asmlinkage void do_reserved_inst(void)
        unsigned short inst = 0;
        int err;
 
-       get_user(inst, (unsigned short*)regs->pc);
+       get_user(inst, (unsigned short __user *)regs->pc);
 
        err = do_fpu_inst(inst, regs);
        if (!err) {
@@ -699,9 +699,9 @@ asmlinkage void do_illegal_slot_inst(void)
                return;
 
 #ifdef CONFIG_SH_FPU_EMU
-       get_user(inst, (unsigned short *)regs->pc + 1);
+       get_user(inst, (unsigned short __user *)regs->pc + 1);
        if (!do_fpu_inst(inst, regs)) {
-               get_user(inst, (unsigned short *)regs->pc);
+               get_user(inst, (unsigned short __user *)regs->pc);
                if (!emulate_branch(inst, regs))
                        return;
                /* fault in branch.*/
index e8be0ec..cdaef65 100644 (file)
@@ -51,8 +51,8 @@
 #define Rn     (regs->regs[n])
 #define Rm     (regs->regs[m])
 
-#define WRITE(d,a)     ({if(put_user(d, (typeof (d)*)a)) return -EFAULT;})
-#define READ(d,a)      ({if(get_user(d, (typeof (d)*)a)) return -EFAULT;})
+#define MWRITE(d,a)    ({if(put_user(d, (typeof (d) __user *)a)) return -EFAULT;})
+#define MREAD(d,a)     ({if(get_user(d, (typeof (d) __user *)a)) return -EFAULT;})
 
 #define PACK_S(r,f)    FP_PACK_SP(&r,f)
 #define UNPACK_S(f,r)  FP_UNPACK_SP(f,&r)
@@ -157,11 +157,11 @@ fmov_idx_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
 {
        if (FPSCR_SZ) {
                FMOV_EXT(n);
-               READ(FRn, Rm + R0 + 4);
+               MREAD(FRn, Rm + R0 + 4);
                n++;
-               READ(FRn, Rm + R0);
+               MREAD(FRn, Rm + R0);
        } else {
-               READ(FRn, Rm + R0);
+               MREAD(FRn, Rm + R0);
        }
 
        return 0;
@@ -173,11 +173,11 @@ fmov_mem_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
 {
        if (FPSCR_SZ) {
                FMOV_EXT(n);
-               READ(FRn, Rm + 4);
+               MREAD(FRn, Rm + 4);
                n++;
-               READ(FRn, Rm);
+               MREAD(FRn, Rm);
        } else {
-               READ(FRn, Rm);
+               MREAD(FRn, Rm);
        }
 
        return 0;
@@ -189,12 +189,12 @@ fmov_inc_reg(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
 {
        if (FPSCR_SZ) {
                FMOV_EXT(n);
-               READ(FRn, Rm + 4);
+               MREAD(FRn, Rm + 4);
                n++;
-               READ(FRn, Rm);
+               MREAD(FRn, Rm);
                Rm += 8;
        } else {
-               READ(FRn, Rm);
+               MREAD(FRn, Rm);
                Rm += 4;
        }
 
@@ -207,11 +207,11 @@ fmov_reg_idx(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
 {
        if (FPSCR_SZ) {
                FMOV_EXT(m);
-               WRITE(FRm, Rn + R0 + 4);
+               MWRITE(FRm, Rn + R0 + 4);
                m++;
-               WRITE(FRm, Rn + R0);
+               MWRITE(FRm, Rn + R0);
        } else {
-               WRITE(FRm, Rn + R0);
+               MWRITE(FRm, Rn + R0);
        }
 
        return 0;
@@ -223,11 +223,11 @@ fmov_reg_mem(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
 {
        if (FPSCR_SZ) {
                FMOV_EXT(m);
-               WRITE(FRm, Rn + 4);
+               MWRITE(FRm, Rn + 4);
                m++;
-               WRITE(FRm, Rn);
+               MWRITE(FRm, Rn);
        } else {
-               WRITE(FRm, Rn);
+               MWRITE(FRm, Rn);
        }
 
        return 0;
@@ -240,12 +240,12 @@ fmov_reg_dec(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, int m,
        if (FPSCR_SZ) {
                FMOV_EXT(m);
                Rn -= 8;
-               WRITE(FRm, Rn + 4);
+               MWRITE(FRm, Rn + 4);
                m++;
-               WRITE(FRm, Rn);
+               MWRITE(FRm, Rn);
        } else {
                Rn -= 4;
-               WRITE(FRm, Rn);
+               MWRITE(FRm, Rn);
        }
 
        return 0;
@@ -445,11 +445,11 @@ id_sys(struct sh_fpu_soft_struct *fregs, struct pt_regs *regs, u16 code)
        case 0x4052:
        case 0x4062:
                Rn -= 4;
-               WRITE(*reg, Rn);
+               MWRITE(*reg, Rn);
                break;
        case 0x4056:
        case 0x4066:
-               READ(*reg, Rn);
+               MREAD(*reg, Rn);
                Rn += 4;
                break;
        default:
@@ -468,109 +468,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg
 }
 
 /**
- *     denormal_to_double - Given denormalized float number,
- *                          store double float
- *
- *     @fpu: Pointer to sh_fpu_soft structure
- *     @n: Index to FP register
- */
-static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n)
-{
-       unsigned long du, dl;
-       unsigned long x = fpu->fpul;
-       int exp = 1023 - 126;
-
-       if (x != 0 && (x & 0x7f800000) == 0) {
-               du = (x & 0x80000000);
-               while ((x & 0x00800000) == 0) {
-                       x <<= 1;
-                       exp--;
-               }
-               x &= 0x007fffff;
-               du |= (exp << 20) | (x >> 3);
-               dl = x << 29;
-
-               fpu->fp_regs[n] = du;
-               fpu->fp_regs[n+1] = dl;
-       }
-}
-
-/**
- *     ieee_fpe_handler - Handle denormalized number exception
- *
- *     @regs: Pointer to register structure
- *
- *     Returns 1 when it's handled (should not cause exception).
- */
-static int ieee_fpe_handler(struct pt_regs *regs)
-{
-       unsigned short insn = *(unsigned short *)regs->pc;
-       unsigned short finsn;
-       unsigned long nextpc;
-       int nib[4] = {
-               (insn >> 12) & 0xf,
-               (insn >> 8) & 0xf,
-               (insn >> 4) & 0xf,
-               insn & 0xf};
-
-       if (nib[0] == 0xb ||
-           (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
-               regs->pr = regs->pc + 4;
-
-       if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
-               nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
-               finsn = *(unsigned short *) (regs->pc + 2);
-       } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
-               if (regs->sr & 1)
-                       nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
-               else
-                       nextpc = regs->pc + 4;
-               finsn = *(unsigned short *) (regs->pc + 2);
-       } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
-               if (regs->sr & 1)
-                       nextpc = regs->pc + 4;
-               else
-                       nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
-               finsn = *(unsigned short *) (regs->pc + 2);
-       } else if (nib[0] == 0x4 && nib[3] == 0xb &&
-                (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
-               nextpc = regs->regs[nib[1]];
-               finsn = *(unsigned short *) (regs->pc + 2);
-       } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
-                (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
-               nextpc = regs->pc + 4 + regs->regs[nib[1]];
-               finsn = *(unsigned short *) (regs->pc + 2);
-       } else if (insn == 0x000b) { /* rts */
-               nextpc = regs->pr;
-               finsn = *(unsigned short *) (regs->pc + 2);
-       } else {
-               nextpc = regs->pc + 2;
-               finsn = insn;
-       }
-
-       if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
-               struct task_struct *tsk = current;
-
-               if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) {
-                       /* FPU error */
-                       denormal_to_double (&tsk->thread.xstate->softfpu,
-                                           (finsn >> 8) & 0xf);
-                       tsk->thread.xstate->softfpu.fpscr &=
-                               ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
-                       task_thread_info(tsk)->status |= TS_USEDFPU;
-               } else {
-                       force_sig_fault(SIGFPE, FPE_FLTINV,
-                                       (void __user *)regs->pc);
-               }
-
-               regs->pc = nextpc;
-               return 1;
-       }
-
-       return 0;
-}
-
-/**
  * fpu_init - Initialize FPU registers
  * @fpu: Pointer to software emulated FPU registers.
  */
index 8b45044..78c4b6e 100644 (file)
@@ -28,9 +28,9 @@ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n)
        return 0;
 }
 
-__kernel_size_t __clear_user(void *to, __kernel_size_t n)
+__kernel_size_t __clear_user(void __user *to, __kernel_size_t n)
 {
-       memset(to, 0, n);
+       memset((__force void *)to, 0, n);
        return 0;
 }
 
index 6039644..42cf01e 100644 (file)
@@ -3048,8 +3048,10 @@ intel_vlbr_constraints(struct perf_event *event)
 {
        struct event_constraint *c = &vlbr_constraint;
 
-       if (unlikely(constraint_match(c, event->hw.config)))
+       if (unlikely(constraint_match(c, event->hw.config))) {
+               event->hw.flags |= c->flags;
                return c;
+       }
 
        return NULL;
 }
index 6b72e9b..8043213 100644 (file)
@@ -265,6 +265,8 @@ void intel_pmu_lbr_reset(void)
 
        cpuc->last_task_ctx = NULL;
        cpuc->last_log_id = 0;
+       if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
+               wrmsrl(MSR_LBR_SELECT, 0);
 }
 
 /*
index 24f4a06..96eb7db 100644 (file)
@@ -177,6 +177,9 @@ void set_hv_tscchange_cb(void (*cb)(void))
                return;
        }
 
+       if (!hv_vp_index)
+               return;
+
        hv_reenlightenment_cb = cb;
 
        /* Make sure callback is registered before we write to MSRs */
@@ -383,20 +386,13 @@ static void __init hv_get_partition_id(void)
  */
 void __init hyperv_init(void)
 {
-       u64 guest_id, required_msrs;
+       u64 guest_id;
        union hv_x64_msr_hypercall_contents hypercall_msr;
        int cpuhp;
 
        if (x86_hyper_type != X86_HYPER_MS_HYPERV)
                return;
 
-       /* Absolutely required MSRs */
-       required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
-               HV_MSR_VP_INDEX_AVAILABLE;
-
-       if ((ms_hyperv.features & required_msrs) != required_msrs)
-               return;
-
        if (hv_common_init())
                return;
 
index 79f95d3..9656a5b 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_X86_FPU_XCR_H
 
 #define XCR_XFEATURE_ENABLED_MASK      0x00000000
+#define XCR_XFEATURE_IN_USE_MASK       0x00000001
 
 static inline u64 xgetbv(u32 index)
 {
@@ -20,4 +21,15 @@ static inline void xsetbv(u32 index, u64 value)
        asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
 }
 
+/*
+ * Return a mask of xfeatures which are currently being tracked
+ * by the processor as being in the initial configuration.
+ *
+ * Callers should check X86_FEATURE_XGETBV1.
+ */
+static inline u64 xfeatures_in_use(void)
+{
+       return xgetbv(XCR_XFEATURE_IN_USE_MASK);
+}
+
 #endif /* _ASM_X86_FPU_XCR_H */
index 0f8b90a..cd3dd17 100644 (file)
 #define XFEATURE_MASK_FPSTATE  (XFEATURE_MASK_USER_RESTORE | \
                                 XFEATURE_MASK_SUPERVISOR_SUPPORTED)
 
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \
+                                        XFEATURE_MASK_USER_DYNAMIC)
+
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
 extern void __init update_regset_xstate_info(unsigned int size,
index 2715843..5a0bcf8 100644 (file)
 #define INTEL_FAM6_ALDERLAKE           0x97    /* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L         0x9A    /* Golden Cove / Gracemont */
 
+#define INTEL_FAM6_RAPTOR_LAKE         0xB7
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL                0x1C /* Diamondville, Pineview */
index 2acf37c..6ac61f8 100644 (file)
@@ -38,7 +38,6 @@
 #define __KVM_HAVE_ARCH_VCPU_DEBUGFS
 
 #define KVM_MAX_VCPUS 1024
-#define KVM_SOFT_MAX_VCPUS 710
 
 /*
  * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
@@ -364,6 +363,7 @@ union kvm_mmu_extended_role {
                unsigned int cr4_smap:1;
                unsigned int cr4_smep:1;
                unsigned int cr4_la57:1;
+               unsigned int efer_lma:1;
        };
 };
 
@@ -725,6 +725,7 @@ struct kvm_vcpu_arch {
 
        int cpuid_nent;
        struct kvm_cpuid_entry2 *cpuid_entries;
+       u32 kvm_cpuid_base;
 
        u64 reserved_gpa_bits;
        int maxphyaddr;
@@ -748,7 +749,7 @@ struct kvm_vcpu_arch {
                u8 preempted;
                u64 msr_val;
                u64 last_steal;
-               struct gfn_to_pfn_cache cache;
+               struct gfn_to_hva_cache cache;
        } st;
 
        u64 l1_tsc_offset;
@@ -1034,6 +1035,7 @@ struct kvm_x86_msr_filter {
 #define APICV_INHIBIT_REASON_IRQWIN     3
 #define APICV_INHIBIT_REASON_PIT_REINJ  4
 #define APICV_INHIBIT_REASON_X2APIC    5
+#define APICV_INHIBIT_REASON_BLOCKIRQ  6
 
 struct kvm_arch {
        unsigned long n_used_mmu_pages;
@@ -1476,6 +1478,7 @@ struct kvm_x86_ops {
        int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
        int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+       int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
 
        int (*get_msr_feature)(struct kvm_msr_entry *entry);
 
index 6929987..56935eb 100644 (file)
@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
        return ret;
 }
 
+static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
+                                     unsigned long p2, unsigned long p3)
+{
+       long ret;
+
+       asm volatile("vmmcall"
+                    : "=a"(ret)
+                    : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+                    : "memory");
+       return ret;
+}
+
 #ifdef CONFIG_KVM_GUEST
 void kvmclock_init(void);
 void kvmclock_disable(void);
index 2d4f5c1..e2c6f43 100644 (file)
@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
+                                           bool enc);
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
@@ -78,6 +80,8 @@ static inline int __init
 early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void __init
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
 
 static inline void mem_encrypt_free_decrypted_mem(void) { }
 
index cebec95..21c4a69 100644 (file)
@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
        PVOP_VCALL1(mmu.exit_mmap, mm);
 }
 
+static inline void notify_page_enc_status_changed(unsigned long pfn,
+                                                 int npages, bool enc)
+{
+       PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
+}
+
 #ifdef CONFIG_PARAVIRT_XXL
 static inline void load_sp0(unsigned long sp0)
 {
index fc1151e..a69012e 100644 (file)
@@ -168,6 +168,7 @@ struct pv_mmu_ops {
 
        /* Hook for intercepting the destruction of an mm_struct. */
        void (*exit_mmap)(struct mm_struct *mm);
+       void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
 
 #ifdef CONFIG_PARAVIRT_XXL
        struct paravirt_callee_save read_cr2;
index 191878a..355d38c 100644 (file)
@@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void)  { return 0; }
 static inline u32 amd_get_highest_perf(void)           { return 0; }
 #endif
 
+#define for_each_possible_hypervisor_cpuid_base(function) \
+       for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
 static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
 {
        uint32_t base, eax, signature[3];
 
-       for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+       for_each_possible_hypervisor_cpuid_base(base) {
                cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
 
                if (!memcmp(sig, signature, 12) &&
index 43fa081..8726175 100644 (file)
@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
 bool kernel_page_present(struct page *page);
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
 
 extern int kernel_set_to_readonly;
 
index 08b0e90..81a0211 100644 (file)
@@ -126,6 +126,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 
 void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
+void smp_prepare_cpus_common(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
index cbb67b6..39ebe05 100644 (file)
@@ -27,6 +27,7 @@
            ".globl " STATIC_CALL_TRAMP_STR(name) "             \n"     \
            STATIC_CALL_TRAMP_STR(name) ":                      \n"     \
            insns "                                             \n"     \
+           ".byte 0x53, 0x43, 0x54                             \n"     \
            ".type " STATIC_CALL_TRAMP_STR(name) ", @function   \n"     \
            ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
            ".popsection                                        \n")
index 5146bba..6e64b27 100644 (file)
@@ -8,6 +8,7 @@
  * should be used to determine that a VM is running under KVM.
  */
 #define KVM_CPUID_SIGNATURE    0x40000000
+#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
 
 /* This CPUID returns two feature bitmaps in eax, edx. Before enabling
  * a particular paravirtualization, the appropriate feature bit should
index cb2fdd1..c881bca 100644 (file)
@@ -76,6 +76,7 @@ static const struct cpuid_dep cpuid_deps[] = {
        { X86_FEATURE_SGX1,                     X86_FEATURE_SGX       },
        { X86_FEATURE_SGX2,                     X86_FEATURE_SGX1      },
        { X86_FEATURE_XFD,                      X86_FEATURE_XSAVES    },
+       { X86_FEATURE_XFD,                      X86_FEATURE_XGETBV1   },
        { X86_FEATURE_AMX_TILE,                 X86_FEATURE_XFD       },
        {}
 };
index acfd5d9..bb9a46a 100644 (file)
@@ -547,12 +547,13 @@ bool intel_filter_mce(struct mce *m)
 {
        struct cpuinfo_x86 *c = &boot_cpu_data;
 
-       /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
+       /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
        if ((c->x86 == 6) &&
            ((c->x86_model == INTEL_FAM6_HASWELL) ||
             (c->x86_model == INTEL_FAM6_HASWELL_L) ||
             (c->x86_model == INTEL_FAM6_BROADWELL) ||
-            (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+            (c->x86_model == INTEL_FAM6_HASWELL_G) ||
+            (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
            (m->bank == 0) &&
            ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
                return true;
index 4794b71..ff55df6 100644 (file)
@@ -163,12 +163,22 @@ static uint32_t  __init ms_hyperv_platform(void)
        cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
              &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
 
-       if (eax >= HYPERV_CPUID_MIN &&
-           eax <= HYPERV_CPUID_MAX &&
-           !memcmp("Microsoft Hv", hyp_signature, 12))
-               return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+       if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
+           memcmp("Microsoft Hv", hyp_signature, 12))
+               return 0;
 
-       return 0;
+       /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
+       eax = cpuid_eax(HYPERV_CPUID_FEATURES);
+       if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
+               pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
+               return 0;
+       }
+       if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
+               pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
+               return 0;
+       }
+
+       return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
 }
 
 static unsigned char hv_get_nmi_reason(void)
index e18210d..86ea7c0 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <asm/cpufeature.h>
 #include <asm/fpu/xstate.h>
+#include <asm/fpu/xcr.h>
 
 #ifdef CONFIG_X86_64
 DECLARE_PER_CPU(u64, xfd_state);
@@ -199,6 +200,32 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate)
 }
 
 /*
+ * XSAVE itself always writes all requested xfeatures.  Removing features
+ * from the request bitmap reduces the features which are written.
+ * Generate a mask of features which must be written to a sigframe.  The
+ * unset features can be optimized away and not written.
+ *
+ * This optimization is user-visible.  Only use for states where
+ * uninitialized sigframe contents are tolerable, like dynamic features.
+ *
+ * Users of buffers produced with this optimization must check XSTATE_BV
+ * to determine which features have been optimized out.
+ */
+static inline u64 xfeatures_need_sigframe_write(void)
+{
+       u64 xfeaures_to_write;
+
+       /* In-use features must be written: */
+       xfeaures_to_write = xfeatures_in_use();
+
+       /* Also write all non-optimizable sigframe features: */
+       xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
+                            ~XFEATURE_MASK_SIGFRAME_INITOPT;
+
+       return xfeaures_to_write;
+}
+
+/*
  * Save xstate to user space xsave area.
  *
  * We don't use modified optimization because xrstor/xrstors might track
@@ -220,10 +247,16 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
         */
        struct fpstate *fpstate = current->thread.fpu.fpstate;
        u64 mask = fpstate->user_xfeatures;
-       u32 lmask = mask;
-       u32 hmask = mask >> 32;
+       u32 lmask;
+       u32 hmask;
        int err;
 
+       /* Optimize away writing unnecessary xfeatures: */
+       if (fpu_state_size_dynamic())
+               mask &= xfeatures_need_sigframe_write();
+
+       lmask = mask;
+       hmask = mask >> 32;
        xfd_validate_state(fpstate, mask, false);
 
        stac();
index 8863d19..59abbda 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/swait.h>
 #include <linux/syscore_ops.h>
 #include <linux/cc_platform.h>
+#include <linux/efi.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -41,6 +42,7 @@
 #include <asm/ptrace.h>
 #include <asm/reboot.h>
 #include <asm/svm.h>
+#include <asm/e820/api.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
 
@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
        kvm_disable_steal_time();
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+       if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+               wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
        kvm_pv_disable_apf();
        if (!shutdown)
                apf_task_wake_all();
@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
        __send_ipi_mask(local_mask, vector);
 }
 
+static int __init setup_efi_kvm_sev_migration(void)
+{
+       efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
+       efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
+       efi_status_t status;
+       unsigned long size;
+       bool enabled;
+
+       if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
+           !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+               return 0;
+
+       if (!efi_enabled(EFI_BOOT))
+               return 0;
+
+       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+               pr_info("%s : EFI runtime services are not enabled\n", __func__);
+               return 0;
+       }
+
+       size = sizeof(enabled);
+
+       /* Get variable contents into buffer */
+       status = efi.get_variable(efi_sev_live_migration_enabled,
+                                 &efi_variable_guid, NULL, &size, &enabled);
+
+       if (status == EFI_NOT_FOUND) {
+               pr_info("%s : EFI live migration variable not found\n", __func__);
+               return 0;
+       }
+
+       if (status != EFI_SUCCESS) {
+               pr_info("%s : EFI variable retrieval failed\n", __func__);
+               return 0;
+       }
+
+       if (enabled == 0) {
+               pr_info("%s: live migration disabled in EFI\n", __func__);
+               return 0;
+       }
+
+       pr_info("%s : live migration enabled in EFI\n", __func__);
+       wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
+
+       return 1;
+}
+
+late_initcall(setup_efi_kvm_sev_migration);
+
 /*
  * Set the IPI entry points
  */
@@ -756,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
                return 0;       /* So we don't blow up on old processors */
 
        if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
-               return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
+               return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
 
        return 0;
 }
@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
        return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
 }
 
+static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
+{
+       kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
+                          KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+}
+
 static void __init kvm_init_platform(void)
 {
+       if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+           kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
+               unsigned long nr_pages;
+               int i;
+
+               pv_ops.mmu.notify_page_enc_status_changed =
+                       kvm_sev_hc_page_enc_status;
+
+               /*
+                * Reset the host's shared pages list related to kernel
+                * specific page encryption status settings before we load a
+                * new kernel by kexec. Reset the page encryption status
+                * during early boot intead of just before kexec to avoid SMP
+                * races during kvm_pv_guest_cpu_reboot().
+                * NOTE: We cannot reset the complete shared pages list
+                * here as we need to retain the UEFI/OVMF firmware
+                * specific settings.
+                */
+
+               for (i = 0; i < e820_table->nr_entries; i++) {
+                       struct e820_entry *entry = &e820_table->entries[i];
+
+                       if (entry->type != E820_TYPE_RAM)
+                               continue;
+
+                       nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
+
+                       kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
+                                      nr_pages,
+                                      KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+               }
+
+               /*
+                * Ensure that _bss_decrypted section is marked as decrypted in the
+                * shared pages list.
+                */
+               nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
+                                       PAGE_SIZE);
+               early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
+                                               nr_pages, 0);
+
+               /*
+                * If not booted using EFI, enable Live migration support.
+                */
+               if (!efi_enabled(EFI_BOOT))
+                       wrmsrl(MSR_KVM_MIGRATION_CONTROL,
+                              KVM_MIGRATION_READY);
+       }
        kvmclock_init();
        x86_platform.apic_post_init = kvm_apic_init;
 }
index 7157c2d..7f7636a 100644 (file)
@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,
 
        .mmu.exit_mmap          = paravirt_nop,
+       .mmu.notify_page_enc_status_changed     = paravirt_nop,
 
 #ifdef CONFIG_PARAVIRT_XXL
        .mmu.read_cr2           = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
index 8241927..ac2909f 100644 (file)
@@ -1350,12 +1350,7 @@ static void __init smp_get_logical_apicid(void)
                cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
-/*
- * Prepare for SMP bootup.
- * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
- *            for common interface support.
- */
-void __init native_smp_prepare_cpus(unsigned int max_cpus)
+void __init smp_prepare_cpus_common(void)
 {
        unsigned int i;
 
@@ -1386,6 +1381,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
        set_sched_topology(x86_topology);
 
        set_cpu_sibling_map(0);
+}
+
+/*
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
+ *            for common interface support.
+ */
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
+{
+       smp_prepare_cpus_common();
+
        init_freq_invariance(false, false);
        smp_sanity_check();
 
index ea028e7..9c407a3 100644 (file)
@@ -56,10 +56,15 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
        text_poke_bp(insn, code, size, emulate);
 }
 
-static void __static_call_validate(void *insn, bool tail)
+static void __static_call_validate(void *insn, bool tail, bool tramp)
 {
        u8 opcode = *(u8 *)insn;
 
+       if (tramp && memcmp(insn+5, "SCT", 3)) {
+               pr_err("trampoline signature fail");
+               BUG();
+       }
+
        if (tail) {
                if (opcode == JMP32_INSN_OPCODE ||
                    opcode == RET_INSN_OPCODE)
@@ -74,7 +79,8 @@ static void __static_call_validate(void *insn, bool tail)
        /*
         * If we ever trigger this, our text is corrupt, we'll probably not live long.
         */
-       WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+       pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+       BUG();
 }
 
 static inline enum insn_type __sc_insn(bool null, bool tail)
@@ -97,12 +103,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
        mutex_lock(&text_mutex);
 
        if (tramp) {
-               __static_call_validate(tramp, true);
+               __static_call_validate(tramp, true, true);
                __static_call_transform(tramp, __sc_insn(!func, true), func);
        }
 
        if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
-               __static_call_validate(site, tail);
+               __static_call_validate(site, tail, false);
                __static_call_transform(site, __sc_insn(!func, tail), func);
        }
 
index f14f69d..cce1c89 100644 (file)
@@ -106,7 +106,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
         */
        local_irq_enable();
 
-       BUG_ON(!vm86 || !vm86->user_vm86);
+       BUG_ON(!vm86);
 
        set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
        user = vm86->user_vm86;
index 2d70edb..07e9215 100644 (file)
@@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
        return 0;
 }
 
-void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
 {
-       struct kvm_cpuid_entry2 *best;
+       u32 function;
+       struct kvm_cpuid_entry2 *entry;
+
+       vcpu->arch.kvm_cpuid_base = 0;
+
+       for_each_possible_hypervisor_cpuid_base(function) {
+               entry = kvm_find_cpuid_entry(vcpu, function, 0);
 
-       best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+               if (entry) {
+                       u32 signature[3];
+
+                       signature[0] = entry->ebx;
+                       signature[1] = entry->ecx;
+                       signature[2] = entry->edx;
+
+                       BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
+                       if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
+                               vcpu->arch.kvm_cpuid_base = function;
+                               break;
+                       }
+               }
+       }
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+       u32 base = vcpu->arch.kvm_cpuid_base;
+
+       if (!base)
+               return NULL;
+
+       return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+}
+
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
 
        /*
         * save the feature bitmap to avoid cpuid lookup for every PV
@@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
                     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-       best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+       best = kvm_find_kvm_cpuid_features(vcpu);
        if (kvm_hlt_in_guest(vcpu->kvm) && best &&
                (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
                best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
@@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
        return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
 }
 
+static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
+                        int nent)
+{
+    int r;
+
+    r = kvm_check_cpuid(e2, nent);
+    if (r)
+        return r;
+
+    kvfree(vcpu->arch.cpuid_entries);
+    vcpu->arch.cpuid_entries = e2;
+    vcpu->arch.cpuid_nent = nent;
+
+    kvm_update_kvm_cpuid_base(vcpu);
+    kvm_update_cpuid_runtime(vcpu);
+    kvm_vcpu_after_set_cpuid(vcpu);
+
+    return 0;
+}
+
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                             struct kvm_cpuid *cpuid,
@@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                e2[i].padding[2] = 0;
        }
 
-       r = kvm_check_cpuid(e2, cpuid->nent);
-       if (r) {
+       r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+       if (r)
                kvfree(e2);
-               goto out_free_cpuid;
-       }
-
-       kvfree(vcpu->arch.cpuid_entries);
-       vcpu->arch.cpuid_entries = e2;
-       vcpu->arch.cpuid_nent = cpuid->nent;
-
-       kvm_update_cpuid_runtime(vcpu);
-       kvm_vcpu_after_set_cpuid(vcpu);
 
 out_free_cpuid:
        kvfree(e);
@@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
                        return PTR_ERR(e2);
        }
 
-       r = kvm_check_cpuid(e2, cpuid->nent);
-       if (r) {
+       r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+       if (r)
                kvfree(e2);
-               return r;
-       }
 
-       kvfree(vcpu->arch.cpuid_entries);
-       vcpu->arch.cpuid_entries = e2;
-       vcpu->arch.cpuid_nent = cpuid->nent;
-
-       kvm_update_cpuid_runtime(vcpu);
-       kvm_vcpu_after_set_cpuid(vcpu);
-
-       return 0;
+       return r;
 }
 
 int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
                }
                break;
        case KVM_CPUID_SIGNATURE: {
-               static const char signature[12] = "KVMKVMKVM\0\0";
-               const u32 *sigptr = (const u32 *)signature;
+               const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
                entry->eax = KVM_CPUID_FEATURES;
                entry->ebx = sigptr[0];
                entry->ecx = sigptr[1];
index 4f15c01..5e19e6e 100644 (file)
@@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
 
                if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
                        hv_vcpu->hv_vapic = data;
-                       if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
+                       if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
                                return 1;
                        break;
                }
@@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
                        return 1;
                hv_vcpu->hv_vapic = data;
                kvm_vcpu_mark_page_dirty(vcpu, gfn);
-               if (kvm_lapic_enable_pv_eoi(vcpu,
+               if (kvm_lapic_set_pv_eoi(vcpu,
                                            gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
                                            sizeof(struct hv_vp_assist_page)))
                        return 1;
@@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 {
        bool longmode;
 
-       longmode = is_64_bit_mode(vcpu);
+       longmode = is_64_bit_hypercall(vcpu);
        if (longmode)
                kvm_rax_write(vcpu, result);
        else {
@@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
        }
 
 #ifdef CONFIG_X86_64
-       if (is_64_bit_mode(vcpu)) {
+       if (is_64_bit_hypercall(vcpu)) {
                hc.param = kvm_rcx_read(vcpu);
                hc.ingpa = kvm_rdx_read(vcpu);
                hc.outgpa = kvm_r8_read(vcpu);
index d6ac32f..759952d 100644 (file)
@@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
        return 0;
 }
 
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
 {
        u64 addr = data & ~KVM_MSR_ENABLED;
        struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
        unsigned long new_len;
+       int ret;
 
        if (!IS_ALIGNED(addr, 4))
                return 1;
 
-       vcpu->arch.pv_eoi.msr_val = data;
-       if (!pv_eoi_enabled(vcpu))
-               return 0;
+       if (data & KVM_MSR_ENABLED) {
+               if (addr == ghc->gpa && len <= ghc->len)
+                       new_len = ghc->len;
+               else
+                       new_len = len;
 
-       if (addr == ghc->gpa && len <= ghc->len)
-               new_len = ghc->len;
-       else
-               new_len = len;
+               ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+               if (ret)
+                       return ret;
+       }
+
+       vcpu->arch.pv_eoi.msr_val = data;
 
-       return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+       return 0;
 }
 
 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
index d7c25d0..2b44e53 100644 (file)
@@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
 
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
 void kvm_lapic_exit(void);
 
 #define VEC_POS(v) ((v) & (32 - 1))
index 323b505..3be9bee 100644 (file)
@@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
                        new_spte |= PT_WRITABLE_MASK;
 
                        /*
-                        * Do not fix write-permission on the large spte.  Since
-                        * we only dirty the first page into the dirty-bitmap in
+                        * Do not fix write-permission on the large spte when
+                        * dirty logging is enabled. Since we only dirty the
+                        * first page into the dirty-bitmap in
                         * fast_pf_fix_direct_spte(), other pages are missed
                         * if its slot has dirty logging enabled.
                         *
                         * Instead, we let the slow page fault path create a
                         * normal spte to fix the access.
-                        *
-                        * See the comments in kvm_arch_commit_memory_region().
                         */
-                       if (sp->role.level > PG_LEVEL_4K)
+                       if (sp->role.level > PG_LEVEL_4K &&
+                           kvm_slot_dirty_track_enabled(fault->slot))
                                break;
                }
 
@@ -4682,6 +4682,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
                /* PKEY and LA57 are active iff long mode is active. */
                ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
                ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
+               ext.efer_lma = ____is_efer_lma(regs);
        }
 
        ext.valid = 1;
index 7c5dd83..a54c349 100644 (file)
@@ -897,7 +897,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
                                          struct kvm_page_fault *fault,
                                          struct tdp_iter *iter)
 {
-       struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
+       struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
        u64 new_spte;
        int ret = RET_PF_FIXED;
        bool wrprot = false;
index 0772bad..09873f6 100644 (file)
@@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 }
 
 /* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
        return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
 }
index 0e4f2b1..59d6b76 100644 (file)
@@ -32,7 +32,7 @@ struct kvm_pmu_ops {
        struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
                unsigned int idx, u64 *mask);
        struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
-       int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+       bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
        bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
        int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
        int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
 bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
 int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
 int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
index 8052d92..affc0ea 100644 (file)
@@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
                          BIT(APICV_INHIBIT_REASON_NESTED) |
                          BIT(APICV_INHIBIT_REASON_IRQWIN) |
                          BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
-                         BIT(APICV_INHIBIT_REASON_X2APIC);
+                         BIT(APICV_INHIBIT_REASON_X2APIC) |
+                         BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 
        return supported & BIT(bit);
 }
index fdf587f..871c426 100644 (file)
@@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
        return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
 }
 
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 
        idx &= ~(3u << 30);
 
-       return (idx >= pmu->nr_arch_gp_counters);
+       return idx < pmu->nr_arch_gp_counters;
 }
 
 /* idx is the ECX register of RDPMC instruction */
index 1964b9a..21ac0a5 100644 (file)
@@ -120,16 +120,26 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
        return true;
 }
 
+static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
+{
+       enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+       return misc_cg_try_charge(type, sev->misc_cg, 1);
+}
+
+static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
+{
+       enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+       misc_cg_uncharge(type, sev->misc_cg, 1);
+}
+
 static int sev_asid_new(struct kvm_sev_info *sev)
 {
        int asid, min_asid, max_asid, ret;
        bool retry = true;
-       enum misc_res_type type;
 
-       type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
        WARN_ON(sev->misc_cg);
        sev->misc_cg = get_current_misc_cg();
-       ret = misc_cg_try_charge(type, sev->misc_cg, 1);
+       ret = sev_misc_cg_try_charge(sev);
        if (ret) {
                put_misc_cg(sev->misc_cg);
                sev->misc_cg = NULL;
@@ -162,7 +172,7 @@ again:
 
        return asid;
 e_uncharge:
-       misc_cg_uncharge(type, sev->misc_cg, 1);
+       sev_misc_cg_uncharge(sev);
        put_misc_cg(sev->misc_cg);
        sev->misc_cg = NULL;
        return ret;
@@ -179,7 +189,6 @@ static void sev_asid_free(struct kvm_sev_info *sev)
 {
        struct svm_cpu_data *sd;
        int cpu;
-       enum misc_res_type type;
 
        mutex_lock(&sev_bitmap_lock);
 
@@ -192,8 +201,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
 
        mutex_unlock(&sev_bitmap_lock);
 
-       type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
-       misc_cg_uncharge(type, sev->misc_cg, 1);
+       sev_misc_cg_uncharge(sev);
        put_misc_cg(sev->misc_cg);
        sev->misc_cg = NULL;
 }
@@ -229,7 +237,6 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
-       bool es_active = argp->id == KVM_SEV_ES_INIT;
        int asid, ret;
 
        if (kvm->created_vcpus)
@@ -239,7 +246,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (unlikely(sev->active))
                return ret;
 
-       sev->es_active = es_active;
+       sev->active = true;
+       sev->es_active = argp->id == KVM_SEV_ES_INIT;
        asid = sev_asid_new(sev);
        if (asid < 0)
                goto e_no_asid;
@@ -249,8 +257,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
        if (ret)
                goto e_free;
 
-       sev->active = true;
-       sev->asid = asid;
        INIT_LIST_HEAD(&sev->regions_list);
 
        return 0;
@@ -260,6 +266,7 @@ e_free:
        sev->asid = 0;
 e_no_asid:
        sev->es_active = false;
+       sev->active = false;
        return ret;
 }
 
@@ -590,7 +597,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
         * traditional VMSA as it has been built so far (in prep
         * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
         */
-       memcpy(svm->vmsa, save, sizeof(*save));
+       memcpy(svm->sev_es.vmsa, save, sizeof(*save));
 
        return 0;
 }
@@ -612,11 +619,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
         * the VMSA memory content (i.e it will write the same memory region
         * with the guest's key), so invalidate it first.
         */
-       clflush_cache_range(svm->vmsa, PAGE_SIZE);
+       clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
 
        vmsa.reserved = 0;
        vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
-       vmsa.address = __sme_pa(svm->vmsa);
+       vmsa.address = __sme_pa(svm->sev_es.vmsa);
        vmsa.len = PAGE_SIZE;
        ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
        if (ret)
@@ -1522,7 +1529,7 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
        return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
 }
 
-static bool cmd_allowed_from_miror(u32 cmd_id)
+static bool is_cmd_allowed_from_mirror(u32 cmd_id)
 {
        /*
         * Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
@@ -1536,6 +1543,201 @@ static bool cmd_allowed_from_miror(u32 cmd_id)
        return false;
 }
 
+static int sev_lock_for_migration(struct kvm *kvm)
+{
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+       /*
+        * Bail if this VM is already involved in a migration to avoid deadlock
+        * between two VMs trying to migrate to/from each other.
+        */
+       if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
+               return -EBUSY;
+
+       mutex_lock(&kvm->lock);
+
+       return 0;
+}
+
+static void sev_unlock_after_migration(struct kvm *kvm)
+{
+       struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+       mutex_unlock(&kvm->lock);
+       atomic_set_release(&sev->migration_in_progress, 0);
+}
+
+
+static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+{
+       struct kvm_vcpu *vcpu;
+       int i, j;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (mutex_lock_killable(&vcpu->mutex))
+                       goto out_unlock;
+       }
+
+       return 0;
+
+out_unlock:
+       kvm_for_each_vcpu(j, vcpu, kvm) {
+               if (i == j)
+                       break;
+
+               mutex_unlock(&vcpu->mutex);
+       }
+       return -EINTR;
+}
+
+static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
+{
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               mutex_unlock(&vcpu->mutex);
+       }
+}
+
+static void sev_migrate_from(struct kvm_sev_info *dst,
+                             struct kvm_sev_info *src)
+{
+       dst->active = true;
+       dst->asid = src->asid;
+       dst->handle = src->handle;
+       dst->pages_locked = src->pages_locked;
+
+       src->asid = 0;
+       src->active = false;
+       src->handle = 0;
+       src->pages_locked = 0;
+
+       INIT_LIST_HEAD(&dst->regions_list);
+       list_replace_init(&src->regions_list, &dst->regions_list);
+}
+
+static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
+{
+       int i;
+       struct kvm_vcpu *dst_vcpu, *src_vcpu;
+       struct vcpu_svm *dst_svm, *src_svm;
+
+       if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
+               return -EINVAL;
+
+       kvm_for_each_vcpu(i, src_vcpu, src) {
+               if (!src_vcpu->arch.guest_state_protected)
+                       return -EINVAL;
+       }
+
+       kvm_for_each_vcpu(i, src_vcpu, src) {
+               src_svm = to_svm(src_vcpu);
+               dst_vcpu = kvm_get_vcpu(dst, i);
+               dst_svm = to_svm(dst_vcpu);
+
+               /*
+                * Transfer VMSA and GHCB state to the destination.  Nullify and
+                * clear source fields as appropriate, the state now belongs to
+                * the destination.
+                */
+               memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
+               dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
+               dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
+               dst_vcpu->arch.guest_state_protected = true;
+
+               memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
+               src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
+               src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+               src_vcpu->arch.guest_state_protected = false;
+       }
+       to_kvm_svm(src)->sev_info.es_active = false;
+       to_kvm_svm(dst)->sev_info.es_active = true;
+
+       return 0;
+}
+
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
+{
+       struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+       struct kvm_sev_info *src_sev, *cg_cleanup_sev;
+       struct file *source_kvm_file;
+       struct kvm *source_kvm;
+       bool charged = false;
+       int ret;
+
+       ret = sev_lock_for_migration(kvm);
+       if (ret)
+               return ret;
+
+       if (sev_guest(kvm)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       source_kvm_file = fget(source_fd);
+       if (!file_is_kvm(source_kvm_file)) {
+               ret = -EBADF;
+               goto out_fput;
+       }
+
+       source_kvm = source_kvm_file->private_data;
+       ret = sev_lock_for_migration(source_kvm);
+       if (ret)
+               goto out_fput;
+
+       if (!sev_guest(source_kvm)) {
+               ret = -EINVAL;
+               goto out_source;
+       }
+
+       src_sev = &to_kvm_svm(source_kvm)->sev_info;
+       dst_sev->misc_cg = get_current_misc_cg();
+       cg_cleanup_sev = dst_sev;
+       if (dst_sev->misc_cg != src_sev->misc_cg) {
+               ret = sev_misc_cg_try_charge(dst_sev);
+               if (ret)
+                       goto out_dst_cgroup;
+               charged = true;
+       }
+
+       ret = sev_lock_vcpus_for_migration(kvm);
+       if (ret)
+               goto out_dst_cgroup;
+       ret = sev_lock_vcpus_for_migration(source_kvm);
+       if (ret)
+               goto out_dst_vcpu;
+
+       if (sev_es_guest(source_kvm)) {
+               ret = sev_es_migrate_from(kvm, source_kvm);
+               if (ret)
+                       goto out_source_vcpu;
+       }
+       sev_migrate_from(dst_sev, src_sev);
+       kvm_vm_dead(source_kvm);
+       cg_cleanup_sev = src_sev;
+       ret = 0;
+
+out_source_vcpu:
+       sev_unlock_vcpus_for_migration(source_kvm);
+out_dst_vcpu:
+       sev_unlock_vcpus_for_migration(kvm);
+out_dst_cgroup:
+       /* Operates on the source on success, on the destination on failure.  */
+       if (charged)
+               sev_misc_cg_uncharge(cg_cleanup_sev);
+       put_misc_cg(cg_cleanup_sev->misc_cg);
+       cg_cleanup_sev->misc_cg = NULL;
+out_source:
+       sev_unlock_after_migration(source_kvm);
+out_fput:
+       if (source_kvm_file)
+               fput(source_kvm_file);
+out_unlock:
+       sev_unlock_after_migration(kvm);
+       return ret;
+}
+
 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
        struct kvm_sev_cmd sev_cmd;
@@ -1554,7 +1756,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 
        /* Only the enc_context_owner handles some memory enc operations. */
        if (is_mirroring_enc_context(kvm) &&
-           !cmd_allowed_from_miror(sev_cmd.id)) {
+           !is_cmd_allowed_from_mirror(sev_cmd.id)) {
                r = -EINVAL;
                goto out;
        }
@@ -1787,7 +1989,12 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
        mutex_unlock(&source_kvm->lock);
        mutex_lock(&kvm->lock);
 
-       if (sev_guest(kvm)) {
+       /*
+        * Disallow out-of-band SEV/SEV-ES init if the target is already an
+        * SEV guest, or if vCPUs have been created.  KVM relies on vCPUs being
+        * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+        */
+       if (sev_guest(kvm) || kvm->created_vcpus) {
                ret = -EINVAL;
                goto e_mirror_unlock;
        }
@@ -2038,16 +2245,16 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
        svm = to_svm(vcpu);
 
        if (vcpu->arch.guest_state_protected)
-               sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
-       __free_page(virt_to_page(svm->vmsa));
+               sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+       __free_page(virt_to_page(svm->sev_es.vmsa));
 
-       if (svm->ghcb_sa_free)
-               kfree(svm->ghcb_sa);
+       if (svm->sev_es.ghcb_sa_free)
+               kfree(svm->sev_es.ghcb_sa);
 }
 
 static void dump_ghcb(struct vcpu_svm *svm)
 {
-       struct ghcb *ghcb = svm->ghcb;
+       struct ghcb *ghcb = svm->sev_es.ghcb;
        unsigned int nbits;
 
        /* Re-use the dump_invalid_vmcb module parameter */
@@ -2073,7 +2280,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
 static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       struct ghcb *ghcb = svm->ghcb;
+       struct ghcb *ghcb = svm->sev_es.ghcb;
 
        /*
         * The GHCB protocol so far allows for the following data
@@ -2093,7 +2300,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
        struct kvm_vcpu *vcpu = &svm->vcpu;
-       struct ghcb *ghcb = svm->ghcb;
+       struct ghcb *ghcb = svm->sev_es.ghcb;
        u64 exit_code;
 
        /*
@@ -2140,7 +2347,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        struct ghcb *ghcb;
        u64 exit_code = 0;
 
-       ghcb = svm->ghcb;
+       ghcb = svm->sev_es.ghcb;
 
        /* Only GHCB Usage code 0 is supported */
        if (ghcb->ghcb_usage)
@@ -2258,33 +2465,34 @@ vmgexit_err:
 
 void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
-       if (!svm->ghcb)
+       if (!svm->sev_es.ghcb)
                return;
 
-       if (svm->ghcb_sa_free) {
+       if (svm->sev_es.ghcb_sa_free) {
                /*
                 * The scratch area lives outside the GHCB, so there is a
                 * buffer that, depending on the operation performed, may
                 * need to be synced, then freed.
                 */
-               if (svm->ghcb_sa_sync) {
+               if (svm->sev_es.ghcb_sa_sync) {
                        kvm_write_guest(svm->vcpu.kvm,
-                                       ghcb_get_sw_scratch(svm->ghcb),
-                                       svm->ghcb_sa, svm->ghcb_sa_len);
-                       svm->ghcb_sa_sync = false;
+                                       ghcb_get_sw_scratch(svm->sev_es.ghcb),
+                                       svm->sev_es.ghcb_sa,
+                                       svm->sev_es.ghcb_sa_len);
+                       svm->sev_es.ghcb_sa_sync = false;
                }
 
-               kfree(svm->ghcb_sa);
-               svm->ghcb_sa = NULL;
-               svm->ghcb_sa_free = false;
+               kfree(svm->sev_es.ghcb_sa);
+               svm->sev_es.ghcb_sa = NULL;
+               svm->sev_es.ghcb_sa_free = false;
        }
 
-       trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+       trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
 
        sev_es_sync_to_ghcb(svm);
 
-       kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
-       svm->ghcb = NULL;
+       kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
+       svm->sev_es.ghcb = NULL;
 }
 
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -2314,7 +2522,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
-       struct ghcb *ghcb = svm->ghcb;
+       struct ghcb *ghcb = svm->sev_es.ghcb;
        u64 ghcb_scratch_beg, ghcb_scratch_end;
        u64 scratch_gpa_beg, scratch_gpa_end;
        void *scratch_va;
@@ -2350,7 +2558,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
                        return false;
                }
 
-               scratch_va = (void *)svm->ghcb;
+               scratch_va = (void *)svm->sev_es.ghcb;
                scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
        } else {
                /*
@@ -2380,12 +2588,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
                 * the vCPU next time (i.e. a read was requested so the data
                 * must be written back to the guest memory).
                 */
-               svm->ghcb_sa_sync = sync;
-               svm->ghcb_sa_free = true;
+               svm->sev_es.ghcb_sa_sync = sync;
+               svm->sev_es.ghcb_sa_free = true;
        }
 
-       svm->ghcb_sa = scratch_va;
-       svm->ghcb_sa_len = len;
+       svm->sev_es.ghcb_sa = scratch_va;
+       svm->sev_es.ghcb_sa_len = len;
 
        return true;
 }
@@ -2504,15 +2712,15 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                return -EINVAL;
        }
 
-       if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+       if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
                /* Unable to map GHCB from guest */
                vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
                            ghcb_gpa);
                return -EINVAL;
        }
 
-       svm->ghcb = svm->ghcb_map.hva;
-       ghcb = svm->ghcb_map.hva;
+       svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
+       ghcb = svm->sev_es.ghcb_map.hva;
 
        trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
 
@@ -2535,7 +2743,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                ret = kvm_sev_es_mmio_read(vcpu,
                                           control->exit_info_1,
                                           control->exit_info_2,
-                                          svm->ghcb_sa);
+                                          svm->sev_es.ghcb_sa);
                break;
        case SVM_VMGEXIT_MMIO_WRITE:
                if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
@@ -2544,7 +2752,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                ret = kvm_sev_es_mmio_write(vcpu,
                                            control->exit_info_1,
                                            control->exit_info_2,
-                                           svm->ghcb_sa);
+                                           svm->sev_es.ghcb_sa);
                break;
        case SVM_VMGEXIT_NMI_COMPLETE:
                ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
@@ -2604,7 +2812,8 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
        if (!setup_vmgexit_scratch(svm, in, bytes))
                return -EINVAL;
 
-       return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
+       return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
+                                   count, in);
 }
 
 void sev_es_init_vmcb(struct vcpu_svm *svm)
@@ -2619,7 +2828,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
         * VMCB page. Do not include the encryption mask on the VMSA physical
         * address since hardware will access it using the guest key.
         */
-       svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+       svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
 
        /* Can't intercept CR register access, HV can't modify CR registers */
        svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2691,8 +2900,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
        struct vcpu_svm *svm = to_svm(vcpu);
 
        /* First SIPI: Use the values as initially set by the VMM */
-       if (!svm->received_first_sipi) {
-               svm->received_first_sipi = true;
+       if (!svm->sev_es.received_first_sipi) {
+               svm->sev_es.received_first_sipi = true;
                return;
        }
 
@@ -2701,8 +2910,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
         * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
         * non-zero value.
         */
-       if (!svm->ghcb)
+       if (!svm->sev_es.ghcb)
                return;
 
-       ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
 }
index b36ca4e..5630c24 100644 (file)
@@ -1452,7 +1452,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
        svm_switch_vmcb(svm, &svm->vmcb01);
 
        if (vmsa_page)
-               svm->vmsa = page_address(vmsa_page);
+               svm->sev_es.vmsa = page_address(vmsa_page);
 
        svm->guest_state_loaded = false;
 
@@ -2835,11 +2835,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
-       if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
+       if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
                return kvm_complete_insn_gp(vcpu, err);
 
-       ghcb_set_sw_exit_info_1(svm->ghcb, 1);
-       ghcb_set_sw_exit_info_2(svm->ghcb,
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
                                X86_TRAP_GP |
                                SVM_EVTINJ_TYPE_EXEPT |
                                SVM_EVTINJ_VALID);
@@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
        type = svm->vmcb->control.exit_info_2;
        gva = svm->vmcb->control.exit_info_1;
 
-       if (type > 3) {
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
-
        return kvm_handle_invpcid(vcpu, type, gva);
 }
 
@@ -4701,6 +4696,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .mem_enc_unreg_region = svm_unregister_enc_region,
 
        .vm_copy_enc_context_from = svm_vm_copy_asid_from,
+       .vm_move_enc_context_from = svm_vm_migrate_from,
 
        .can_emulate_instruction = svm_can_emulate_instruction,
 
index 5e9510d..5faad3d 100644 (file)
@@ -80,6 +80,7 @@ struct kvm_sev_info {
        u64 ap_jump_table;      /* SEV-ES AP Jump Table address */
        struct kvm *enc_context_owner; /* Owner of copied encryption context */
        struct misc_cg *misc_cg; /* For misc cgroup accounting */
+       atomic_t migration_in_progress;
 };
 
 struct kvm_svm {
@@ -123,6 +124,20 @@ struct svm_nested_state {
        bool initialized;
 };
 
+struct vcpu_sev_es_state {
+       /* SEV-ES support */
+       struct vmcb_save_area *vmsa;
+       struct ghcb *ghcb;
+       struct kvm_host_map ghcb_map;
+       bool received_first_sipi;
+
+       /* SEV-ES scratch area support */
+       void *ghcb_sa;
+       u32 ghcb_sa_len;
+       bool ghcb_sa_sync;
+       bool ghcb_sa_free;
+};
+
 struct vcpu_svm {
        struct kvm_vcpu vcpu;
        /* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
@@ -186,17 +201,7 @@ struct vcpu_svm {
                DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
        } shadow_msr_intercept;
 
-       /* SEV-ES support */
-       struct vmcb_save_area *vmsa;
-       struct ghcb *ghcb;
-       struct kvm_host_map ghcb_map;
-       bool received_first_sipi;
-
-       /* SEV-ES scratch area support */
-       void *ghcb_sa;
-       u32 ghcb_sa_len;
-       bool ghcb_sa_sync;
-       bool ghcb_sa_free;
+       struct vcpu_sev_es_state sev_es;
 
        bool guest_state_loaded;
 };
@@ -242,7 +247,7 @@ static __always_inline bool sev_es_guest(struct kvm *kvm)
 #ifdef CONFIG_KVM_AMD_SEV
        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
-       return sev_guest(kvm) && sev->es_active;
+       return sev->es_active && !WARN_ON_ONCE(!sev->active);
 #else
        return false;
 #endif
@@ -558,6 +563,7 @@ int svm_register_enc_region(struct kvm *kvm,
 int svm_unregister_enc_region(struct kvm *kvm,
                              struct kvm_enc_region *range);
 int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
 void pre_sev_run(struct vcpu_svm *svm, int cpu);
 void __init sev_set_cpu_caps(void);
 void __init sev_hardware_setup(void);
index b4ee5e9..1e2f669 100644 (file)
@@ -525,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
 }
 
 /*
- * Check if MSR is intercepted for L01 MSR bitmap.
+ * For x2APIC MSRs, ignore the vmcs01 bitmap.  L1 can enable x2APIC without L1
+ * itself utilizing x2APIC.  All MSRs were previously set to be intercepted,
+ * only the "disable intercept" case needs to be handled.
  */
-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
+                                                       unsigned long *msr_bitmap_l0,
+                                                       u32 msr, int type)
 {
-       unsigned long *msr_bitmap;
-       int f = sizeof(unsigned long);
+       if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
+               vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
 
-       if (!cpu_has_vmx_msr_bitmap())
-               return true;
-
-       msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
-
-       if (msr <= 0x1fff) {
-               return !!test_bit(msr, msr_bitmap + 0x800 / f);
-       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-               msr &= 0x1fff;
-               return !!test_bit(msr, msr_bitmap + 0xc00 / f);
-       }
-
-       return true;
-}
-
-/*
- * If a msr is allowed by L0, we should check whether it is allowed by L1.
- * The corresponding bit will be cleared unless both of L0 and L1 allow it.
- */
-static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
-                                              unsigned long *msr_bitmap_nested,
-                                              u32 msr, int type)
-{
-       int f = sizeof(unsigned long);
-
-       /*
-        * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
-        * have the write-low and read-high bitmap offsets the wrong way round.
-        * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
-        */
-       if (msr <= 0x1fff) {
-               if (type & MSR_TYPE_R &&
-                  !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
-                       /* read-low */
-                       __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
-
-               if (type & MSR_TYPE_W &&
-                  !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
-                       /* write-low */
-                       __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
-
-       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-               msr &= 0x1fff;
-               if (type & MSR_TYPE_R &&
-                  !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
-                       /* read-high */
-                       __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
-
-               if (type & MSR_TYPE_W &&
-                  !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
-                       /* write-high */
-                       __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
-
-       }
+       if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
+               vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
 }
 
 static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
@@ -600,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
        }
 }
 
+#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)                                    \
+static inline                                                                  \
+void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,                 \
+                                        unsigned long *msr_bitmap_l1,          \
+                                        unsigned long *msr_bitmap_l0, u32 msr) \
+{                                                                              \
+       if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||            \
+           vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))                       \
+               vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);                    \
+       else                                                                    \
+               vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);                  \
+}
+BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
+BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
+
+static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
+                                                   unsigned long *msr_bitmap_l1,
+                                                   unsigned long *msr_bitmap_l0,
+                                                   u32 msr, int types)
+{
+       if (types & MSR_TYPE_R)
+               nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
+                                                 msr_bitmap_l0, msr);
+       if (types & MSR_TYPE_W)
+               nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
+                                                  msr_bitmap_l0, msr);
+}
+
 /*
  * Merge L0's and L1's MSR bitmap, return false to indicate that
  * we do not use the hardware.
@@ -607,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
                                                 struct vmcs12 *vmcs12)
 {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
        int msr;
        unsigned long *msr_bitmap_l1;
-       unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
-       struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
+       unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+       struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
 
        /* Nothing to do if the MSR bitmap is not in use.  */
        if (!cpu_has_vmx_msr_bitmap() ||
@@ -625,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
        /*
         * To keep the control flow simple, pay eight 8-byte writes (sixteen
         * 4-byte writes on 32-bit systems) up front to enable intercepts for
-        * the x2APIC MSR range and selectively disable them below.
+        * the x2APIC MSR range and selectively toggle those relevant to L2.
         */
        enable_x2apic_msr_intercepts(msr_bitmap_l0);
 
@@ -644,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
                        }
                }
 
-               nested_vmx_disable_intercept_for_msr(
+               nested_vmx_disable_intercept_for_x2apic_msr(
                        msr_bitmap_l1, msr_bitmap_l0,
                        X2APIC_MSR(APIC_TASKPRI),
                        MSR_TYPE_R | MSR_TYPE_W);
 
                if (nested_cpu_has_vid(vmcs12)) {
-                       nested_vmx_disable_intercept_for_msr(
+                       nested_vmx_disable_intercept_for_x2apic_msr(
                                msr_bitmap_l1, msr_bitmap_l0,
                                X2APIC_MSR(APIC_EOI),
                                MSR_TYPE_W);
-                       nested_vmx_disable_intercept_for_msr(
+                       nested_vmx_disable_intercept_for_x2apic_msr(
                                msr_bitmap_l1, msr_bitmap_l0,
                                X2APIC_MSR(APIC_SELF_IPI),
                                MSR_TYPE_W);
                }
        }
 
-       /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+       /*
+        * Always check vmcs01's bitmap to honor userspace MSR filters and any
+        * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
+        */
 #ifdef CONFIG_X86_64
-       nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-                                            MSR_FS_BASE, MSR_TYPE_RW);
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_FS_BASE, MSR_TYPE_RW);
 
-       nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-                                            MSR_GS_BASE, MSR_TYPE_RW);
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_GS_BASE, MSR_TYPE_RW);
 
-       nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-                                            MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
 #endif
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
 
-       /*
-        * Checking the L0->L1 bitmap is trying to verify two things:
-        *
-        * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
-        *    ensures that we do not accidentally generate an L02 MSR bitmap
-        *    from the L12 MSR bitmap that is too permissive.
-        * 2. That L1 or L2s have actually used the MSR. This avoids
-        *    unnecessarily merging of the bitmap if the MSR is unused. This
-        *    works properly because we only update the L01 MSR bitmap lazily.
-        *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
-        *    updated to reflect this when L1 (or its L2s) actually write to
-        *    the MSR.
-        */
-       if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
-               nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap_l1, msr_bitmap_l0,
-                                       MSR_IA32_SPEC_CTRL,
-                                       MSR_TYPE_R | MSR_TYPE_W);
-
-       if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
-               nested_vmx_disable_intercept_for_msr(
-                                       msr_bitmap_l1, msr_bitmap_l0,
-                                       MSR_IA32_PRED_CMD,
-                                       MSR_TYPE_W);
+       nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+                                        MSR_IA32_PRED_CMD, MSR_TYPE_W);
 
-       kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
+       kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
        return true;
 }
@@ -706,33 +670,39 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
                                       struct vmcs12 *vmcs12)
 {
-       struct kvm_host_map map;
-       struct vmcs12 *shadow;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
 
        if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
            vmcs12->vmcs_link_pointer == INVALID_GPA)
                return;
 
-       shadow = get_shadow_vmcs12(vcpu);
-
-       if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+       if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+           kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+                                     vmcs12->vmcs_link_pointer, VMCS12_SIZE))
                return;
 
-       memcpy(shadow, map.hva, VMCS12_SIZE);
-       kvm_vcpu_unmap(vcpu, &map, false);
+       kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+                             VMCS12_SIZE);
 }
 
 static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
                                              struct vmcs12 *vmcs12)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
 
        if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
            vmcs12->vmcs_link_pointer == INVALID_GPA)
                return;
 
-       kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
-                       get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+       if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+           kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+                                     vmcs12->vmcs_link_pointer, VMCS12_SIZE))
+               return;
+
+       kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+                              VMCS12_SIZE);
 }
 
 /*
@@ -2866,6 +2836,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
+                                      struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+       if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+               !!(vcpu->arch.efer & EFER_LMA)))
+               return -EINVAL;
+#endif
+       return 0;
+}
+
 static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
                                       struct vmcs12 *vmcs12)
 {
@@ -2890,18 +2871,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
 #ifdef CONFIG_X86_64
-       ia32e = !!(vcpu->arch.efer & EFER_LMA);
+       ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
 #else
        ia32e = false;
 #endif
 
        if (ia32e) {
-               if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
-                   CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+               if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
                        return -EINVAL;
        } else {
-               if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
-                   CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+               if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
                    CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
                    CC((vmcs12->host_rip) >> 32))
                        return -EINVAL;
@@ -2946,9 +2925,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
                                          struct vmcs12 *vmcs12)
 {
-       int r = 0;
-       struct vmcs12 *shadow;
-       struct kvm_host_map map;
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
+       struct vmcs_hdr hdr;
 
        if (vmcs12->vmcs_link_pointer == INVALID_GPA)
                return 0;
@@ -2956,17 +2935,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
        if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
                return -EINVAL;
 
-       if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
-               return -EINVAL;
+       if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+           CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+                                        vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
+                return -EINVAL;
 
-       shadow = map.hva;
+       if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+                                           offsetof(struct vmcs12, hdr),
+                                           sizeof(hdr))))
+               return -EINVAL;
 
-       if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
-           CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
-               r = -EINVAL;
+       if (CC(hdr.revision_id != VMCS12_REVISION) ||
+           CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
+               return -EINVAL;
 
-       kvm_vcpu_unmap(vcpu, &map, false);
-       return r;
+       return 0;
 }
 
 /*
@@ -3571,6 +3554,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        if (nested_vmx_check_controls(vcpu, vmcs12))
                return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 
+       if (nested_vmx_check_address_space_size(vcpu, vmcs12))
+               return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
        if (nested_vmx_check_host_state(vcpu, vmcs12))
                return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
 
@@ -5300,10 +5286,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                return 1;
 
        if (vmx->nested.current_vmptr != vmptr) {
-               struct kvm_host_map map;
-               struct vmcs12 *new_vmcs12;
+               struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
+               struct vmcs_hdr hdr;
 
-               if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
+               if (ghc->gpa != vmptr &&
+                   kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
                        /*
                         * Reads from an unbacked page return all 1s,
                         * which means that the 32 bits located at the
@@ -5314,12 +5301,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                                VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
                }
 
-               new_vmcs12 = map.hva;
+               if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+                                                offsetof(struct vmcs12, hdr),
+                                                sizeof(hdr))) {
+                       return nested_vmx_fail(vcpu,
+                               VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+               }
 
-               if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
-                   (new_vmcs12->hdr.shadow_vmcs &&
+               if (hdr.revision_id != VMCS12_REVISION ||
+                   (hdr.shadow_vmcs &&
                     !nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
-                       kvm_vcpu_unmap(vcpu, &map, false);
                        return nested_vmx_fail(vcpu,
                                VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
                }
@@ -5330,8 +5321,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
                 * Load VMCS12 from guest memory since it is not already
                 * cached.
                 */
-               memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
-               kvm_vcpu_unmap(vcpu, &map, false);
+               if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
+                                         VMCS12_SIZE)) {
+                       return nested_vmx_fail(vcpu,
+                               VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+               }
 
                set_current_vmptr(vmx, vmptr);
        }
@@ -5379,7 +5373,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
        struct {
                u64 eptp, gpa;
        } operand;
-       int i, r;
+       int i, r, gpr_index;
 
        if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_EPT) ||
@@ -5392,7 +5386,8 @@ static int handle_invept(struct kvm_vcpu *vcpu)
                return 1;
 
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-       type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+       gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+       type = kvm_register_read(vcpu, gpr_index);
 
        types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
 
@@ -5459,7 +5454,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
                u64 gla;
        } operand;
        u16 vpid02;
-       int r;
+       int r, gpr_index;
 
        if (!(vmx->nested.msrs.secondary_ctls_high &
              SECONDARY_EXEC_ENABLE_VPID) ||
@@ -5472,7 +5467,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
                return 1;
 
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-       type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+       gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+       type = kvm_register_read(vcpu, gpr_index);
 
        types = (vmx->nested.msrs.vpid_caps &
                        VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
index b8e0d21..1b7456b 100644 (file)
@@ -118,16 +118,15 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
        }
 }
 
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
 {
        struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
        bool fixed = idx & (1u << 30);
 
        idx &= ~(3u << 30);
 
-       return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
-               (fixed && idx >= pmu->nr_arch_fixed_counters);
+       return fixed ? idx < pmu->nr_arch_fixed_counters
+                    : idx < pmu->nr_arch_gp_counters;
 }
 
 static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
index 76861b6..ba66c17 100644 (file)
@@ -769,24 +769,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
 /*
  * Check if MSR is intercepted for currently loaded MSR bitmap.
  */
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
 {
-       unsigned long *msr_bitmap;
-       int f = sizeof(unsigned long);
-
-       if (!cpu_has_vmx_msr_bitmap())
+       if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
                return true;
 
-       msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
-
-       if (msr <= 0x1fff) {
-               return !!test_bit(msr, msr_bitmap + 0x800 / f);
-       } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-               msr &= 0x1fff;
-               return !!test_bit(msr, msr_bitmap + 0xc00 / f);
-       }
-
-       return true;
+       return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
+                                        MSR_IA32_SPEC_CTRL);
 }
 
 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -3697,46 +3686,6 @@ void free_vpid(int vpid)
        spin_unlock(&vmx_vpid_lock);
 }
 
-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
-       int f = sizeof(unsigned long);
-
-       if (msr <= 0x1fff)
-               __clear_bit(msr, msr_bitmap + 0x000 / f);
-       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-               __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
-       int f = sizeof(unsigned long);
-
-       if (msr <= 0x1fff)
-               __clear_bit(msr, msr_bitmap + 0x800 / f);
-       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-               __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
-       int f = sizeof(unsigned long);
-
-       if (msr <= 0x1fff)
-               __set_bit(msr, msr_bitmap + 0x000 / f);
-       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-               __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
-       int f = sizeof(unsigned long);
-
-       if (msr <= 0x1fff)
-               __set_bit(msr, msr_bitmap + 0x800 / f);
-       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-               __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5494,6 +5443,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
                u64 pcid;
                u64 gla;
        } operand;
+       int gpr_index;
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
                kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5501,12 +5451,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
        }
 
        vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-       type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
-
-       if (type > 3) {
-               kvm_inject_gp(vcpu, 0);
-               return 1;
-       }
+       gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+       type = kvm_register_read(vcpu, gpr_index);
 
        /* According to the Intel instruction reference, the memory operand
         * is read even if it isn't needed (e.g., for type==all)
@@ -6749,7 +6695,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
         * If the L02 MSR bitmap does not intercept the MSR, then we need to
         * save it.
         */
-       if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+       if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
                vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
        x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
@@ -7563,7 +7509,8 @@ static void hardware_unsetup(void)
 static bool vmx_check_apicv_inhibit_reasons(ulong bit)
 {
        ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
-                         BIT(APICV_INHIBIT_REASON_HYPERV);
+                         BIT(APICV_INHIBIT_REASON_HYPERV) |
+                         BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
 
        return supported & BIT(bit);
 }
index e7db42e..4df2ac2 100644 (file)
@@ -142,6 +142,16 @@ struct nested_vmx {
        struct vmcs12 *cached_shadow_vmcs12;
 
        /*
+        * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
+        */
+       struct gfn_to_hva_cache shadow_vmcs12_cache;
+
+       /*
+        * GPA to HVA cache for VMCS12
+        */
+       struct gfn_to_hva_cache vmcs12_cache;
+
+       /*
         * Indicates if the shadow vmcs or enlightened vmcs must be updated
         * with the data held by struct vmcs12.
         */
@@ -400,6 +410,34 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
 
 void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 
+/*
+ * Note, early Intel manuals have the write-low and read-high bitmap offsets
+ * the wrong way round.  The bitmaps control MSRs 0x00000000-0x00001fff and
+ * 0xc0000000-0xc0001fff.  The former (low) uses bytes 0-0x3ff for reads and
+ * 0x800-0xbff for writes.  The latter (high) uses 0x400-0x7ff for reads and
+ * 0xc00-0xfff for writes.  MSRs not covered by either of the ranges always
+ * VM-Exit.
+ */
+#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base)      \
+static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap,  \
+                                                      u32 msr)                \
+{                                                                             \
+       int f = sizeof(unsigned long);                                         \
+                                                                              \
+       if (msr <= 0x1fff)                                                     \
+               return bitop##_bit(msr, bitmap + base / f);                    \
+       else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))                   \
+               return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
+       return (rtype)true;                                                    \
+}
+#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop)                 \
+       __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read,  0x0)     \
+       __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
+
+BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
+
 static inline u8 vmx_get_rvi(void)
 {
        return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@@ -522,4 +560,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
 
 void dump_vmcs(struct kvm_vcpu *vcpu);
 
+static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
+{
+       return (vmx_instr_info >> 28) & 0xf;
+}
+
 #endif /* __KVM_X86_VMX_H */
index c1c4e2b..5a403d9 100644 (file)
@@ -3260,8 +3260,11 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 
 static void record_steal_time(struct kvm_vcpu *vcpu)
 {
-       struct kvm_host_map map;
-       struct kvm_steal_time *st;
+       struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+       struct kvm_steal_time __user *st;
+       struct kvm_memslots *slots;
+       u64 steal;
+       u32 version;
 
        if (kvm_xen_msr_enabled(vcpu->kvm)) {
                kvm_xen_runstate_set_running(vcpu);
@@ -3271,47 +3274,86 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
-       /* -EAGAIN is returned in atomic context so we can just return. */
-       if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
-                       &map, &vcpu->arch.st.cache, false))
+       if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
                return;
 
-       st = map.hva +
-               offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+       slots = kvm_memslots(vcpu->kvm);
+
+       if (unlikely(slots->generation != ghc->generation ||
+                    kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+               gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+
+               /* We rely on the fact that it fits in a single page. */
+               BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+               if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+                   kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+                       return;
+       }
 
+       st = (struct kvm_steal_time __user *)ghc->hva;
        /*
         * Doing a TLB flush here, on the guest's behalf, can avoid
         * expensive IPIs.
         */
        if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
-               u8 st_preempted = xchg(&st->preempted, 0);
+               u8 st_preempted = 0;
+               int err = -EFAULT;
+
+               if (!user_access_begin(st, sizeof(*st)))
+                       return;
+
+               asm volatile("1: xchgb %0, %2\n"
+                            "xor %1, %1\n"
+                            "2:\n"
+                            _ASM_EXTABLE_UA(1b, 2b)
+                            : "+q" (st_preempted),
+                              "+&r" (err),
+                              "+m" (st->preempted));
+               if (err)
+                       goto out;
+
+               user_access_end();
+
+               vcpu->arch.st.preempted = 0;
 
                trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
                                       st_preempted & KVM_VCPU_FLUSH_TLB);
                if (st_preempted & KVM_VCPU_FLUSH_TLB)
                        kvm_vcpu_flush_tlb_guest(vcpu);
+
+               if (!user_access_begin(st, sizeof(*st)))
+                       goto dirty;
        } else {
-               st->preempted = 0;
-       }
+               if (!user_access_begin(st, sizeof(*st)))
+                       return;
 
-       vcpu->arch.st.preempted = 0;
+               unsafe_put_user(0, &st->preempted, out);
+               vcpu->arch.st.preempted = 0;
+       }
 
-       if (st->version & 1)
-               st->version += 1;  /* first time write, random junk */
+       unsafe_get_user(version, &st->version, out);
+       if (version & 1)
+               version += 1;  /* first time write, random junk */
 
-       st->version += 1;
+       version += 1;
+       unsafe_put_user(version, &st->version, out);
 
        smp_wmb();
 
-       st->steal += current->sched_info.run_delay -
+       unsafe_get_user(steal, &st->steal, out);
+       steal += current->sched_info.run_delay -
                vcpu->arch.st.last_steal;
        vcpu->arch.st.last_steal = current->sched_info.run_delay;
+       unsafe_put_user(steal, &st->steal, out);
 
-       smp_wmb();
+       version += 1;
+       unsafe_put_user(version, &st->version, out);
 
-       st->version += 1;
-
-       kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ out:
+       user_access_end();
+ dirty:
+       mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3517,7 +3559,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
                        return 1;
 
-               if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
+               if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
                        return 1;
                break;
 
@@ -4137,7 +4179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
                r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
                break;
        case KVM_CAP_NR_VCPUS:
-               r = KVM_SOFT_MAX_VCPUS;
+               r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
                break;
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
@@ -4351,8 +4393,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
 {
-       struct kvm_host_map map;
-       struct kvm_steal_time *st;
+       struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+       struct kvm_steal_time __user *st;
+       struct kvm_memslots *slots;
+       static const u8 preempted = KVM_VCPU_PREEMPTED;
 
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
@@ -4360,16 +4404,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
        if (vcpu->arch.st.preempted)
                return;
 
-       if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
-                       &vcpu->arch.st.cache, true))
+       /* This happens on process exit */
+       if (unlikely(current->mm != vcpu->kvm->mm))
+               return;
+
+       slots = kvm_memslots(vcpu->kvm);
+
+       if (unlikely(slots->generation != ghc->generation ||
+                    kvm_is_error_hva(ghc->hva) || !ghc->memslot))
                return;
 
-       st = map.hva +
-               offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+       st = (struct kvm_steal_time __user *)ghc->hva;
+       BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
 
-       st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+       if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+               vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
 
-       kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+       mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -5728,6 +5779,12 @@ split_irqchip_unlock:
                if (kvm_x86_ops.vm_copy_enc_context_from)
                        r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
                return r;
+       case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
+               r = -EINVAL;
+               if (kvm_x86_ops.vm_move_enc_context_from)
+                       r = kvm_x86_ops.vm_move_enc_context_from(
+                               kvm, cap->args[0]);
+               return r;
        case KVM_CAP_EXIT_HYPERCALL:
                if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
                        r = -EINVAL;
@@ -7328,7 +7385,9 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
                              u32 pmc)
 {
-       return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
+       if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
+               return 0;
+       return -EINVAL;
 }
 
 static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -8789,7 +8848,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 
        trace_kvm_hypercall(nr, a0, a1, a2, a3);
 
-       op_64_bit = is_64_bit_mode(vcpu);
+       op_64_bit = is_64_bit_hypercall(vcpu);
        if (!op_64_bit) {
                nr &= 0xFFFFFFFF;
                a0 &= 0xFFFFFFFF;
@@ -9488,12 +9547,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
        if (!kvm_apic_hw_enabled(vcpu->arch.apic))
                return;
 
-       if (to_hv_vcpu(vcpu))
+       if (to_hv_vcpu(vcpu)) {
                bitmap_or((ulong *)eoi_exit_bitmap,
                          vcpu->arch.ioapic_handled_vectors,
                          to_hv_synic(vcpu)->vec_bitmap, 256);
+               static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+               return;
+       }
 
-       static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+       static_call(kvm_x86_load_eoi_exitmap)(
+               vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
 }
 
 void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
@@ -9552,7 +9615,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
        }
 
        if (kvm_request_pending(vcpu)) {
-               if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+               if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
                        r = -EIO;
                        goto out;
                }
@@ -10564,6 +10627,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return ret;
 }
 
+static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
+{
+       bool inhibit = false;
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       down_write(&kvm->arch.apicv_update_lock);
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
+                       inhibit = true;
+                       break;
+               }
+       }
+       __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
+       up_write(&kvm->arch.apicv_update_lock);
+}
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
 {
@@ -10616,6 +10697,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 
        static_call(kvm_x86_update_exception_bitmap)(vcpu);
 
+       kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
+
        r = 0;
 
 out:
@@ -10859,11 +10942,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-       struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
        int idx;
 
-       kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
        kvmclock_reset(vcpu);
 
        static_call(kvm_x86_vcpu_free)(vcpu);
@@ -12275,7 +12355,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
                return kvm_skip_emulated_instruction(vcpu);
 
        default:
-               BUG(); /* We have already checked above that type <= 3 */
+               kvm_inject_gp(vcpu, 0);
+               return 1;
        }
 }
 EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
index ea264c4..997669a 100644 (file)
@@ -153,12 +153,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
 {
        int cs_db, cs_l;
 
+       WARN_ON_ONCE(vcpu->arch.guest_state_protected);
+
        if (!is_long_mode(vcpu))
                return false;
        static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
        return cs_l;
 }
 
+static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
+{
+       /*
+        * If running with protected guest state, the CS register is not
+        * accessible. The hypercall register values will have had to been
+        * provided in 64-bit mode, so assume the guest is in 64-bit.
+        */
+       return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
+}
+
 static inline bool x86_exception_has_error_code(unsigned int vector)
 {
        static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
index 8f62bae..dff2bdf 100644 (file)
@@ -127,9 +127,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
        state_entry_time = vx->runstate_entry_time;
        state_entry_time |= XEN_RUNSTATE_UPDATE;
 
-       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+       BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
                     sizeof(state_entry_time));
-       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+       BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
                     sizeof(state_entry_time));
 
        if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -144,9 +144,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
         */
        BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
                     offsetof(struct compat_vcpu_runstate_info, state));
-       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+       BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
                     sizeof(vx->current_runstate));
-       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+       BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
                     sizeof(vx->current_runstate));
 
        if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -163,9 +163,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
                     offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
        BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
                     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
-       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
-                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
-       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+       BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
+                    sizeof_field(struct compat_vcpu_runstate_info, time));
+       BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
                     sizeof(vx->runstate_times));
 
        if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -205,9 +205,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
        BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
                     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
        BUILD_BUG_ON(sizeof(rc) !=
-                    sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+                    sizeof_field(struct vcpu_info, evtchn_upcall_pending));
        BUILD_BUG_ON(sizeof(rc) !=
-                    sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+                    sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
        /*
         * For efficiency, this mirrors the checks for using the valid
@@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
                break;
 
        case KVM_XEN_ATTR_TYPE_SHARED_INFO:
-               data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
+               data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
                r = 0;
                break;
 
@@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
            kvm_hv_hypercall_enabled(vcpu))
                return kvm_hv_hypercall(vcpu);
 
-       longmode = is_64_bit_mode(vcpu);
+       longmode = is_64_bit_hypercall(vcpu);
        if (!longmode) {
                params[0] = (u32)kvm_rbx_read(vcpu);
                params[1] = (u32)kvm_rcx_read(vcpu);
index 23d54b8..3548730 100644 (file)
@@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
        swiotlb_adjust_size(size);
 }
 
-static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
 {
-       pgprot_t old_prot, new_prot;
-       unsigned long pfn, pa, size;
-       pte_t new_pte;
+       unsigned long pfn = 0;
+       pgprot_t prot;
 
        switch (level) {
        case PG_LEVEL_4K:
                pfn = pte_pfn(*kpte);
-               old_prot = pte_pgprot(*kpte);
+               prot = pte_pgprot(*kpte);
                break;
        case PG_LEVEL_2M:
                pfn = pmd_pfn(*(pmd_t *)kpte);
-               old_prot = pmd_pgprot(*(pmd_t *)kpte);
+               prot = pmd_pgprot(*(pmd_t *)kpte);
                break;
        case PG_LEVEL_1G:
                pfn = pud_pfn(*(pud_t *)kpte);
-               old_prot = pud_pgprot(*(pud_t *)kpte);
+               prot = pud_pgprot(*(pud_t *)kpte);
                break;
        default:
-               return;
+               WARN_ONCE(1, "Invalid level for kpte\n");
+               return 0;
+       }
+
+       if (ret_prot)
+               *ret_prot = prot;
+
+       return pfn;
+}
+
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+{
+#ifdef CONFIG_PARAVIRT
+       unsigned long sz = npages << PAGE_SHIFT;
+       unsigned long vaddr_end = vaddr + sz;
+
+       while (vaddr < vaddr_end) {
+               int psize, pmask, level;
+               unsigned long pfn;
+               pte_t *kpte;
+
+               kpte = lookup_address(vaddr, &level);
+               if (!kpte || pte_none(*kpte)) {
+                       WARN_ONCE(1, "kpte lookup for vaddr\n");
+                       return;
+               }
+
+               pfn = pg_level_to_pfn(level, kpte, NULL);
+               if (!pfn)
+                       continue;
+
+               psize = page_level_size(level);
+               pmask = page_level_mask(level);
+
+               notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
+
+               vaddr = (vaddr & pmask) + psize;
        }
+#endif
+}
+
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+       pgprot_t old_prot, new_prot;
+       unsigned long pfn, pa, size;
+       pte_t new_pte;
+
+       pfn = pg_level_to_pfn(level, kpte, &old_prot);
+       if (!pfn)
+               return;
 
        new_prot = old_prot;
        if (enc)
@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
 static int __init early_set_memory_enc_dec(unsigned long vaddr,
                                           unsigned long size, bool enc)
 {
-       unsigned long vaddr_end, vaddr_next;
+       unsigned long vaddr_end, vaddr_next, start;
        unsigned long psize, pmask;
        int split_page_size_mask;
        int level, ret;
        pte_t *kpte;
 
+       start = vaddr;
        vaddr_next = vaddr;
        vaddr_end = vaddr + size;
 
@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
        ret = 0;
 
+       notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
 out:
        __flush_tlb_all();
        return ret;
@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
        return early_set_memory_enc_dec(vaddr, size, true);
 }
 
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+{
+       notify_range_enc_status_changed(vaddr, npages, enc);
+}
+
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
 bool force_dma_unencrypted(struct device *dev)
 {
index 934dc5b..b407211 100644 (file)
@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
         */
        cpa_flush(&cpa, 0);
 
+       /*
+        * Notify hypervisor that a given memory range is mapped encrypted
+        * or decrypted.
+        */
+       notify_range_enc_status_changed(addr, numpages, enc);
+
        return ret;
 }
 
index 9e55bcb..6a8f3b5 100644 (file)
@@ -225,7 +225,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
 static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
 {
        unsigned cpu;
-       unsigned int i;
 
        if (skip_ioapic_setup) {
                char *m = (max_cpus == 0) ?
@@ -238,16 +237,9 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
        }
        xen_init_lock_cpu(0);
 
-       smp_store_boot_cpu_info();
-       cpu_data(0).x86_max_cores = 1;
+       smp_prepare_cpus_common();
 
-       for_each_possible_cpu(i) {
-               zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
-               zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-               zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
-               zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
-       }
-       set_cpu_sibling_map(0);
+       cpu_data(0).x86_max_cores = 1;
 
        speculative_store_bypass_ht_init();
 
index b043de2..9ee32f8 100644 (file)
@@ -809,10 +809,8 @@ noinline_for_stack bool submit_bio_checks(struct bio *bio)
        if (unlikely(!current->io_context))
                create_task_io_context(current, GFP_ATOMIC, q->node);
 
-       if (blk_throtl_bio(bio)) {
-               blkcg_bio_issue_init(bio);
+       if (blk_throtl_bio(bio))
                return false;
-       }
 
        blk_cgroup_bio_start(bio);
        blkcg_bio_issue_init(bio);
index c246c42..b925f3d 100644 (file)
@@ -104,8 +104,8 @@ static struct kobj_type blk_ia_ranges_ktype = {
 };
 
 /**
- * disk_register_ia_ranges - register with sysfs a set of independent
- *                         access ranges
+ * disk_register_independent_access_ranges - register with sysfs a set of
+ *             independent access ranges
  * @disk:      Target disk
  * @new_iars:  New set of independent access ranges
  *
index 4be652f..ba21449 100644 (file)
@@ -370,9 +370,6 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
        bool ret = false;
        enum hctx_type type;
 
-       if (bio_queue_enter(bio))
-               return false;
-
        if (e && e->type->ops.bio_merge) {
                ret = e->type->ops.bio_merge(q, bio, nr_segs);
                goto out_put;
@@ -397,7 +394,6 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
 
        spin_unlock(&ctx->lock);
 out_put:
-       blk_queue_exit(q);
        return ret;
 }
 
index 629cf42..3ab34c4 100644 (file)
@@ -2495,8 +2495,9 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
        return BLK_MAX_REQUEST_COUNT;
 }
 
-static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
-                                 unsigned int nr_segs, bool *same_queue_rq)
+static bool blk_mq_attempt_bio_merge(struct request_queue *q,
+                                    struct bio *bio, unsigned int nr_segs,
+                                    bool *same_queue_rq)
 {
        if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
                if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
@@ -2520,12 +2521,8 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
        };
        struct request *rq;
 
-       if (unlikely(bio_queue_enter(bio)))
+       if (blk_mq_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
                return NULL;
-       if (unlikely(!submit_bio_checks(bio)))
-               goto put_exit;
-       if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
-               goto put_exit;
 
        rq_qos_throttle(q, bio);
 
@@ -2542,26 +2539,44 @@ static struct request *blk_mq_get_new_requests(struct request_queue *q,
        rq_qos_cleanup(q, bio);
        if (bio->bi_opf & REQ_NOWAIT)
                bio_wouldblock_error(bio);
-put_exit:
-       blk_queue_exit(q);
+
        return NULL;
 }
 
+static inline bool blk_mq_can_use_cached_rq(struct request *rq,
+               struct bio *bio)
+{
+       if (blk_mq_get_hctx_type(bio->bi_opf) != rq->mq_hctx->type)
+               return false;
+
+       if (op_is_flush(rq->cmd_flags) != op_is_flush(bio->bi_opf))
+               return false;
+
+       return true;
+}
+
 static inline struct request *blk_mq_get_request(struct request_queue *q,
                                                 struct blk_plug *plug,
                                                 struct bio *bio,
                                                 unsigned int nsegs,
                                                 bool *same_queue_rq)
 {
+       struct request *rq;
+       bool checked = false;
+
        if (plug) {
-               struct request *rq;
 
                rq = rq_list_peek(&plug->cached_rq);
                if (rq && rq->q == q) {
                        if (unlikely(!submit_bio_checks(bio)))
                                return NULL;
-                       if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+                       if (blk_mq_attempt_bio_merge(q, bio, nsegs,
+                                               same_queue_rq))
                                return NULL;
+                       checked = true;
+                       if (!blk_mq_can_use_cached_rq(rq, bio))
+                               goto fallback;
+                       rq->cmd_flags = bio->bi_opf;
                        plug->cached_rq = rq_list_next(rq);
                        INIT_LIST_HEAD(&rq->queuelist);
                        rq_qos_throttle(q, bio);
@@ -2569,7 +2584,15 @@ static inline struct request *blk_mq_get_request(struct request_queue *q,
                }
        }
 
-       return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+fallback:
+       if (unlikely(bio_queue_enter(bio)))
+               return NULL;
+       if (!checked && !submit_bio_checks(bio))
+               return NULL;
+       rq = blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+       if (!rq)
+               blk_queue_exit(q);
+       return rq;
 }
 
 /**
index cb0b548..8acfa65 100644 (file)
@@ -89,15 +89,7 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue_type(struct request_queue *
        return q->queue_hw_ctx[q->tag_set->map[type].mq_map[cpu]];
 }
 
-/*
- * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
- * @q: request queue
- * @flags: request command flags
- * @ctx: software queue cpu ctx
- */
-static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
-                                                    unsigned int flags,
-                                                    struct blk_mq_ctx *ctx)
+static inline enum hctx_type blk_mq_get_hctx_type(unsigned int flags)
 {
        enum hctx_type type = HCTX_TYPE_DEFAULT;
 
@@ -108,8 +100,20 @@ static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
                type = HCTX_TYPE_POLL;
        else if ((flags & REQ_OP_MASK) == REQ_OP_READ)
                type = HCTX_TYPE_READ;
-       
-       return ctx->hctxs[type];
+       return type;
+}
+
+/*
+ * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue
+ * @q: request queue
+ * @flags: request command flags
+ * @ctx: software queue cpu ctx
+ */
+static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
+                                                    unsigned int flags,
+                                                    struct blk_mq_ctx *ctx)
+{
+       return ctx->hctxs[blk_mq_get_hctx_type(flags)];
 }
 
 /*
@@ -149,7 +153,7 @@ struct blk_mq_alloc_data {
        blk_mq_req_flags_t flags;
        unsigned int shallow_depth;
        unsigned int cmd_flags;
-       unsigned int rq_flags;
+       req_flags_t rq_flags;
 
        /* allocate multiple requests/tags in one go */
        unsigned int nr_tags;
index 1d0c76c..774ecc5 100644 (file)
@@ -429,9 +429,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
                op = REQ_OP_ZONE_RESET;
 
                /* Invalidate the page cache, including dirty pages. */
+               filemap_invalidate_lock(bdev->bd_inode->i_mapping);
                ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
                if (ret)
-                       return ret;
+                       goto fail;
                break;
        case BLKOPENZONE:
                op = REQ_OP_ZONE_OPEN;
@@ -449,15 +450,9 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
        ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
                               GFP_KERNEL);
 
-       /*
-        * Invalidate the page cache again for zone reset: writes can only be
-        * direct for zoned devices so concurrent writes would not add any page
-        * to the page cache after/during reset. The page cache may be filled
-        * again due to concurrent reads though and dropping the pages for
-        * these is fine.
-        */
-       if (!ret && cmd == BLKRESETZONE)
-               ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+fail:
+       if (cmd == BLKRESETZONE)
+               filemap_invalidate_unlock(bdev->bd_inode->i_mapping);
 
        return ret;
 }
index ca2fbab..c5392cc 100644 (file)
@@ -394,8 +394,8 @@ static void disk_scan_partitions(struct gendisk *disk)
  * This function registers the partitioning information in @disk
  * with the kernel.
  */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-                    const struct attribute_group **groups)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+                                const struct attribute_group **groups)
 
 {
        struct device *ddev = disk_to_dev(disk);
@@ -544,7 +544,7 @@ out_disk_release_events:
 out_free_ext_minor:
        if (disk->major == BLOCK_EXT_MAJOR)
                blk_free_ext_minor(disk->first_minor);
-       return WARN_ON_ONCE(ret); /* keep until all callers handle errors */
+       return ret;
 }
 EXPORT_SYMBOL(device_add_disk);
 
index d6af0ac..0a1d10a 100644 (file)
@@ -113,6 +113,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
        uint64_t range[2];
        uint64_t start, len;
        struct request_queue *q = bdev_get_queue(bdev);
+       struct inode *inode = bdev->bd_inode;
        int err;
 
        if (!(mode & FMODE_WRITE))
@@ -135,12 +136,17 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
        if (start + len > bdev_nr_bytes(bdev))
                return -EINVAL;
 
+       filemap_invalidate_lock(inode->i_mapping);
        err = truncate_bdev_range(bdev, mode, start, start + len - 1);
        if (err)
-               return err;
+               goto fail;
 
-       return blkdev_issue_discard(bdev, start >> 9, len >> 9,
-                                   GFP_KERNEL, flags);
+       err = blkdev_issue_discard(bdev, start >> 9, len >> 9,
+                                  GFP_KERNEL, flags);
+
+fail:
+       filemap_invalidate_unlock(inode->i_mapping);
+       return err;
 }
 
 static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
@@ -148,6 +154,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
 {
        uint64_t range[2];
        uint64_t start, end, len;
+       struct inode *inode = bdev->bd_inode;
        int err;
 
        if (!(mode & FMODE_WRITE))
@@ -170,12 +177,17 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode,
                return -EINVAL;
 
        /* Invalidate the page cache, including dirty pages */
+       filemap_invalidate_lock(inode->i_mapping);
        err = truncate_bdev_range(bdev, mode, start, end);
        if (err)
-               return err;
+               goto fail;
+
+       err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
+                                  BLKDEV_ZERO_NOUNMAP);
 
-       return blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
-                       BLKDEV_ZERO_NOUNMAP);
+fail:
+       filemap_invalidate_unlock(inode->i_mapping);
+       return err;
 }
 
 static int put_ushort(unsigned short __user *argp, unsigned short val)
index 1a3309f..154a969 100644 (file)
 #define ZSTD_DEF_LEVEL 3
 
 struct zstd_ctx {
-       ZSTD_CCtx *cctx;
-       ZSTD_DCtx *dctx;
+       zstd_cctx *cctx;
+       zstd_dctx *dctx;
        void *cwksp;
        void *dwksp;
 };
 
-static ZSTD_parameters zstd_params(void)
+static zstd_parameters zstd_params(void)
 {
-       return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0);
+       return zstd_get_params(ZSTD_DEF_LEVEL, 0);
 }
 
 static int zstd_comp_init(struct zstd_ctx *ctx)
 {
        int ret = 0;
-       const ZSTD_parameters params = zstd_params();
-       const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams);
+       const zstd_parameters params = zstd_params();
+       const size_t wksp_size = zstd_cctx_workspace_bound(&params.cParams);
 
        ctx->cwksp = vzalloc(wksp_size);
        if (!ctx->cwksp) {
@@ -41,7 +41,7 @@ static int zstd_comp_init(struct zstd_ctx *ctx)
                goto out;
        }
 
-       ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size);
+       ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size);
        if (!ctx->cctx) {
                ret = -EINVAL;
                goto out_free;
@@ -56,7 +56,7 @@ out_free:
 static int zstd_decomp_init(struct zstd_ctx *ctx)
 {
        int ret = 0;
-       const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+       const size_t wksp_size = zstd_dctx_workspace_bound();
 
        ctx->dwksp = vzalloc(wksp_size);
        if (!ctx->dwksp) {
@@ -64,7 +64,7 @@ static int zstd_decomp_init(struct zstd_ctx *ctx)
                goto out;
        }
 
-       ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size);
+       ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size);
        if (!ctx->dctx) {
                ret = -EINVAL;
                goto out_free;
@@ -152,10 +152,10 @@ static int __zstd_compress(const u8 *src, unsigned int slen,
 {
        size_t out_len;
        struct zstd_ctx *zctx = ctx;
-       const ZSTD_parameters params = zstd_params();
+       const zstd_parameters params = zstd_params();
 
-       out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params);
-       if (ZSTD_isError(out_len))
+       out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, &params);
+       if (zstd_is_error(out_len))
                return -EINVAL;
        *dlen = out_len;
        return 0;
@@ -182,8 +182,8 @@ static int __zstd_decompress(const u8 *src, unsigned int slen,
        size_t out_len;
        struct zstd_ctx *zctx = ctx;
 
-       out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen);
-       if (ZSTD_isError(out_len))
+       out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen);
+       if (zstd_is_error(out_len))
                return -EINVAL;
        *dlen = out_len;
        return 0;
index 981e72a..ff16a36 100644 (file)
@@ -677,6 +677,8 @@ void remove_cpu_topology(unsigned int cpu)
                cpumask_clear_cpu(cpu, topology_core_cpumask(sibling));
        for_each_cpu(sibling, topology_sibling_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling));
+       for_each_cpu(sibling, topology_cluster_cpumask(cpu))
+               cpumask_clear_cpu(cpu, topology_cluster_cpumask(sibling));
        for_each_cpu(sibling, topology_llc_cpumask(cpu))
                cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling));
 
index 4c2f7d6..183d5cc 100644 (file)
@@ -485,7 +485,7 @@ static int __init brcmstb_gisb_arb_probe(struct platform_device *pdev)
        list_add_tail(&gdev->next, &brcmstb_gisb_arb_device_list);
 
 #ifdef CONFIG_MIPS
-       board_be_handler = brcmstb_bus_error_handler;
+       mips_set_be_handler(brcmstb_bus_error_handler);
 #endif
 
        if (list_is_singular(&brcmstb_gisb_arb_device_list)) {
index f15e262..64f316c 100644 (file)
@@ -10,7 +10,6 @@
 
 #include <linux/clk-provider.h>
 #include <linux/regmap.h>
-#include <linux/slab.h>
 
 #include "owl-factor.h"
 
index bc3be5f..24dab23 100644 (file)
@@ -51,6 +51,8 @@ static DEFINE_SPINLOCK(aspeed_g6_clk_lock);
 static struct clk_hw_onecell_data *aspeed_g6_clk_data;
 
 static void __iomem *scu_g6_base;
+/* AST2600 revision: A0, A1, A2, etc */
+static u8 soc_rev;
 
 /*
  * Clocks marked with CLK_IS_CRITICAL:
@@ -191,9 +193,8 @@ static struct clk_hw *ast2600_calc_pll(const char *name, u32 val)
 static struct clk_hw *ast2600_calc_apll(const char *name, u32 val)
 {
        unsigned int mult, div;
-       u32 chip_id = readl(scu_g6_base + ASPEED_G6_SILICON_REV);
 
-       if (((chip_id & CHIP_REVISION_ID) >> 16) >= 2) {
+       if (soc_rev >= 2) {
                if (val & BIT(24)) {
                        /* Pass through mode */
                        mult = div = 1;
@@ -707,7 +708,7 @@ static const u32 ast2600_a1_axi_ahb200_tbl[] = {
 static void __init aspeed_g6_cc(struct regmap *map)
 {
        struct clk_hw *hw;
-       u32 val, div, divbits, chip_id, axi_div, ahb_div;
+       u32 val, div, divbits, axi_div, ahb_div;
 
        clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000);
 
@@ -738,8 +739,7 @@ static void __init aspeed_g6_cc(struct regmap *map)
                axi_div = 2;
 
        divbits = (val >> 11) & 0x3;
-       regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id);
-       if (chip_id & BIT(16)) {
+       if (soc_rev >= 1) {
                if (!divbits) {
                        ahb_div = ast2600_a1_axi_ahb200_tbl[(val >> 8) & 0x3];
                        if (val & BIT(16))
@@ -784,6 +784,8 @@ static void __init aspeed_g6_cc_init(struct device_node *np)
        if (!scu_g6_base)
                return;
 
+       soc_rev = (readl(scu_g6_base + ASPEED_G6_SILICON_REV) & CHIP_REVISION_ID) >> 16;
+
        aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws,
                                      ASPEED_G6_NUM_CLKS), GFP_KERNEL);
        if (!aspeed_g6_clk_data)
index c04ae0e..b9c5f90 100644 (file)
@@ -97,6 +97,7 @@ static int clk_composite_determine_rate(struct clk_hw *hw,
                                return ret;
 
                        req->rate = tmp_req.rate;
+                       req->best_parent_hw = tmp_req.best_parent_hw;
                        req->best_parent_rate = tmp_req.best_parent_rate;
 
                        return 0;
index 57e4597..93fa8c9 100644 (file)
@@ -1,15 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * clk-si5351.c: Silicon Laboratories Si5351A/B/C I2C Clock Generator
+ * clk-si5351.c: Skyworks / Silicon Labs Si5351A/B/C I2C Clock Generator
  *
  * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  * Rabeeh Khoury <rabeeh@solid-run.com>
  *
  * References:
  * [1] "Si5351A/B/C Data Sheet"
- *     https://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
- * [2] "Manually Generating an Si5351 Register Map"
- *     https://www.silabs.com/Support%20Documents/TechnicalDocs/AN619.pdf
+ *     https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/data-sheets/Si5351-B.pdf
+ * [2] "AN619: Manually Generating an Si5351 Register Map"
+ *     https://www.skyworksinc.com/-/media/Skyworks/SL/documents/public/application-notes/AN619.pdf
  */
 
 #include <linux/module.h>
index 73dc8ef..e9e2bfd 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /*
- * clk-si5351.h: Silicon Laboratories Si5351A/B/C I2C Clock Generator
+ * clk-si5351.h: Skyworks / Silicon Labs Si5351A/B/C I2C Clock Generator
  *
  * Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
  * Rabeeh Khoury <rabeeh@solid-run.com>
index c6d3b1a..e7be3e5 100644 (file)
@@ -905,7 +905,7 @@ output_error:
 
 static const struct of_device_id clk_vc5_of_match[];
 
-static int vc5_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int vc5_probe(struct i2c_client *client)
 {
        unsigned int oe, sd, src_mask = 0, src_val = 0;
        struct vc5_driver_data *vc5;
@@ -1244,7 +1244,7 @@ static struct i2c_driver vc5_driver = {
                .pm     = &vc5_pm_ops,
                .of_match_table = clk_vc5_of_match,
        },
-       .probe          = vc5_probe,
+       .probe_new      = vc5_probe,
        .remove         = vc5_remove,
        .id_table       = vc5_id,
 };
index 8199499..7d220a0 100644 (file)
@@ -391,11 +391,11 @@ struct clk_hw *__imx8m_clk_hw_composite(const char *name,
 
 #define imx8m_clk_hw_composite(name, parent_names, reg) \
        _imx8m_clk_hw_composite(name, parent_names, reg, \
-                       IMX_COMPOSITE_CORE, IMX_COMPOSITE_CLK_FLAGS_DEFAULT)
+                       0, IMX_COMPOSITE_CLK_FLAGS_DEFAULT)
 
 #define imx8m_clk_hw_composite_critical(name, parent_names, reg) \
        _imx8m_clk_hw_composite(name, parent_names, reg, \
-                       IMX_COMPOSITE_CORE, IMX_COMPOSITE_CLK_FLAGS_CRITICAL)
+                       0, IMX_COMPOSITE_CLK_FLAGS_CRITICAL)
 
 #define imx8m_clk_hw_composite_bus(name, parent_names, reg)    \
        _imx8m_clk_hw_composite(name, parent_names, reg, \
index 266c759..af31633 100644 (file)
@@ -453,15 +453,15 @@ ingenic_clk_calc_div(struct clk_hw *hw,
        }
 
        /* Impose hardware constraints */
-       div = min_t(unsigned, div, 1 << clk_info->div.bits);
-       div = max_t(unsigned, div, 1);
+       div = clamp_t(unsigned int, div, clk_info->div.div,
+                     clk_info->div.div << clk_info->div.bits);
 
        /*
         * If the divider value itself must be divided before being written to
         * the divider register, we must ensure we don't have any bits set that
         * would be lost as a result of doing so.
         */
-       div /= clk_info->div.div;
+       div = DIV_ROUND_UP(div, clk_info->div.div);
        div *= clk_info->div.div;
 
        return div;
index 5154b0c..744d136 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/delay.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4725b-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4725b-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index cd878f0..43ffb62 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4740-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4740-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index 1448379..080d492 100644 (file)
@@ -12,7 +12,7 @@
 
 #include <linux/clk.h>
 
-#include <dt-bindings/clock/jz4760-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4760-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index 2321742..8c6c120 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4770-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4770-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index 0268d23..e357c22 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/iopoll.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/jz4780-cgu.h>
+#include <dt-bindings/clock/ingenic,jz4780-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index 9aa20b5..3c4d5a7 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/x1000-cgu.h>
+#include <dt-bindings/clock/ingenic,x1000-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index 950aee2..e01ec2d 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 
-#include <dt-bindings/clock/x1830-cgu.h>
+#include <dt-bindings/clock/ingenic,x1830-cgu.h>
 
 #include "cgu.h"
 #include "pm.h"
index 0e2ac0a..4ab312e 100644 (file)
@@ -10,8 +10,6 @@
 #include <linux/clk-provider.h>
 #include <linux/platform_device.h>
 
-#include <dt-bindings/clock/mt8195-clk.h>
-
 static const struct mtk_gate_regs imp_iic_wrap_cg_regs = {
        .set_ofs = 0xe08,
        .clr_ofs = 0xe04,
index 3c3a7ff..9b1674b 100644 (file)
@@ -2937,20 +2937,6 @@ static struct clk_branch gcc_smmu_aggre0_ahb_clk = {
        },
 };
 
-static struct clk_branch gcc_aggre1_pnoc_ahb_clk = {
-       .halt_reg = 0x82014,
-       .clkr = {
-               .enable_reg = 0x82014,
-               .enable_mask = BIT(0),
-               .hw.init = &(struct clk_init_data){
-                       .name = "gcc_aggre1_pnoc_ahb_clk",
-                       .parent_names = (const char *[]){ "periph_noc_clk_src" },
-                       .num_parents = 1,
-                       .ops = &clk_branch2_ops,
-               },
-       },
-};
-
 static struct clk_branch gcc_aggre2_ufs_axi_clk = {
        .halt_reg = 0x83014,
        .clkr = {
@@ -3474,7 +3460,6 @@ static struct clk_regmap *gcc_msm8996_clocks[] = {
        [GCC_AGGRE0_CNOC_AHB_CLK] = &gcc_aggre0_cnoc_ahb_clk.clkr,
        [GCC_SMMU_AGGRE0_AXI_CLK] = &gcc_smmu_aggre0_axi_clk.clkr,
        [GCC_SMMU_AGGRE0_AHB_CLK] = &gcc_smmu_aggre0_ahb_clk.clkr,
-       [GCC_AGGRE1_PNOC_AHB_CLK] = &gcc_aggre1_pnoc_ahb_clk.clkr,
        [GCC_AGGRE2_UFS_AXI_CLK] = &gcc_aggre2_ufs_axi_clk.clkr,
        [GCC_AGGRE2_USB3_AXI_CLK] = &gcc_aggre2_usb3_axi_clk.clkr,
        [GCC_QSPI_AHB_CLK] = &gcc_qspi_ahb_clk.clkr,
index 2dfd6a3..3067bdb 100644 (file)
@@ -80,14 +80,14 @@ config CLK_RK3368
          Build the driver for RK3368 Clock Driver.
 
 config CLK_RK3399
-       tristate "Rockchip RK3399 clock controller support"
+       bool "Rockchip RK3399 clock controller support"
        depends on ARM64 || COMPILE_TEST
        default y
        help
          Build the driver for RK3399 Clock Driver.
 
 config CLK_RK3568
-       tristate "Rockchip RK3568 clock controller support"
+       bool "Rockchip RK3568 clock controller support"
        depends on ARM64 || COMPILE_TEST
        default y
        help
index 7924598..306910a 100644 (file)
@@ -1630,7 +1630,6 @@ static const struct of_device_id clk_rk3399_match_table[] = {
        },
        { }
 };
-MODULE_DEVICE_TABLE(of, clk_rk3399_match_table);
 
 static int __init clk_rk3399_probe(struct platform_device *pdev)
 {
@@ -1656,7 +1655,4 @@ static struct platform_driver clk_rk3399_driver = {
                .suppress_bind_attrs = true,
        },
 };
-module_platform_driver_probe(clk_rk3399_driver, clk_rk3399_probe);
-
-MODULE_DESCRIPTION("Rockchip RK3399 Clock Driver");
-MODULE_LICENSE("GPL");
+builtin_platform_driver_probe(clk_rk3399_driver, clk_rk3399_probe);
index 939e707..69a9e80 100644 (file)
@@ -1693,7 +1693,6 @@ static const struct of_device_id clk_rk3568_match_table[] = {
        },
        { }
 };
-MODULE_DEVICE_TABLE(of, clk_rk3568_match_table);
 
 static int __init clk_rk3568_probe(struct platform_device *pdev)
 {
@@ -1719,7 +1718,4 @@ static struct platform_driver clk_rk3568_driver = {
                .suppress_bind_attrs = true,
        },
 };
-module_platform_driver_probe(clk_rk3568_driver, clk_rk3568_probe);
-
-MODULE_DESCRIPTION("Rockchip RK3568 Clock Driver");
-MODULE_LICENSE("GPL");
+builtin_platform_driver_probe(clk_rk3568_driver, clk_rk3568_probe);
index 1238023..46c66fa 100644 (file)
@@ -132,6 +132,10 @@ static const struct of_device_id uniphier_clk_match[] = {
                .compatible = "socionext,uniphier-pxs3-clock",
                .data = uniphier_pxs3_sys_clk_data,
        },
+       {
+               .compatible = "socionext,uniphier-nx1-clock",
+               .data = uniphier_nx1_sys_clk_data,
+       },
        /* Media I/O clock, SD clock */
        {
                .compatible = "socionext,uniphier-ld4-mio-clock",
@@ -165,6 +169,10 @@ static const struct of_device_id uniphier_clk_match[] = {
                .compatible = "socionext,uniphier-pxs3-sd-clock",
                .data = uniphier_pro5_sd_clk_data,
        },
+       {
+               .compatible = "socionext,uniphier-nx1-sd-clock",
+               .data = uniphier_pro5_sd_clk_data,
+       },
        /* Peripheral clock */
        {
                .compatible = "socionext,uniphier-ld4-peri-clock",
@@ -198,6 +206,15 @@ static const struct of_device_id uniphier_clk_match[] = {
                .compatible = "socionext,uniphier-pxs3-peri-clock",
                .data = uniphier_pro4_peri_clk_data,
        },
+       {
+               .compatible = "socionext,uniphier-nx1-peri-clock",
+               .data = uniphier_pro4_peri_clk_data,
+       },
+       /* SoC-glue clock */
+       {
+               .compatible = "socionext,uniphier-pro4-sg-clock",
+               .data = uniphier_pro4_sg_clk_data,
+       },
        { /* sentinel */ }
 };
 
index 32b3017..0180470 100644 (file)
        UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 10),              \
        UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 15)
 
+#define UNIPHIER_NX1_SYS_CLK_SD                                                \
+       UNIPHIER_CLK_FACTOR("sd-200m", -1, "spll", 1, 4),               \
+       UNIPHIER_CLK_FACTOR("sd-133m", -1, "spll", 1, 6)
+
 #define UNIPHIER_LD4_SYS_CLK_NAND(idx)                                 \
        UNIPHIER_CLK_FACTOR("nand-50m", -1, "spll", 1, 32),             \
        UNIPHIER_CLK_GATE("nand", (idx), "nand-50m", 0x2104, 2)
@@ -288,6 +292,8 @@ const struct uniphier_clk_data uniphier_pxs3_sys_clk_data[] = {
        UNIPHIER_CLK_GATE("sata0", 28, NULL, 0x210c, 7),
        UNIPHIER_CLK_GATE("sata1", 29, NULL, 0x210c, 8),
        UNIPHIER_CLK_GATE("sata-phy", 30, NULL, 0x210c, 21),
+       UNIPHIER_LD11_SYS_CLK_AIO(40),
+       UNIPHIER_LD11_SYS_CLK_EXIV(42),
        /* CPU gears */
        UNIPHIER_CLK_DIV4("cpll", 2, 3, 4, 8),
        UNIPHIER_CLK_DIV4("spll", 2, 3, 4, 8),
@@ -300,3 +306,44 @@ const struct uniphier_clk_data uniphier_pxs3_sys_clk_data[] = {
                             "spll/4", "spll/8", "s2pll/4", "s2pll/8"),
        { /* sentinel */ }
 };
+
+const struct uniphier_clk_data uniphier_nx1_sys_clk_data[] = {
+       UNIPHIER_CLK_FACTOR("cpll", -1, "ref", 100, 1),         /* ARM: 2500 MHz */
+       UNIPHIER_CLK_FACTOR("spll", -1, "ref", 32, 1),          /* 800 MHz */
+       UNIPHIER_CLK_FACTOR("uart", 0, "spll", 1, 6),
+       UNIPHIER_CLK_FACTOR("i2c", 1, "spll", 1, 16),
+       UNIPHIER_NX1_SYS_CLK_SD,
+       UNIPHIER_CLK_GATE("emmc", 4, NULL, 0x2108, 8),
+       UNIPHIER_CLK_GATE("ether", 6, NULL, 0x210c, 0),
+       UNIPHIER_CLK_GATE("usb30-0", 12, NULL, 0x210c, 16),     /* =GIO */
+       UNIPHIER_CLK_GATE("usb30-1", 13, NULL, 0x210c, 20),     /* =GIO1P */
+       UNIPHIER_CLK_GATE("usb30-hsphy0", 16, NULL, 0x210c, 24),
+       UNIPHIER_CLK_GATE("usb30-ssphy0", 17, NULL, 0x210c, 25),
+       UNIPHIER_CLK_GATE("usb30-ssphy1", 18, NULL, 0x210c, 26),
+       UNIPHIER_CLK_GATE("pcie", 24, NULL, 0x210c, 8),
+       UNIPHIER_CLK_GATE("voc", 52, NULL, 0x2110, 0),
+       UNIPHIER_CLK_GATE("hdmitx", 58, NULL, 0x2110, 8),
+       /* CPU gears */
+       UNIPHIER_CLK_DIV5("cpll", 2, 4, 8, 16, 32),
+       UNIPHIER_CLK_CPUGEAR("cpu-ca53", 33, 0x8080, 0xf, 5,
+                            "cpll/2", "cpll/4", "cpll/8", "cpll/16",
+                            "cpll/32"),
+       { /* sentinel */ }
+};
+
+const struct uniphier_clk_data uniphier_pro4_sg_clk_data[] = {
+       UNIPHIER_CLK_DIV("gpll", 4),
+       {
+               .name = "sata-ref",
+               .type = UNIPHIER_CLK_TYPE_MUX,
+               .idx = 0,
+               .data.mux = {
+                       .parent_names = { "gpll/4", "ref", },
+                       .num_parents = 2,
+                       .reg = 0x1a28,
+                       .masks = { 0x1, 0x1, },
+                       .vals  = { 0x0, 0x1, },
+               },
+       },
+       { /* sentinel */ }
+};
index 9e30362..dea0c78 100644 (file)
@@ -119,6 +119,10 @@ struct uniphier_clk_data {
        UNIPHIER_CLK_DIV2(parent, div0, div1),                  \
        UNIPHIER_CLK_DIV2(parent, div2, div3)
 
+#define UNIPHIER_CLK_DIV5(parent, div0, div1, div2, div3, div4)        \
+       UNIPHIER_CLK_DIV4(parent, div0, div1, div2, div3),      \
+       UNIPHIER_CLK_DIV(parent, div4)
+
 struct clk_hw *uniphier_clk_register_cpugear(struct device *dev,
                                             struct regmap *regmap,
                                             const char *name,
@@ -146,9 +150,11 @@ extern const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld11_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld20_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_pxs3_sys_clk_data[];
+extern const struct uniphier_clk_data uniphier_nx1_sys_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld4_mio_clk_data[];
 extern const struct uniphier_clk_data uniphier_pro5_sd_clk_data[];
 extern const struct uniphier_clk_data uniphier_ld4_peri_clk_data[];
 extern const struct uniphier_clk_data uniphier_pro4_peri_clk_data[];
+extern const struct uniphier_clk_data uniphier_pro4_sg_clk_data[];
 
 #endif /* __CLK_UNIPHIER_H__ */
index 77fd0ec..d52f976 100644 (file)
@@ -484,7 +484,7 @@ static void __init of_syscon_icst_setup(struct device_node *np)
        struct device_node *parent;
        struct regmap *map;
        struct clk_icst_desc icst_desc;
-       const char *name = np->name;
+       const char *name;
        const char *parent_name;
        struct clk *regclk;
        enum icst_control_type ctype;
@@ -533,15 +533,17 @@ static void __init of_syscon_icst_setup(struct device_node *np)
                icst_desc.params = &icst525_apcp_cm_params;
                ctype = ICST_INTEGRATOR_CP_CM_MEM;
        } else {
-               pr_err("unknown ICST clock %s\n", name);
+               pr_err("unknown ICST clock %pOF\n", np);
                return;
        }
 
        /* Parent clock name is not the same as node parent */
        parent_name = of_clk_get_parent_name(np, 0);
+       name = kasprintf(GFP_KERNEL, "%pOFP", np);
 
        regclk = icst_clk_setup(NULL, &icst_desc, name, parent_name, map, ctype);
        if (IS_ERR(regclk)) {
+               kfree(name);
                pr_err("error setting up syscon ICST clock %s\n", name);
                return;
        }
index 7f11ea0..ca873a3 100644 (file)
@@ -480,7 +480,7 @@ module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
 MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
 static atomic_t trans_id = ATOMIC_INIT(0);
 
-static int dm_ring_size = 20 * 1024;
+static int dm_ring_size = VMBUS_RING_SIZE(16 * 1024);
 
 /*
  * Driver specific state.
index cb403c9..4aebd67 100644 (file)
@@ -78,7 +78,7 @@ static void csky_mpintc_handler(struct pt_regs *regs)
                readl_relaxed(reg_base + INTCL_RDYIR));
 }
 
-static void csky_mpintc_enable(struct irq_data *d)
+static void csky_mpintc_unmask(struct irq_data *d)
 {
        void __iomem *reg_base = this_cpu_read(intcl_reg);
 
@@ -87,7 +87,7 @@ static void csky_mpintc_enable(struct irq_data *d)
        writel_relaxed(d->hwirq, reg_base + INTCL_SENR);
 }
 
-static void csky_mpintc_disable(struct irq_data *d)
+static void csky_mpintc_mask(struct irq_data *d)
 {
        void __iomem *reg_base = this_cpu_read(intcl_reg);
 
@@ -164,8 +164,8 @@ static int csky_irq_set_affinity(struct irq_data *d,
 static struct irq_chip csky_irq_chip = {
        .name           = "C-SKY SMP Intc",
        .irq_eoi        = csky_mpintc_eoi,
-       .irq_enable     = csky_mpintc_enable,
-       .irq_disable    = csky_mpintc_disable,
+       .irq_unmask     = csky_mpintc_unmask,
+       .irq_mask       = csky_mpintc_mask,
        .irq_set_type   = csky_mpintc_set_type,
 #ifdef CONFIG_SMP
        .irq_set_affinity = csky_irq_set_affinity,
index cf74cfa..259065d 100644 (file)
@@ -163,7 +163,13 @@ static void plic_irq_eoi(struct irq_data *d)
 {
        struct plic_handler *handler = this_cpu_ptr(&plic_handlers);
 
-       writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+       if (irqd_irq_masked(d)) {
+               plic_irq_unmask(d);
+               writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+               plic_irq_mask(d);
+       } else {
+               writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM);
+       }
 }
 
 static struct irq_chip plic_chip = {
index 47a04c3..b732ee9 100644 (file)
@@ -3286,7 +3286,7 @@ static void __exit amt_fini(void)
 {
        rtnl_link_unregister(&amt_link_ops);
        unregister_netdevice_notifier(&amt_notifier_block);
-       cancel_delayed_work(&source_gc_wq);
+       cancel_delayed_work_sync(&source_gc_wq);
        __amt_source_gc_work();
        destroy_workqueue(amt_wq);
 }
index a429c97..147ca39 100644 (file)
@@ -1256,8 +1256,12 @@ qca8k_setup(struct dsa_switch *ds)
                /* Set initial MTU for every port.
                 * We have only have a general MTU setting. So track
                 * every port and set the max across all port.
+                * Set per port MTU to 1500 as the MTU change function
+                * will add the overhead and if its set to 1518 then it
+                * will apply the overhead again and we will end up with
+                * MTU of 1536 instead of 1518
                 */
-               priv->port_mtu[i] = ETH_FRAME_LEN + ETH_FCS_LEN;
+               priv->port_mtu[i] = ETH_DATA_LEN;
        }
 
        /* Special GLOBAL_FC_THRESH value are needed for ar8327 switch */
@@ -1433,6 +1437,12 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
 
                qca8k_write(priv, QCA8K_REG_SGMII_CTRL, val);
 
+               /* From original code is reported port instability as SGMII also
+                * require delay set. Apply advised values here or take them from DT.
+                */
+               if (state->interface == PHY_INTERFACE_MODE_SGMII)
+                       qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg);
+
                /* For qca8327/qca8328/qca8334/qca8338 sgmii is unique and
                 * falling edge is set writing in the PORT0 PAD reg
                 */
@@ -1455,12 +1465,6 @@ qca8k_phylink_mac_config(struct dsa_switch *ds, int port, unsigned int mode,
                                        QCA8K_PORT0_PAD_SGMII_TXCLK_FALLING_EDGE,
                                        val);
 
-               /* From original code is reported port instability as SGMII also
-                * require delay set. Apply advised values here or take them from DT.
-                */
-               if (state->interface == PHY_INTERFACE_MODE_SGMII)
-                       qca8k_mac_config_setup_internal_delay(priv, cpu_port_index, reg);
-
                break;
        default:
                dev_err(ds->dev, "xMII mode %s not supported for port %d\n",
index 24122cc..81b3756 100644 (file)
@@ -298,13 +298,14 @@ bool aq_ring_tx_clean(struct aq_ring_s *self)
                        }
                }
 
-               if (unlikely(buff->is_eop)) {
+               if (unlikely(buff->is_eop && buff->skb)) {
                        u64_stats_update_begin(&self->stats.tx.syncp);
                        ++self->stats.tx.packets;
                        self->stats.tx.bytes += buff->skb->len;
                        u64_stats_update_end(&self->stats.tx.syncp);
 
                        dev_kfree_skb_any(buff->skb);
+                       buff->skb = NULL;
                }
                buff->pa = 0U;
                buff->eop_index = 0xffffU;
index 80263c3..4a83c99 100644 (file)
@@ -127,9 +127,9 @@ struct ax88796c_device {
                #define AX_PRIV_FLAGS_MASK      (AX_CAP_COMP)
 
        unsigned long           flags;
-               #define EVENT_INTR              BIT(0)
-               #define EVENT_TX                BIT(1)
-               #define EVENT_SET_MULTI         BIT(2)
+               #define EVENT_INTR              0
+               #define EVENT_TX                1
+               #define EVENT_SET_MULTI         2
 
 };
 
index 94df4f9..0710e71 100644 (file)
@@ -34,7 +34,7 @@ int axspi_read_status(struct axspi_data *ax_spi, struct spi_status *status)
 
        /* OP */
        ax_spi->cmd_buf[0] = AX_SPICMD_READ_STATUS;
-       ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)&status, 3);
+       ret = spi_write_then_read(ax_spi->spi, ax_spi->cmd_buf, 1, (u8 *)status, 3);
        if (ret)
                dev_err(&ax_spi->spi->dev, "%s() failed: ret = %d\n", __func__, ret);
        else
index d0d5da9..4c9507d 100644 (file)
@@ -2258,6 +2258,16 @@ static inline void bnxt_db_write(struct bnxt *bp, struct bnxt_db_info *db,
        }
 }
 
+/* Must hold rtnl_lock */
+static inline bool bnxt_sriov_cfg(struct bnxt *bp)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+       return BNXT_PF(bp) && (bp->pf.active_vfs || bp->sriov_cfg);
+#else
+       return false;
+#endif
+}
+
 extern const u16 bnxt_lhint_arr[];
 
 int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
index 6fe9e9b..951c4c5 100644 (file)
@@ -442,7 +442,7 @@ static int bnxt_dl_reload_down(struct devlink *dl, bool netns_change,
        switch (action) {
        case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: {
                rtnl_lock();
-               if (BNXT_PF(bp) && (bp->pf.active_vfs || bp->sriov_cfg)) {
+               if (bnxt_sriov_cfg(bp)) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "reload is unsupported while VFs are allocated or being configured");
                        rtnl_unlock();
index 64479c4..ae9cca7 100644 (file)
@@ -3196,6 +3196,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
        }
        if (adapter->registered_device_map == 0) {
                dev_err(&pdev->dev, "could not register any net devices\n");
+               err = -EINVAL;
                goto err_disable_interrupts;
        }
 
index 13121c4..71730ef 100644 (file)
@@ -4709,6 +4709,10 @@ type3_infoblock(struct net_device *dev, u_char count, u_char *p)
         lp->ibn = 3;
         lp->active = *p++;
        if (MOTO_SROM_BUG) lp->active = 0;
+       /* if (MOTO_SROM_BUG) statement indicates lp->active could
+        * be 8 (i.e. the size of array lp->phy) */
+       if (WARN_ON(lp->active >= ARRAY_SIZE(lp->phy)))
+               return -EINVAL;
        lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1);
        lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1);
        lp->phy[lp->active].mc  = get_unaligned_le16(p); p += 2;
@@ -5000,19 +5004,23 @@ mii_get_phy(struct net_device *dev)
        }
        if ((j == limit) && (i < DE4X5_MAX_MII)) {
            for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++);
-           lp->phy[k].addr = i;
-           lp->phy[k].id = id;
-           lp->phy[k].spd.reg = GENERIC_REG;      /* ANLPA register         */
-           lp->phy[k].spd.mask = GENERIC_MASK;    /* 100Mb/s technologies   */
-           lp->phy[k].spd.value = GENERIC_VALUE;  /* TX & T4, H/F Duplex    */
-           lp->mii_cnt++;
-           lp->active++;
-           printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name);
-           j = de4x5_debug;
-           de4x5_debug |= DEBUG_MII;
-           de4x5_dbg_mii(dev, k);
-           de4x5_debug = j;
-           printk("\n");
+           if (k < DE4X5_MAX_PHY) {
+               lp->phy[k].addr = i;
+               lp->phy[k].id = id;
+               lp->phy[k].spd.reg = GENERIC_REG;      /* ANLPA register         */
+               lp->phy[k].spd.mask = GENERIC_MASK;    /* 100Mb/s technologies   */
+               lp->phy[k].spd.value = GENERIC_VALUE;  /* TX & T4, H/F Duplex    */
+               lp->mii_cnt++;
+               lp->active++;
+               printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name);
+               j = de4x5_debug;
+               de4x5_debug |= DEBUG_MII;
+               de4x5_dbg_mii(dev, k);
+               de4x5_debug = j;
+               printk("\n");
+           } else {
+               goto purgatory;
+           }
        }
     }
   purgatory:
index 714e961..6451c83 100644 (file)
@@ -4550,10 +4550,10 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
 
        fsl_mc_portal_free(priv->mc_io);
 
-       free_netdev(net_dev);
-
        dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name);
 
+       free_netdev(net_dev);
+
        return 0;
 }
 
index 23d9cbf..740850b 100644 (file)
@@ -400,6 +400,10 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port,
                return;
 
        if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) {
+               /* DSAF_MAX_PORT_NUM is 6, but DSAF_GE_NUM is 8.
+                  We need check to prevent array overflow */
+               if (port >= DSAF_MAX_PORT_NUM)
+                       return;
                reg_val_1  = 0x1 << port;
                port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off;
                /* there is difference between V1 and V2 in register.*/
index 5039a25..0bf3d47 100644 (file)
@@ -3003,9 +3003,10 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake)
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct nic *nic = netdev_priv(netdev);
 
+       netif_device_detach(netdev);
+
        if (netif_running(netdev))
                e100_down(nic);
-       netif_device_detach(netdev);
 
        if ((nic->flags & wol_magic) | e100_asf(nic)) {
                /* enable reverse auto-negotiation */
@@ -3022,7 +3023,7 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake)
                *enable_wake = false;
        }
 
-       pci_clear_master(pdev);
+       pci_disable_device(pdev);
 }
 
 static int __e100_power_off(struct pci_dev *pdev, bool wake)
@@ -3042,8 +3043,6 @@ static int __maybe_unused e100_suspend(struct device *dev_d)
 
        __e100_shutdown(to_pci_dev(dev_d), &wake);
 
-       device_wakeup_disable(dev_d);
-
        return 0;
 }
 
@@ -3051,6 +3050,14 @@ static int __maybe_unused e100_resume(struct device *dev_d)
 {
        struct net_device *netdev = dev_get_drvdata(dev_d);
        struct nic *nic = netdev_priv(netdev);
+       int err;
+
+       err = pci_enable_device(to_pci_dev(dev_d));
+       if (err) {
+               netdev_err(netdev, "Resume cannot enable PCI device, aborting\n");
+               return err;
+       }
+       pci_set_master(to_pci_dev(dev_d));
 
        /* disable reverse auto-negotiation */
        if (nic->phy == phy_82552_v) {
@@ -3062,10 +3069,11 @@ static int __maybe_unused e100_resume(struct device *dev_d)
                           smartspeed & ~(E100_82552_REV_ANEG));
        }
 
-       netif_device_attach(netdev);
        if (netif_running(netdev))
                e100_up(nic);
 
+       netif_device_attach(netdev);
+
        return 0;
 }
 
index 3d528fb..4d939af 100644 (file)
@@ -161,6 +161,7 @@ enum i40e_vsi_state_t {
        __I40E_VSI_OVERFLOW_PROMISC,
        __I40E_VSI_REINIT_REQUESTED,
        __I40E_VSI_DOWN_REQUESTED,
+       __I40E_VSI_RELEASING,
        /* This must be last as it determines the size of the BITMAP */
        __I40E_VSI_STATE_SIZE__,
 };
@@ -1247,6 +1248,7 @@ void i40e_ptp_restore_hw_time(struct i40e_pf *pf);
 void i40e_ptp_init(struct i40e_pf *pf);
 void i40e_ptp_stop(struct i40e_pf *pf);
 int i40e_ptp_alloc_pins(struct i40e_pf *pf);
+int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset);
 int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
 i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf);
 i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf);
index ba86213..e118cf9 100644 (file)
@@ -1790,6 +1790,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
                                     bool is_add)
 {
        struct i40e_pf *pf = vsi->back;
+       u16 num_tc_qps = 0;
        u16 sections = 0;
        u8 netdev_tc = 0;
        u16 numtc = 1;
@@ -1797,13 +1798,33 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
        u8 offset;
        u16 qmap;
        int i;
-       u16 num_tc_qps = 0;
 
        sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
        offset = 0;
+       /* zero out queue mapping, it will get updated on the end of the function */
+       memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));
+
+       if (vsi->type == I40E_VSI_MAIN) {
+               /* This code helps add more queue to the VSI if we have
+                * more cores than RSS can support, the higher cores will
+                * be served by ATR or other filters. Furthermore, the
+                * non-zero req_queue_pairs says that user requested a new
+                * queue count via ethtool's set_channels, so use this
+                * value for queues distribution across traffic classes
+                */
+               if (vsi->req_queue_pairs > 0)
+                       vsi->num_queue_pairs = vsi->req_queue_pairs;
+               else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+                       vsi->num_queue_pairs = pf->num_lan_msix;
+       }
 
        /* Number of queues per enabled TC */
-       num_tc_qps = vsi->alloc_queue_pairs;
+       if (vsi->type == I40E_VSI_MAIN ||
+           (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
+               num_tc_qps = vsi->num_queue_pairs;
+       else
+               num_tc_qps = vsi->alloc_queue_pairs;
+
        if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
                /* Find numtc from enabled TC bitmap */
                for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
@@ -1881,15 +1902,11 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
                }
                ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
        }
-
-       /* Set actual Tx/Rx queue pairs */
-       vsi->num_queue_pairs = offset;
-       if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) {
-               if (vsi->req_queue_pairs > 0)
-                       vsi->num_queue_pairs = vsi->req_queue_pairs;
-               else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
-                       vsi->num_queue_pairs = pf->num_lan_msix;
-       }
+       /* Do not change previously set num_queue_pairs for PFs and VFs*/
+       if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
+           (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
+           (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
+               vsi->num_queue_pairs = offset;
 
        /* Scheduler section valid can only be set for ADD VSI */
        if (is_add) {
@@ -2623,7 +2640,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 
        for (v = 0; v < pf->num_alloc_vsi; v++) {
                if (pf->vsi[v] &&
-                   (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) {
+                   (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) &&
+                   !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) {
                        int ret = i40e_sync_vsi_filters(pf->vsi[v]);
 
                        if (ret) {
@@ -5427,6 +5445,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
 }
 
 /**
+ * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI
+ * @vsi: the VSI being reconfigured
+ * @vsi_offset: offset from main VF VSI
+ */
+int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
+{
+       struct i40e_vsi_context ctxt = {};
+       struct i40e_pf *pf;
+       struct i40e_hw *hw;
+       int ret;
+
+       if (!vsi)
+               return I40E_ERR_PARAM;
+       pf = vsi->back;
+       hw = &pf->hw;
+
+       ctxt.seid = vsi->seid;
+       ctxt.pf_num = hw->pf_id;
+       ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset;
+       ctxt.uplink_seid = vsi->uplink_seid;
+       ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+       ctxt.flags = I40E_AQ_VSI_TYPE_VF;
+       ctxt.info = vsi->info;
+
+       i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc,
+                                false);
+       if (vsi->reconfig_rss) {
+               vsi->rss_size = min_t(int, pf->alloc_rss_size,
+                                     vsi->num_queue_pairs);
+               ret = i40e_vsi_config_rss(vsi);
+               if (ret) {
+                       dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n");
+                       return ret;
+               }
+               vsi->reconfig_rss = false;
+       }
+
+       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n",
+                        i40e_stat_str(hw, ret),
+                        i40e_aq_str(hw, hw->aq.asq_last_status));
+               return ret;
+       }
+       /* update the local VSI info with updated queue map */
+       i40e_vsi_update_queue_map(vsi, &ctxt);
+       vsi->info.valid_sections = 0;
+
+       return ret;
+}
+
+/**
  * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
  * @vsi: VSI to be configured
  * @enabled_tc: TC bitmap
@@ -5717,24 +5787,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
 }
 
 /**
- * i40e_is_any_channel - channel exist or not
- * @vsi: ptr to VSI to which channels are associated with
- *
- * Returns true or false if channel(s) exist for associated VSI or not
- **/
-static bool i40e_is_any_channel(struct i40e_vsi *vsi)
-{
-       struct i40e_channel *ch, *ch_tmp;
-
-       list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
-               if (ch->initialized)
-                       return true;
-       }
-
-       return false;
-}
-
-/**
  * i40e_get_max_queues_for_channel
  * @vsi: ptr to VSI to which channels are associated with
  *
@@ -6240,26 +6292,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi,
        /* By default we are in VEPA mode, if this is the first VF/VMDq
         * VSI to be added switch to VEB mode.
         */
-       if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
-           (!i40e_is_any_channel(vsi))) {
-               if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
-                       dev_dbg(&pf->pdev->dev,
-                               "Failed to create channel. Override queues (%u) not power of 2\n",
-                               vsi->tc_config.tc_info[0].qcount);
-                       return -EINVAL;
-               }
 
-               if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
-                       pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+       if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+               pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
 
-                       if (vsi->type == I40E_VSI_MAIN) {
-                               if (pf->flags & I40E_FLAG_TC_MQPRIO)
-                                       i40e_do_reset(pf, I40E_PF_RESET_FLAG,
-                                                     true);
-                               else
-                                       i40e_do_reset_safe(pf,
-                                                          I40E_PF_RESET_FLAG);
-                       }
+               if (vsi->type == I40E_VSI_MAIN) {
+                       if (pf->flags & I40E_FLAG_TC_MQPRIO)
+                               i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
+                       else
+                               i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
                }
                /* now onwards for main VSI, number of queues will be value
                 * of TC0's queue count
@@ -7912,12 +7953,20 @@ config_tc:
                            vsi->seid);
                need_reset = true;
                goto exit;
-       } else {
-               dev_info(&vsi->back->pdev->dev,
-                        "Setup channel (id:%u) utilizing num_queues %d\n",
-                        vsi->seid, vsi->tc_config.tc_info[0].qcount);
+       } else if (enabled_tc &&
+                  (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
+               netdev_info(netdev,
+                           "Failed to create channel. Override queues (%u) not power of 2\n",
+                           vsi->tc_config.tc_info[0].qcount);
+               ret = -EINVAL;
+               need_reset = true;
+               goto exit;
        }
 
+       dev_info(&vsi->back->pdev->dev,
+                "Setup channel (id:%u) utilizing num_queues %d\n",
+                vsi->seid, vsi->tc_config.tc_info[0].qcount);
+
        if (pf->flags & I40E_FLAG_TC_MQPRIO) {
                if (vsi->mqprio_qopt.max_rate[0]) {
                        u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
@@ -8482,9 +8531,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi,
                err = i40e_add_del_cloud_filter(vsi, filter, true);
 
        if (err) {
-               dev_err(&pf->pdev->dev,
-                       "Failed to add cloud filter, err %s\n",
-                       i40e_stat_str(&pf->hw, err));
+               dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n",
+                       err);
                goto err;
        }
 
@@ -13771,7 +13819,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
                dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
                return -ENODEV;
        }
-
+       set_bit(__I40E_VSI_RELEASING, vsi->state);
        uplink_seid = vsi->uplink_seid;
        if (vsi->type != I40E_VSI_SRIOV) {
                if (vsi->netdev_registered) {
index 472f56b..80ae264 100644 (file)
@@ -183,17 +183,18 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
 /***********************misc routines*****************************/
 
 /**
- * i40e_vc_disable_vf
+ * i40e_vc_reset_vf
  * @vf: pointer to the VF info
- *
- * Disable the VF through a SW reset.
+ * @notify_vf: notify vf about reset or not
+ * Reset VF handler.
  **/
-static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
+static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
 {
        struct i40e_pf *pf = vf->pf;
        int i;
 
-       i40e_vc_notify_vf_reset(vf);
+       if (notify_vf)
+               i40e_vc_notify_vf_reset(vf);
 
        /* We want to ensure that an actual reset occurs initiated after this
         * function was called. However, we do not want to wait forever, so
@@ -211,9 +212,14 @@ static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
                usleep_range(10000, 20000);
        }
 
-       dev_warn(&vf->pf->pdev->dev,
-                "Failed to initiate reset for VF %d after 200 milliseconds\n",
-                vf->vf_id);
+       if (notify_vf)
+               dev_warn(&vf->pf->pdev->dev,
+                        "Failed to initiate reset for VF %d after 200 milliseconds\n",
+                        vf->vf_id);
+       else
+               dev_dbg(&vf->pf->pdev->dev,
+                       "Failed to initiate reset for VF %d after 200 milliseconds\n",
+                       vf->vf_id);
 }
 
 /**
@@ -674,14 +680,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
                                    u16 vsi_queue_id,
                                    struct virtchnl_rxq_info *info)
 {
+       u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
        struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx];
        struct i40e_hw *hw = &pf->hw;
        struct i40e_hmc_obj_rxq rx_ctx;
-       u16 pf_queue_id;
        int ret = 0;
 
-       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
-
        /* clear the context structure first */
        memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
 
@@ -719,6 +724,10 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
        }
        rx_ctx.rxmax = info->max_pkt_size;
 
+       /* if port VLAN is configured increase the max packet size */
+       if (vsi->info.pvid)
+               rx_ctx.rxmax += VLAN_HLEN;
+
        /* enable 32bytes desc always */
        rx_ctx.dsize = 1;
 
@@ -2106,20 +2115,6 @@ err:
 }
 
 /**
- * i40e_vc_reset_vf_msg
- * @vf: pointer to the VF info
- *
- * called from the VF to reset itself,
- * unlike other virtchnl messages, PF driver
- * doesn't send the response back to the VF
- **/
-static void i40e_vc_reset_vf_msg(struct i40e_vf *vf)
-{
-       if (test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
-               i40e_reset_vf(vf, false);
-}
-
-/**
  * i40e_vc_config_promiscuous_mode_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -2217,11 +2212,12 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
        struct virtchnl_vsi_queue_config_info *qci =
            (struct virtchnl_vsi_queue_config_info *)msg;
        struct virtchnl_queue_pair_info *qpi;
-       struct i40e_pf *pf = vf->pf;
        u16 vsi_id, vsi_queue_id = 0;
-       u16 num_qps_all = 0;
+       struct i40e_pf *pf = vf->pf;
        i40e_status aq_ret = 0;
        int i, j = 0, idx = 0;
+       struct i40e_vsi *vsi;
+       u16 num_qps_all = 0;
 
        if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) {
                aq_ret = I40E_ERR_PARAM;
@@ -2310,9 +2306,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg)
                pf->vsi[vf->lan_vsi_idx]->num_queue_pairs =
                        qci->num_queue_pairs;
        } else {
-               for (i = 0; i < vf->num_tc; i++)
-                       pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs =
-                              vf->ch[i].num_qps;
+               for (i = 0; i < vf->num_tc; i++) {
+                       vsi = pf->vsi[vf->ch[i].vsi_idx];
+                       vsi->num_queue_pairs = vf->ch[i].num_qps;
+
+                       if (i40e_update_adq_vsi_queues(vsi, i)) {
+                               aq_ret = I40E_ERR_CONFIG;
+                               goto error_param;
+                       }
+               }
        }
 
 error_param:
@@ -2607,8 +2609,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg)
        } else {
                /* successful request */
                vf->num_req_queues = req_pairs;
-               i40e_vc_notify_vf_reset(vf);
-               i40e_reset_vf(vf, false);
+               i40e_vc_reset_vf(vf, true);
                return 0;
        }
 
@@ -3803,8 +3804,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg)
        vf->num_req_queues = 0;
 
        /* reset the VF in order to allocate resources */
-       i40e_vc_notify_vf_reset(vf);
-       i40e_reset_vf(vf, false);
+       i40e_vc_reset_vf(vf, true);
 
        return I40E_SUCCESS;
 
@@ -3844,8 +3844,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg)
        }
 
        /* reset the VF in order to allocate resources */
-       i40e_vc_notify_vf_reset(vf);
-       i40e_reset_vf(vf, false);
+       i40e_vc_reset_vf(vf, true);
 
        return I40E_SUCCESS;
 
@@ -3907,7 +3906,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
                i40e_vc_notify_vf_link_state(vf);
                break;
        case VIRTCHNL_OP_RESET_VF:
-               i40e_vc_reset_vf_msg(vf);
+               i40e_vc_reset_vf(vf, false);
                ret = 0;
                break;
        case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
@@ -4161,7 +4160,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
        /* Force the VF interface down so it has to bring up with new MAC
         * address
         */
-       i40e_vc_disable_vf(vf);
+       i40e_vc_reset_vf(vf, true);
        dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n");
 
 error_param:
@@ -4170,34 +4169,6 @@ error_param:
 }
 
 /**
- * i40e_vsi_has_vlans - True if VSI has configured VLANs
- * @vsi: pointer to the vsi
- *
- * Check if a VSI has configured any VLANs. False if we have a port VLAN or if
- * we have no configured VLANs. Do not call while holding the
- * mac_filter_hash_lock.
- */
-static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi)
-{
-       bool have_vlans;
-
-       /* If we have a port VLAN, then the VSI cannot have any VLANs
-        * configured, as all MAC/VLAN filters will be assigned to the PVID.
-        */
-       if (vsi->info.pvid)
-               return false;
-
-       /* Since we don't have a PVID, we know that if the device is in VLAN
-        * mode it must be because of a VLAN filter configured on this VSI.
-        */
-       spin_lock_bh(&vsi->mac_filter_hash_lock);
-       have_vlans = i40e_is_vsi_in_vlan(vsi);
-       spin_unlock_bh(&vsi->mac_filter_hash_lock);
-
-       return have_vlans;
-}
-
-/**
  * i40e_ndo_set_vf_port_vlan
  * @netdev: network interface device structure
  * @vf_id: VF identifier
@@ -4253,19 +4224,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
                /* duplicate request, so just return success */
                goto error_pvid;
 
-       if (i40e_vsi_has_vlans(vsi)) {
-               dev_err(&pf->pdev->dev,
-                       "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
-                       vf_id);
-               /* Administrator Error - knock the VF offline until he does
-                * the right thing by reconfiguring his network correctly
-                * and then reloading the VF driver.
-                */
-               i40e_vc_disable_vf(vf);
-               /* During reset the VF got a new VSI, so refresh the pointer. */
-               vsi = pf->vsi[vf->lan_vsi_idx];
-       }
-
+       i40e_vc_reset_vf(vf, true);
+       /* During reset the VF got a new VSI, so refresh a pointer. */
+       vsi = pf->vsi[vf->lan_vsi_idx];
        /* Locked once because multiple functions below iterate list */
        spin_lock_bh(&vsi->mac_filter_hash_lock);
 
@@ -4641,7 +4602,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
                goto out;
 
        vf->trusted = setting;
-       i40e_vc_disable_vf(vf);
+       i40e_vc_reset_vf(vf, true);
        dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
                 vf_id, setting ? "" : "un");
 
index 75635bd..3789269 100644 (file)
@@ -305,6 +305,7 @@ struct iavf_adapter {
 #define IAVF_FLAG_AQ_DEL_FDIR_FILTER           BIT(26)
 #define IAVF_FLAG_AQ_ADD_ADV_RSS_CFG           BIT(27)
 #define IAVF_FLAG_AQ_DEL_ADV_RSS_CFG           BIT(28)
+#define IAVF_FLAG_AQ_REQUEST_STATS             BIT(29)
 
        /* OS defined structs */
        struct net_device *netdev;
@@ -444,6 +445,7 @@ int iavf_up(struct iavf_adapter *adapter);
 void iavf_down(struct iavf_adapter *adapter);
 int iavf_process_config(struct iavf_adapter *adapter);
 void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_schedule_request_stats(struct iavf_adapter *adapter);
 void iavf_reset(struct iavf_adapter *adapter);
 void iavf_set_ethtool_ops(struct net_device *netdev);
 void iavf_update_stats(struct iavf_adapter *adapter);
@@ -501,4 +503,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
 void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
                                        const u8 *macaddr);
+int iavf_lock_timeout(struct mutex *lock, unsigned int msecs);
 #endif /* _IAVF_H_ */
index 144a776..0cecaff 100644 (file)
@@ -354,6 +354,9 @@ static void iavf_get_ethtool_stats(struct net_device *netdev,
        struct iavf_adapter *adapter = netdev_priv(netdev);
        unsigned int i;
 
+       /* Explicitly request stats refresh */
+       iavf_schedule_request_stats(adapter);
+
        iavf_add_ethtool_stats(&data, adapter, iavf_gstrings_stats);
 
        rcu_read_lock();
@@ -723,12 +726,31 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
  *
  * Change the ITR settings for a specific queue.
  **/
-static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
-                                  struct ethtool_coalesce *ec, int queue)
+static int iavf_set_itr_per_queue(struct iavf_adapter *adapter,
+                                 struct ethtool_coalesce *ec, int queue)
 {
        struct iavf_ring *rx_ring = &adapter->rx_rings[queue];
        struct iavf_ring *tx_ring = &adapter->tx_rings[queue];
        struct iavf_q_vector *q_vector;
+       u16 itr_setting;
+
+       itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+       if (ec->rx_coalesce_usecs != itr_setting &&
+           ec->use_adaptive_rx_coalesce) {
+               netif_info(adapter, drv, adapter->netdev,
+                          "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n");
+               return -EINVAL;
+       }
+
+       itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC;
+
+       if (ec->tx_coalesce_usecs != itr_setting &&
+           ec->use_adaptive_tx_coalesce) {
+               netif_info(adapter, drv, adapter->netdev,
+                          "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n");
+               return -EINVAL;
+       }
 
        rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs);
        tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs);
@@ -751,6 +773,7 @@ static void iavf_set_itr_per_queue(struct iavf_adapter *adapter,
         * the Tx and Rx ITR values based on the values we have entered
         * into the q_vector, no need to write the values now.
         */
+       return 0;
 }
 
 /**
@@ -792,9 +815,11 @@ static int __iavf_set_coalesce(struct net_device *netdev,
         */
        if (queue < 0) {
                for (i = 0; i < adapter->num_active_queues; i++)
-                       iavf_set_itr_per_queue(adapter, ec, i);
+                       if (iavf_set_itr_per_queue(adapter, ec, i))
+                               return -EINVAL;
        } else if (queue < adapter->num_active_queues) {
-               iavf_set_itr_per_queue(adapter, ec, queue);
+               if (iavf_set_itr_per_queue(adapter, ec, queue))
+                       return -EINVAL;
        } else {
                netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n",
                           adapter->num_active_queues - 1);
index 336e6bf..14934a7 100644 (file)
@@ -147,7 +147,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
  *
  * Returns 0 on success, negative on failure
  **/
-static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
 {
        unsigned int wait, delay = 10;
 
@@ -175,6 +175,19 @@ void iavf_schedule_reset(struct iavf_adapter *adapter)
 }
 
 /**
+ * iavf_schedule_request_stats - Set the flags and schedule statistics request
+ * @adapter: board private structure
+ *
+ * Sets IAVF_FLAG_AQ_REQUEST_STATS flag so iavf_watchdog_task() will explicitly
+ * request and refresh ethtool stats
+ **/
+void iavf_schedule_request_stats(struct iavf_adapter *adapter)
+{
+       adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS;
+       mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0);
+}
+
+/**
  * iavf_tx_timeout - Respond to a Tx Hang
  * @netdev: network interface device structure
  * @txqueue: queue number that is timing out
@@ -704,13 +717,11 @@ static void iavf_del_vlan(struct iavf_adapter *adapter, u16 vlan)
  **/
 static void iavf_restore_filters(struct iavf_adapter *adapter)
 {
-       /* re-add all VLAN filters */
-       if (VLAN_ALLOWED(adapter)) {
-               u16 vid;
+       u16 vid;
 
-               for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID)
-                       iavf_add_vlan(adapter, vid);
-       }
+       /* re-add all VLAN filters */
+       for_each_set_bit(vid, adapter->vsi.active_vlans, VLAN_N_VID)
+               iavf_add_vlan(adapter, vid);
 }
 
 /**
@@ -745,9 +756,6 @@ static int iavf_vlan_rx_kill_vid(struct net_device *netdev,
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
 
-       if (!VLAN_ALLOWED(adapter))
-               return -EIO;
-
        iavf_del_vlan(adapter, vid);
        clear_bit(vid, adapter->vsi.active_vlans);
 
@@ -1709,6 +1717,11 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
                iavf_del_adv_rss_cfg(adapter);
                return 0;
        }
+       if (adapter->aq_required & IAVF_FLAG_AQ_REQUEST_STATS) {
+               iavf_request_stats(adapter);
+               return 0;
+       }
+
        return -EAGAIN;
 }
 
@@ -2173,7 +2186,6 @@ static void iavf_reset_task(struct work_struct *work)
        struct net_device *netdev = adapter->netdev;
        struct iavf_hw *hw = &adapter->hw;
        struct iavf_mac_filter *f, *ftmp;
-       struct iavf_vlan_filter *vlf;
        struct iavf_cloud_filter *cf;
        u32 reg_val;
        int i = 0, err;
@@ -2254,6 +2266,7 @@ continue_reset:
                   (adapter->state == __IAVF_RESETTING));
 
        if (running) {
+               netdev->flags &= ~IFF_UP;
                netif_carrier_off(netdev);
                netif_tx_stop_all_queues(netdev);
                adapter->link_up = false;
@@ -2313,11 +2326,6 @@ continue_reset:
        list_for_each_entry(f, &adapter->mac_filter_list, list) {
                f->add = true;
        }
-       /* re-add all VLAN filters */
-       list_for_each_entry(vlf, &adapter->vlan_filter_list, list) {
-               vlf->add = true;
-       }
-
        spin_unlock_bh(&adapter->mac_vlan_list_lock);
 
        /* check if TCs are running and re-add all cloud filters */
@@ -2331,7 +2339,6 @@ continue_reset:
        spin_unlock_bh(&adapter->cloud_filter_list_lock);
 
        adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
-       adapter->aq_required |= IAVF_FLAG_AQ_ADD_VLAN_FILTER;
        adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
        iavf_misc_irq_enable(adapter);
 
@@ -2365,7 +2372,7 @@ continue_reset:
                 * to __IAVF_RUNNING
                 */
                iavf_up_complete(adapter);
-
+               netdev->flags |= IFF_UP;
                iavf_irq_enable(adapter, true);
        } else {
                iavf_change_state(adapter, __IAVF_DOWN);
@@ -2378,8 +2385,10 @@ continue_reset:
 reset_err:
        mutex_unlock(&adapter->client_lock);
        mutex_unlock(&adapter->crit_lock);
-       if (running)
+       if (running) {
                iavf_change_state(adapter, __IAVF_RUNNING);
+               netdev->flags |= IFF_UP;
+       }
        dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
        iavf_close(netdev);
 }
@@ -3441,11 +3450,16 @@ static int iavf_set_features(struct net_device *netdev,
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
 
-       /* Don't allow changing VLAN_RX flag when adapter is not capable
-        * of VLAN offload
+       /* Don't allow enabling VLAN features when adapter is not capable
+        * of VLAN offload/filtering
         */
        if (!VLAN_ALLOWED(adapter)) {
-               if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX)
+               netdev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
+                                        NETIF_F_HW_VLAN_CTAG_TX |
+                                        NETIF_F_HW_VLAN_CTAG_FILTER);
+               if (features & (NETIF_F_HW_VLAN_CTAG_RX |
+                               NETIF_F_HW_VLAN_CTAG_TX |
+                               NETIF_F_HW_VLAN_CTAG_FILTER))
                        return -EINVAL;
        } else if ((netdev->features ^ features) & NETIF_F_HW_VLAN_CTAG_RX) {
                if (features & NETIF_F_HW_VLAN_CTAG_RX)
index 8c3f0f7..d60bf7c 100644 (file)
@@ -607,7 +607,7 @@ void iavf_add_vlans(struct iavf_adapter *adapter)
                if (f->add)
                        count++;
        }
-       if (!count) {
+       if (!count || !VLAN_ALLOWED(adapter)) {
                adapter->aq_required &= ~IAVF_FLAG_AQ_ADD_VLAN_FILTER;
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                return;
@@ -673,9 +673,19 @@ void iavf_del_vlans(struct iavf_adapter *adapter)
 
        spin_lock_bh(&adapter->mac_vlan_list_lock);
 
-       list_for_each_entry(f, &adapter->vlan_filter_list, list) {
-               if (f->remove)
+       list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+               /* since VLAN capabilities are not allowed, we dont want to send
+                * a VLAN delete request because it will most likely fail and
+                * create unnecessary errors/noise, so just free the VLAN
+                * filters marked for removal to enable bailing out before
+                * sending a virtchnl message
+                */
+               if (f->remove && !VLAN_ALLOWED(adapter)) {
+                       list_del(&f->list);
+                       kfree(f);
+               } else if (f->remove) {
                        count++;
+               }
        }
        if (!count) {
                adapter->aq_required &= ~IAVF_FLAG_AQ_DEL_VLAN_FILTER;
@@ -784,6 +794,8 @@ void iavf_request_stats(struct iavf_adapter *adapter)
                /* no error message, this isn't crucial */
                return;
        }
+
+       adapter->aq_required &= ~IAVF_FLAG_AQ_REQUEST_STATS;
        adapter->current_op = VIRTCHNL_OP_GET_STATS;
        vqs.vsi_id = adapter->vsi_res->vsi_id;
        /* queue maps are ignored for this message - only the vsi is used */
@@ -1722,8 +1734,37 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
                }
                spin_lock_bh(&adapter->mac_vlan_list_lock);
                iavf_add_filter(adapter, adapter->hw.mac.addr);
+
+               if (VLAN_ALLOWED(adapter)) {
+                       if (!list_empty(&adapter->vlan_filter_list)) {
+                               struct iavf_vlan_filter *vlf;
+
+                               /* re-add all VLAN filters over virtchnl */
+                               list_for_each_entry(vlf,
+                                                   &adapter->vlan_filter_list,
+                                                   list)
+                                       vlf->add = true;
+
+                               adapter->aq_required |=
+                                       IAVF_FLAG_AQ_ADD_VLAN_FILTER;
+                       }
+               }
+
                spin_unlock_bh(&adapter->mac_vlan_list_lock);
                iavf_process_config(adapter);
+
+               /* unlock crit_lock before acquiring rtnl_lock as other
+                * processes holding rtnl_lock could be waiting for the same
+                * crit_lock
+                */
+               mutex_unlock(&adapter->crit_lock);
+               rtnl_lock();
+               netdev_update_features(adapter->netdev);
+               rtnl_unlock();
+               if (iavf_lock_timeout(&adapter->crit_lock, 10000))
+                       dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n",
+                                __FUNCTION__);
+
                }
                break;
        case VIRTCHNL_OP_ENABLE_QUEUES:
index 4056260..09a3297 100644 (file)
@@ -89,8 +89,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
        if (!vsi->rx_rings)
                goto err_rings;
 
-       /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */
-       vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq),
+       /* txq_map needs to have enough space to track both Tx (stack) rings
+        * and XDP rings; at this point vsi->num_xdp_txq might not be set,
+        * so use num_possible_cpus() as we want to always provide XDP ring
+        * per CPU, regardless of queue count settings from user that might
+        * have come from ethtool's set_channels() callback;
+        */
+       vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()),
                                    sizeof(*vsi->txq_map), GFP_KERNEL);
 
        if (!vsi->txq_map)
index f099797..4d1fc48 100644 (file)
@@ -2609,7 +2609,18 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
                        ice_stat_str(status));
                goto clear_xdp_rings;
        }
-       ice_vsi_assign_bpf_prog(vsi, prog);
+
+       /* assign the prog only when it's not already present on VSI;
+        * this flow is a subject of both ethtool -L and ndo_bpf flows;
+        * VSI rebuild that happens under ethtool -L can expose us to
+        * the bpf_prog refcount issues as we would be swapping same
+        * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
+        * on it as it would be treated as an 'old_prog'; for ndo_bpf
+        * this is not harmful as dev_xdp_install bumps the refcount
+        * before calling the op exposed by the driver;
+        */
+       if (!ice_is_xdp_ena_vsi(vsi))
+               ice_vsi_assign_bpf_prog(vsi, prog);
 
        return 0;
 clear_xdp_rings:
@@ -2785,6 +2796,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
                if (xdp_ring_err)
                        NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed");
        } else {
+               /* safe to call even when prog == vsi->xdp_prog as
+                * dev_xdp_install in net/core/dev.c incremented prog's
+                * refcount so corresponding bpf_prog_put won't cause
+                * underflow
+                */
                ice_vsi_assign_bpf_prog(vsi, prog);
        }
 
index 62a97c4..ef87897 100644 (file)
@@ -429,12 +429,14 @@ static const struct of_device_id orion_mdio_match[] = {
 };
 MODULE_DEVICE_TABLE(of, orion_mdio_match);
 
+#ifdef CONFIG_ACPI
 static const struct acpi_device_id orion_mdio_acpi_match[] = {
        { "MRVL0100", BUS_TYPE_SMI },
        { "MRVL0101", BUS_TYPE_XSMI },
        { },
 };
 MODULE_DEVICE_TABLE(acpi, orion_mdio_acpi_match);
+#endif
 
 static struct platform_driver orion_mdio_driver = {
        .probe = orion_mdio_probe,
index 2b18d89..ce486e1 100644 (file)
@@ -5017,11 +5017,13 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
                mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
        }
 
+       if (port->xdp_prog && mtu > MVPP2_MAX_RX_BUF_SIZE) {
+               netdev_err(dev, "Illegal MTU value %d (> %d) for XDP mode\n",
+                          mtu, (int)MVPP2_MAX_RX_BUF_SIZE);
+               return -EINVAL;
+       }
+
        if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
-               if (port->xdp_prog) {
-                       netdev_err(dev, "Jumbo frames are not supported with XDP\n");
-                       return -EINVAL;
-               }
                if (priv->percpu_pools) {
                        netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
                        mvpp2_bm_switch_buffers(priv, false);
@@ -5307,8 +5309,8 @@ static int mvpp2_xdp_setup(struct mvpp2_port *port, struct netdev_bpf *bpf)
        bool running = netif_running(port->dev);
        bool reset = !prog != !port->xdp_prog;
 
-       if (port->dev->mtu > ETH_DATA_LEN) {
-               NL_SET_ERR_MSG_MOD(bpf->extack, "XDP is not supported with jumbo frames enabled");
+       if (port->dev->mtu > MVPP2_MAX_RX_BUF_SIZE) {
+               NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP");
                return -EOPNOTSUPP;
        }
 
index c7fd466..a09a507 100644 (file)
@@ -236,10 +236,11 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
        u64 lmt_addr, val, tbl_base;
        int pf, vf, num_vfs, hw_vfs;
        void __iomem *lmt_map_base;
-       int index = 0, off = 0;
-       int bytes_not_copied;
        int buf_size = 10240;
+       size_t off = 0;
+       int index = 0;
        char *buf;
+       int ret;
 
        /* don't allow partial reads */
        if (*ppos != 0)
@@ -303,15 +304,17 @@ static ssize_t rvu_dbg_lmtst_map_table_display(struct file *filp,
        }
        off +=  scnprintf(&buf[off], buf_size - 1 - off, "\n");
 
-       bytes_not_copied = copy_to_user(buffer, buf, off);
+       ret = min(off, count);
+       if (copy_to_user(buffer, buf, ret))
+               ret = -EFAULT;
        kfree(buf);
 
        iounmap(lmt_map_base);
-       if (bytes_not_copied)
-               return -EFAULT;
+       if (ret < 0)
+               return ret;
 
-       *ppos = off;
-       return off;
+       *ppos = ret;
+       return ret;
 }
 
 RVU_DEBUG_FOPS(lmtst_map_table, lmtst_map_table_display, NULL);
index 3ce6ccd..b4599fe 100644 (file)
@@ -497,8 +497,8 @@ int prestera_bridge_port_join(struct net_device *br_dev,
 
        br_port = prestera_bridge_port_add(bridge, port->dev);
        if (IS_ERR(br_port)) {
-               err = PTR_ERR(br_port);
-               goto err_brport_create;
+               prestera_bridge_put(bridge);
+               return PTR_ERR(br_port);
        }
 
        err = switchdev_bridge_port_offload(br_port->dev, port->dev, NULL,
@@ -519,8 +519,6 @@ err_port_join:
        switchdev_bridge_port_unoffload(br_port->dev, NULL, NULL, NULL);
 err_switchdev_offload:
        prestera_bridge_port_put(br_port);
-err_brport_create:
-       prestera_bridge_put(bridge);
        return err;
 }
 
@@ -1124,7 +1122,7 @@ static int prestera_switchdev_blk_event(struct notifier_block *unused,
                                                     prestera_port_obj_attr_set);
                break;
        default:
-               err = -EOPNOTSUPP;
+               return NOTIFY_DONE;
        }
 
        return notifier_from_errno(err);
index f71ec4d..8eaa24d 100644 (file)
@@ -339,6 +339,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_PAGE_FAULT_RESUME:
        case MLX5_CMD_OP_QUERY_ESW_FUNCTIONS:
        case MLX5_CMD_OP_DEALLOC_SF:
+       case MLX5_CMD_OP_DESTROY_UCTX:
+       case MLX5_CMD_OP_DESTROY_UMEM:
                return MLX5_CMD_STAT_OK;
 
        case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -464,9 +466,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
        case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
        case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
        case MLX5_CMD_OP_CREATE_UCTX:
-       case MLX5_CMD_OP_DESTROY_UCTX:
        case MLX5_CMD_OP_CREATE_UMEM:
-       case MLX5_CMD_OP_DESTROY_UMEM:
        case MLX5_CMD_OP_ALLOC_MEMIC:
        case MLX5_CMD_OP_MODIFY_XRQ:
        case MLX5_CMD_OP_RELEASE_XRQ_ERROR:
index 02e77ff..5371ad0 100644 (file)
@@ -164,13 +164,14 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
        MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
        MLX5_SET(destroy_cq_in, in, uid, cq->uid);
        err = mlx5_cmd_exec_in(dev, destroy_cq, in);
+       if (err)
+               return err;
 
        synchronize_irq(cq->irqn);
-
        mlx5_cq_put(cq);
        wait_for_completion(&cq->free);
 
-       return err;
+       return 0;
 }
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
index 07c8d98..10d1950 100644 (file)
@@ -507,6 +507,8 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
        if (!mlx5_debugfs_root)
                return;
 
-       if (cq->dbg)
+       if (cq->dbg) {
                rem_res_tree(cq->dbg);
+               cq->dbg = NULL;
+       }
 }
index c1c6e74..2445e2a 100644 (file)
@@ -1356,9 +1356,13 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
 int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
+                       struct mlx5e_tc_mod_hdr_acts *mod_acts,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
 {
+       bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR;
+       int err;
+
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
@@ -1369,6 +1373,17 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;
 
+       if (!clear_action)
+               goto out;
+
+       err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear");
+               return err;
+       }
+       attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+
+out:
        return 0;
 }
 
@@ -1898,23 +1913,16 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
 
        memcpy(pre_ct_attr, attr, attr_sz);
 
-       err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
-       if (err) {
-               ct_dbg("Failed to set register for ct clear");
-               goto err_set_registers;
-       }
-
        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
                                           mod_acts->num_actions,
                                           mod_acts->actions);
        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to add create ct clear mod hdr");
-               goto err_set_registers;
+               goto err_mod_hdr;
        }
 
        pre_ct_attr->modify_hdr = mod_hdr;
-       pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 
        rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
        if (IS_ERR(rule)) {
@@ -1930,7 +1938,7 @@ __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
 
 err_insert:
        mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
-err_set_registers:
+err_mod_hdr:
        netdev_warn(priv->netdev,
                    "Failed to offload ct clear flow, err %d\n", err);
        kfree(pre_ct_attr);
index 363329f..99662af 100644 (file)
@@ -110,6 +110,7 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec);
 int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
+                       struct mlx5e_tc_mod_hdr_acts *mod_acts,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack);
 
@@ -172,6 +173,7 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
 static inline int
 mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
+                       struct mlx5e_tc_mod_hdr_acts *mod_acts,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
 {
index 8f64f2c..b689701 100644 (file)
@@ -102,6 +102,7 @@ struct mlx5e_tc_flow {
        refcount_t refcnt;
        struct rcu_head rcu_head;
        struct completion init_done;
+       struct completion del_hw_done;
        int tunnel_id; /* the mapped tunnel id of this flow */
        struct mlx5_flow_attr *attr;
 };
index 660cca7..042b1ab 100644 (file)
@@ -245,8 +245,14 @@ static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
                                struct list_head *flow_list,
                                int index)
 {
-       if (IS_ERR(mlx5e_flow_get(flow)))
+       if (IS_ERR(mlx5e_flow_get(flow))) {
+               /* Flow is being deleted concurrently. Wait for it to be
+                * unoffloaded from hardware, otherwise deleting encap will
+                * fail.
+                */
+               wait_for_completion(&flow->del_hw_done);
                return;
+       }
        wait_for_completion(&flow->init_done);
 
        flow->tmp_entry_index = index;
index 62abce0..a2a9f68 100644 (file)
@@ -55,6 +55,7 @@ struct mlx5e_ktls_offload_context_rx {
        DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS);
 
        /* resync */
+       spinlock_t lock; /* protects resync fields */
        struct mlx5e_ktls_rx_resync_ctx resync;
        struct list_head list;
 };
@@ -386,14 +387,18 @@ static void resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_r
        struct mlx5e_icosq *sq;
        bool trigger_poll;
 
-       memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
-
        sq = &c->async_icosq;
        ktls_resync = sq->ktls_resync;
+       trigger_poll = false;
 
        spin_lock_bh(&ktls_resync->lock);
-       list_add_tail(&priv_rx->list, &ktls_resync->list);
-       trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+       spin_lock_bh(&priv_rx->lock);
+       memcpy(info->rec_seq, &priv_rx->resync.sw_rcd_sn_be, sizeof(info->rec_seq));
+       if (list_empty(&priv_rx->list)) {
+               list_add_tail(&priv_rx->list, &ktls_resync->list);
+               trigger_poll = !test_and_set_bit(MLX5E_SQ_STATE_PENDING_TLS_RX_RESYNC, &sq->state);
+       }
+       spin_unlock_bh(&priv_rx->lock);
        spin_unlock_bh(&ktls_resync->lock);
 
        if (!trigger_poll)
@@ -617,6 +622,8 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
        if (err)
                goto err_create_key;
 
+       INIT_LIST_HEAD(&priv_rx->list);
+       spin_lock_init(&priv_rx->lock);
        priv_rx->crypto_info  =
                *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
 
@@ -730,10 +737,14 @@ bool mlx5e_ktls_rx_handle_resync_list(struct mlx5e_channel *c, int budget)
                priv_rx = list_first_entry(&local_list,
                                           struct mlx5e_ktls_offload_context_rx,
                                           list);
+               spin_lock(&priv_rx->lock);
                cseg = post_static_params(sq, priv_rx);
-               if (IS_ERR(cseg))
+               if (IS_ERR(cseg)) {
+                       spin_unlock(&priv_rx->lock);
                        break;
-               list_del(&priv_rx->list);
+               }
+               list_del_init(&priv_rx->list);
+               spin_unlock(&priv_rx->lock);
                db_cseg = cseg;
        }
        if (db_cseg)
index 835caa1..3d45f4a 100644 (file)
@@ -1600,6 +1600,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
        }
+       complete_all(&flow->del_hw_done);
 
        if (mlx5_flow_has_geneve_opt(flow))
                mlx5_geneve_tlv_option_del(priv->mdev->geneve);
@@ -3607,7 +3608,9 @@ parse_tc_nic_actions(struct mlx5e_priv *priv,
                        attr->dest_chain = act->chain_index;
                        break;
                case FLOW_ACTION_CT:
-                       err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
+                       err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
+                                                     &parse_attr->mod_hdr_acts,
+                                                     act, extack);
                        if (err)
                                return err;
 
@@ -4276,7 +4279,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
                                return -EOPNOTSUPP;
                        }
-                       err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
+                       err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr,
+                                                     &parse_attr->mod_hdr_acts,
+                                                     act, extack);
                        if (err)
                                return err;
 
@@ -4465,6 +4470,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
        INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
        refcount_set(&flow->refcnt, 1);
        init_completion(&flow->init_done);
+       init_completion(&flow->del_hw_done);
 
        *__flow = flow;
        *__parse_attr = parse_attr;
index ec136b4..51a8cec 100644 (file)
@@ -1305,12 +1305,17 @@ abort:
  */
 int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
 {
+       bool toggle_lag;
        int ret;
 
        if (!mlx5_esw_allowed(esw))
                return 0;
 
-       mlx5_lag_disable_change(esw->dev);
+       toggle_lag = esw->mode == MLX5_ESWITCH_NONE;
+
+       if (toggle_lag)
+               mlx5_lag_disable_change(esw->dev);
+
        down_write(&esw->mode_lock);
        if (esw->mode == MLX5_ESWITCH_NONE) {
                ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs);
@@ -1324,7 +1329,10 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs)
                        esw->esw_funcs.num_vfs = num_vfs;
        }
        up_write(&esw->mode_lock);
-       mlx5_lag_enable_change(esw->dev);
+
+       if (toggle_lag)
+               mlx5_lag_enable_change(esw->dev);
+
        return ret;
 }
 
@@ -1572,6 +1580,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        esw->enabled_vports = 0;
        esw->mode = MLX5_ESWITCH_NONE;
        esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
+       if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
+           MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+               esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
+       else
+               esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 
        dev->priv.eswitch = esw;
        BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
@@ -1934,7 +1947,7 @@ free_out:
        return err;
 }
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
        struct mlx5_eswitch *esw = dev->priv.eswitch;
 
@@ -1948,7 +1961,7 @@ mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev)
        struct mlx5_eswitch *esw;
 
        esw = dev->priv.eswitch;
-       return mlx5_esw_allowed(esw) ? esw->offloads.encap :
+       return (mlx5_eswitch_mode(dev) == MLX5_ESWITCH_OFFLOADS)  ? esw->offloads.encap :
                DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);
index f4eaa58..a464556 100644 (file)
@@ -3183,12 +3183,6 @@ int esw_offloads_enable(struct mlx5_eswitch *esw)
        u64 mapping_id;
        int err;
 
-       if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) &&
-           MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, decap))
-               esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
-       else
-               esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-
        mutex_init(&esw->offloads.termtbl_mutex);
        mlx5_rdma_enable_roce(esw->dev);
 
@@ -3286,7 +3280,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw)
        esw_offloads_metadata_uninit(esw);
        mlx5_rdma_disable_roce(esw->dev);
        mutex_destroy(&esw->offloads.termtbl_mutex);
-       esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
 }
 
 static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode)
@@ -3630,7 +3623,7 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink,
        *encap = esw->offloads.encap;
 unlock:
        up_write(&esw->mode_lock);
-       return 0;
+       return err;
 }
 
 static bool
index 31c99d5..7e0e04c 100644 (file)
@@ -40,7 +40,7 @@
 #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000)
 /* Max number of counters to query in bulk read is 32K */
 #define MLX5_SW_MAX_COUNTERS_BULK BIT(15)
-#define MLX5_SF_NUM_COUNTERS_BULK 6
+#define MLX5_SF_NUM_COUNTERS_BULK 8
 #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
 #define MLX5_FC_POOL_USED_BUFF_RATIO 10
 
index 48d2ea6..4ddf6b3 100644 (file)
@@ -615,6 +615,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
        bool is_bonded, is_in_lag, mode_supported;
        int bond_status = 0;
        int num_slaves = 0;
+       int changed = 0;
        int idx;
 
        if (!netif_is_lag_master(upper))
@@ -653,27 +654,27 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
         */
        is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
 
-       if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
-               NL_SET_ERR_MSG_MOD(info->info.extack,
-                                  "Can't activate LAG offload, PF is configured with more than 64 VFs");
-               return 0;
-       }
-
        /* Lag mode must be activebackup or hash. */
        mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
                         tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
 
-       if (is_in_lag && !mode_supported)
-               NL_SET_ERR_MSG_MOD(info->info.extack,
-                                  "Can't activate LAG offload, TX type isn't supported");
-
        is_bonded = is_in_lag && mode_supported;
        if (tracker->is_bonded != is_bonded) {
                tracker->is_bonded = is_bonded;
-               return 1;
+               changed = 1;
        }
 
-       return 0;
+       if (!is_in_lag)
+               return changed;
+
+       if (!mlx5_lag_is_ready(ldev))
+               NL_SET_ERR_MSG_MOD(info->info.extack,
+                                  "Can't activate LAG offload, PF is configured with more than 64 VFs");
+       else if (!mode_supported)
+               NL_SET_ERR_MSG_MOD(info->info.extack,
+                                  "Can't activate LAG offload, TX type isn't supported");
+
+       return changed;
 }
 
 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
@@ -716,9 +717,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
 
        ldev    = container_of(this, struct mlx5_lag, nb);
 
-       if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
-               return NOTIFY_DONE;
-
        tracker = ldev->tracker;
 
        switch (event) {
index 49089cb..8cbd36c 100644 (file)
@@ -135,25 +135,14 @@ static void dr_domain_fill_uplink_caps(struct mlx5dr_domain *dmn,
 
 static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
                                 u16 vport_number,
+                                bool other_vport,
                                 struct mlx5dr_cmd_vport_cap *vport_caps)
 {
-       u16 cmd_vport = vport_number;
-       bool other_vport = true;
        int ret;
 
-       if (vport_number == MLX5_VPORT_UPLINK) {
-               dr_domain_fill_uplink_caps(dmn, vport_caps);
-               return 0;
-       }
-
-       if (dmn->info.caps.is_ecpf && vport_number == MLX5_VPORT_ECPF) {
-               other_vport = false;
-               cmd_vport = 0;
-       }
-
        ret = mlx5dr_cmd_query_esw_vport_context(dmn->mdev,
                                                 other_vport,
-                                                cmd_vport,
+                                                vport_number,
                                                 &vport_caps->icm_address_rx,
                                                 &vport_caps->icm_address_tx);
        if (ret)
@@ -161,7 +150,7 @@ static int dr_domain_query_vport(struct mlx5dr_domain *dmn,
 
        ret = mlx5dr_cmd_query_gvmi(dmn->mdev,
                                    other_vport,
-                                   cmd_vport,
+                                   vport_number,
                                    &vport_caps->vport_gvmi);
        if (ret)
                return ret;
@@ -176,9 +165,15 @@ static int dr_domain_query_esw_mngr(struct mlx5dr_domain *dmn)
 {
        return dr_domain_query_vport(dmn,
                                     dmn->info.caps.is_ecpf ? MLX5_VPORT_ECPF : 0,
+                                    false,
                                     &dmn->info.caps.vports.esw_manager_caps);
 }
 
+static void dr_domain_query_uplink(struct mlx5dr_domain *dmn)
+{
+       dr_domain_fill_uplink_caps(dmn, &dmn->info.caps.vports.uplink_caps);
+}
+
 static struct mlx5dr_cmd_vport_cap *
 dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
 {
@@ -190,7 +185,7 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
        if (!vport_caps)
                return NULL;
 
-       ret = dr_domain_query_vport(dmn, vport, vport_caps);
+       ret = dr_domain_query_vport(dmn, vport, true, vport_caps);
        if (ret) {
                kvfree(vport_caps);
                return NULL;
@@ -207,16 +202,26 @@ dr_domain_add_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
        return vport_caps;
 }
 
+static bool dr_domain_is_esw_mgr_vport(struct mlx5dr_domain *dmn, u16 vport)
+{
+       struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
+
+       return (caps->is_ecpf && vport == MLX5_VPORT_ECPF) ||
+              (!caps->is_ecpf && vport == 0);
+}
+
 struct mlx5dr_cmd_vport_cap *
 mlx5dr_domain_get_vport_cap(struct mlx5dr_domain *dmn, u16 vport)
 {
        struct mlx5dr_cmd_caps *caps = &dmn->info.caps;
        struct mlx5dr_cmd_vport_cap *vport_caps;
 
-       if ((caps->is_ecpf && vport == MLX5_VPORT_ECPF) ||
-           (!caps->is_ecpf && vport == 0))
+       if (dr_domain_is_esw_mgr_vport(dmn, vport))
                return &caps->vports.esw_manager_caps;
 
+       if (vport == MLX5_VPORT_UPLINK)
+               return &caps->vports.uplink_caps;
+
 vport_load:
        vport_caps = xa_load(&caps->vports.vports_caps_xa, vport);
        if (vport_caps)
@@ -241,17 +246,6 @@ static void dr_domain_clear_vports(struct mlx5dr_domain *dmn)
        }
 }
 
-static int dr_domain_query_uplink(struct mlx5dr_domain *dmn)
-{
-       struct mlx5dr_cmd_vport_cap *vport_caps;
-
-       vport_caps = mlx5dr_domain_get_vport_cap(dmn, MLX5_VPORT_UPLINK);
-       if (!vport_caps)
-               return -EINVAL;
-
-       return 0;
-}
-
 static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
                                    struct mlx5dr_domain *dmn)
 {
@@ -281,11 +275,7 @@ static int dr_domain_query_fdb_caps(struct mlx5_core_dev *mdev,
                goto free_vports_caps_xa;
        }
 
-       ret = dr_domain_query_uplink(dmn);
-       if (ret) {
-               mlx5dr_err(dmn, "Failed to query uplink vport caps (err: %d)", ret);
-               goto free_vports_caps_xa;
-       }
+       dr_domain_query_uplink(dmn);
 
        return 0;
 
index 75c775b..7933652 100644 (file)
@@ -924,11 +924,12 @@ static int dr_matcher_init(struct mlx5dr_matcher *matcher,
 
        /* Check that all mask data was consumed */
        for (i = 0; i < consumed_mask.match_sz; i++) {
-               if (consumed_mask.match_buf[i]) {
-                       mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n");
-                       ret = -EOPNOTSUPP;
-                       goto free_consumed_mask;
-               }
+               if (!((u8 *)consumed_mask.match_buf)[i])
+                       continue;
+
+               mlx5dr_dbg(dmn, "Match param mask contains unsupported parameters\n");
+               ret = -EOPNOTSUPP;
+               goto free_consumed_mask;
        }
 
        ret =  0;
index 3028b77..2333c24 100644 (file)
@@ -764,6 +764,7 @@ struct mlx5dr_roce_cap {
 
 struct mlx5dr_vports {
        struct mlx5dr_cmd_vport_cap esw_manager_caps;
+       struct mlx5dr_cmd_vport_cap uplink_caps;
        struct xarray vports_caps_xa;
 };
 
index 5925db3..03e5bad 100644 (file)
@@ -2153,7 +2153,7 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
        max_ports = mlxsw_core_max_ports(mlxsw_sp->core);
        local_port = mlxsw_reg_pude_local_port_get(pude_pl);
 
-       if (WARN_ON_ONCE(local_port >= max_ports))
+       if (WARN_ON_ONCE(!local_port || local_port >= max_ports))
                return;
        mlxsw_sp_port = mlxsw_sp->ports[local_port];
        if (!mlxsw_sp_port)
@@ -3290,10 +3290,10 @@ mlxsw_sp_resources_rif_mac_profile_register(struct mlxsw_core *mlxsw_core)
        u8 max_rif_mac_profiles;
 
        if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_RIF_MAC_PROFILES))
-               return -EIO;
-
-       max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
-                                                 MAX_RIF_MAC_PROFILES);
+               max_rif_mac_profiles = 1;
+       else
+               max_rif_mac_profiles = MLXSW_CORE_RES_GET(mlxsw_core,
+                                                         MAX_RIF_MAC_PROFILES);
        devlink_resource_size_params_init(&size_params, max_rif_mac_profiles,
                                          max_rif_mac_profiles, 1,
                                          DEVLINK_RESOURCE_UNIT_ENTRY);
index df20373..0b1865e 100644 (file)
@@ -565,7 +565,6 @@ struct nfp_net_dp {
  * @exn_name:           Name for Exception interrupt
  * @shared_handler:     Handler for shared interrupts
  * @shared_name:        Name for shared interrupt
- * @me_freq_mhz:        ME clock_freq (MHz)
  * @reconfig_lock:     Protects @reconfig_posted, @reconfig_timer_active,
  *                     @reconfig_sync_present and HW reconfiguration request
  *                     regs/machinery from async requests (sync must take
@@ -650,8 +649,6 @@ struct nfp_net {
        irq_handler_t shared_handler;
        char shared_name[IFNAMSIZ + 8];
 
-       u32 me_freq_mhz;
-
        bool link_up;
        spinlock_t link_status_lock;
 
index 1de076f..cf78829 100644 (file)
@@ -1344,7 +1344,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
         * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
         * count.
         */
-       factor = nn->me_freq_mhz / 16;
+       factor = nn->tlv_caps.me_freq_mhz / 16;
 
        /* Each pair of (usecs, max_frames) fields specifies that interrupts
         * should be coalesced until
index cfeb762..07a00dd 100644 (file)
@@ -1209,7 +1209,7 @@ static void *nixge_get_nvmem_address(struct device *dev)
 
        cell = nvmem_cell_get(dev, "address");
        if (IS_ERR(cell))
-               return NULL;
+               return cell;
 
        mac = nvmem_cell_read(cell, &cell_size);
        nvmem_cell_put(cell);
@@ -1282,7 +1282,7 @@ static int nixge_probe(struct platform_device *pdev)
        ndev->max_mtu = NIXGE_JUMBO_MTU;
 
        mac_addr = nixge_get_nvmem_address(&pdev->dev);
-       if (mac_addr && is_valid_ether_addr(mac_addr)) {
+       if (!IS_ERR(mac_addr) && is_valid_ether_addr(mac_addr)) {
                eth_hw_addr_set(ndev, mac_addr);
                kfree(mac_addr);
        } else {
index bbe21db..86c44bc 100644 (file)
@@ -5217,8 +5217,8 @@ static int rtl_get_ether_clk(struct rtl8169_private *tp)
 
 static void rtl_init_mac_address(struct rtl8169_private *tp)
 {
+       u8 mac_addr[ETH_ALEN] __aligned(2) = {};
        struct net_device *dev = tp->dev;
-       u8 mac_addr[ETH_ALEN];
        int rc;
 
        rc = eth_platform_get_mac_address(tp_to_dev(tp), mac_addr);
@@ -5233,7 +5233,8 @@ static void rtl_init_mac_address(struct rtl8169_private *tp)
        if (is_valid_ether_addr(mac_addr))
                goto done;
 
-       eth_hw_addr_random(dev);
+       eth_random_addr(mac_addr);
+       dev->addr_assign_type = NET_ADDR_RANDOM;
        dev_warn(tp_to_dev(tp), "can't read MAC address, setting random one\n");
 done:
        eth_hw_addr_set(dev, mac_addr);
index 43eead7..5f12973 100644 (file)
@@ -314,6 +314,7 @@ int stmmac_mdio_reset(struct mii_bus *mii);
 int stmmac_xpcs_setup(struct mii_bus *mii);
 void stmmac_set_ethtool_ops(struct net_device *netdev);
 
+int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags);
 void stmmac_ptp_register(struct stmmac_priv *priv);
 void stmmac_ptp_unregister(struct stmmac_priv *priv);
 int stmmac_open(struct net_device *dev);
index d3f350c..f12097c 100644 (file)
 #include "dwxgmac2.h"
 #include "hwif.h"
 
+/* As long as the interface is active, we keep the timestamping counter enabled
+ * with fine resolution and binary rollover. This avoid non-monotonic behavior
+ * (clock jumps) when changing timestamping settings at runtime.
+ */
+#define STMMAC_HWTS_ACTIVE     (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \
+                                PTP_TCR_TSCTRLSSR)
+
 #define        STMMAC_ALIGN(x)         ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16)
 #define        TSO_MAX_BUFF_SIZE       (SZ_16K - 1)
 
@@ -511,6 +518,14 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
        return true;
 }
 
+static inline u32 stmmac_cdc_adjust(struct stmmac_priv *priv)
+{
+       /* Correct the clk domain crossing(CDC) error */
+       if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate)
+               return (2 * NSEC_PER_SEC) / priv->plat->clk_ptp_rate;
+       return 0;
+}
+
 /* stmmac_get_tx_hwtstamp - get HW TX timestamps
  * @priv: driver private structure
  * @p : descriptor pointer
@@ -524,7 +539,6 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
 {
        struct skb_shared_hwtstamps shhwtstamp;
        bool found = false;
-       s64 adjust = 0;
        u64 ns = 0;
 
        if (!priv->hwts_tx_en)
@@ -543,12 +557,7 @@ static void stmmac_get_tx_hwtstamp(struct stmmac_priv *priv,
        }
 
        if (found) {
-               /* Correct the clk domain crossing(CDC) error */
-               if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
-                       adjust += -(2 * (NSEC_PER_SEC /
-                                        priv->plat->clk_ptp_rate));
-                       ns += adjust;
-               }
+               ns -= stmmac_cdc_adjust(priv);
 
                memset(&shhwtstamp, 0, sizeof(struct skb_shared_hwtstamps));
                shhwtstamp.hwtstamp = ns_to_ktime(ns);
@@ -573,7 +582,6 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
 {
        struct skb_shared_hwtstamps *shhwtstamp = NULL;
        struct dma_desc *desc = p;
-       u64 adjust = 0;
        u64 ns = 0;
 
        if (!priv->hwts_rx_en)
@@ -586,11 +594,7 @@ static void stmmac_get_rx_hwtstamp(struct stmmac_priv *priv, struct dma_desc *p,
        if (stmmac_get_rx_timestamp_status(priv, p, np, priv->adv_ts)) {
                stmmac_get_timestamp(priv, desc, priv->adv_ts, &ns);
 
-               /* Correct the clk domain crossing(CDC) error */
-               if (priv->plat->has_gmac4 && priv->plat->clk_ptp_rate) {
-                       adjust += 2 * (NSEC_PER_SEC / priv->plat->clk_ptp_rate);
-                       ns -= adjust;
-               }
+               ns -= stmmac_cdc_adjust(priv);
 
                netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
                shhwtstamp = skb_hwtstamps(skb);
@@ -616,8 +620,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
        struct hwtstamp_config config;
-       struct timespec64 now;
-       u64 temp = 0;
        u32 ptp_v2 = 0;
        u32 tstamp_all = 0;
        u32 ptp_over_ipv4_udp = 0;
@@ -626,11 +628,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
        u32 snap_type_sel = 0;
        u32 ts_master_en = 0;
        u32 ts_event_en = 0;
-       u32 sec_inc = 0;
-       u32 value = 0;
-       bool xmac;
-
-       xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
 
        if (!(priv->dma_cap.time_stamp || priv->adv_ts)) {
                netdev_alert(priv->dev, "No support for HW time stamping\n");
@@ -792,42 +789,17 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
        priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 0 : 1);
        priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON;
 
-       if (!priv->hwts_tx_en && !priv->hwts_rx_en)
-               stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0);
-       else {
-               value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR |
-                        tstamp_all | ptp_v2 | ptp_over_ethernet |
-                        ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en |
-                        ts_master_en | snap_type_sel);
-               stmmac_config_hw_tstamping(priv, priv->ptpaddr, value);
-
-               /* program Sub Second Increment reg */
-               stmmac_config_sub_second_increment(priv,
-                               priv->ptpaddr, priv->plat->clk_ptp_rate,
-                               xmac, &sec_inc);
-               temp = div_u64(1000000000ULL, sec_inc);
-
-               /* Store sub second increment and flags for later use */
-               priv->sub_second_inc = sec_inc;
-               priv->systime_flags = value;
-
-               /* calculate default added value:
-                * formula is :
-                * addend = (2^32)/freq_div_ratio;
-                * where, freq_div_ratio = 1e9ns/sec_inc
-                */
-               temp = (u64)(temp << 32);
-               priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
-               stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
-
-               /* initialize system time */
-               ktime_get_real_ts64(&now);
+       priv->systime_flags = STMMAC_HWTS_ACTIVE;
 
-               /* lower 32 bits of tv_sec are safe until y2106 */
-               stmmac_init_systime(priv, priv->ptpaddr,
-                               (u32)now.tv_sec, now.tv_nsec);
+       if (priv->hwts_tx_en || priv->hwts_rx_en) {
+               priv->systime_flags |= tstamp_all | ptp_v2 |
+                                      ptp_over_ethernet | ptp_over_ipv6_udp |
+                                      ptp_over_ipv4_udp | ts_event_en |
+                                      ts_master_en | snap_type_sel;
        }
 
+       stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags);
+
        memcpy(&priv->tstamp_config, &config, sizeof(config));
 
        return copy_to_user(ifr->ifr_data, &config,
@@ -856,6 +828,66 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 }
 
 /**
+ * stmmac_init_tstamp_counter - init hardware timestamping counter
+ * @priv: driver private structure
+ * @systime_flags: timestamping flags
+ * Description:
+ * Initialize hardware counter for packet timestamping.
+ * This is valid as long as the interface is open and not suspended.
+ * Will be rerun after resuming from suspend, case in which the timestamping
+ * flags updated by stmmac_hwtstamp_set() also need to be restored.
+ */
+int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags)
+{
+       bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+       struct timespec64 now;
+       u32 sec_inc = 0;
+       u64 temp = 0;
+       int ret;
+
+       if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
+               return -EOPNOTSUPP;
+
+       ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+       if (ret < 0) {
+               netdev_warn(priv->dev,
+                           "failed to enable PTP reference clock: %pe\n",
+                           ERR_PTR(ret));
+               return ret;
+       }
+
+       stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
+       priv->systime_flags = systime_flags;
+
+       /* program Sub Second Increment reg */
+       stmmac_config_sub_second_increment(priv, priv->ptpaddr,
+                                          priv->plat->clk_ptp_rate,
+                                          xmac, &sec_inc);
+       temp = div_u64(1000000000ULL, sec_inc);
+
+       /* Store sub second increment for later use */
+       priv->sub_second_inc = sec_inc;
+
+       /* calculate default added value:
+        * formula is :
+        * addend = (2^32)/freq_div_ratio;
+        * where, freq_div_ratio = 1e9ns/sec_inc
+        */
+       temp = (u64)(temp << 32);
+       priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate);
+       stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend);
+
+       /* initialize system time */
+       ktime_get_real_ts64(&now);
+
+       /* lower 32 bits of tv_sec are safe until y2106 */
+       stmmac_init_systime(priv, priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter);
+
+/**
  * stmmac_init_ptp - init PTP
  * @priv: driver private structure
  * Description: this is to verify if the HW supports the PTPv1 or PTPv2.
@@ -865,9 +897,11 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 static int stmmac_init_ptp(struct stmmac_priv *priv)
 {
        bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac;
+       int ret;
 
-       if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
-               return -EOPNOTSUPP;
+       ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE);
+       if (ret)
+               return ret;
 
        priv->adv_ts = 0;
        /* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */
@@ -3275,10 +3309,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
        stmmac_mmc_setup(priv);
 
        if (init_ptp) {
-               ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
-               if (ret < 0)
-                       netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret);
-
                ret = stmmac_init_ptp(priv);
                if (ret == -EOPNOTSUPP)
                        netdev_warn(priv->dev, "PTP not supported by HW\n");
@@ -5164,12 +5194,13 @@ read_again:
                if (likely(!(status & rx_not_ls)) &&
                    (likely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
                     unlikely(status != llc_snap))) {
-                       if (buf2_len)
+                       if (buf2_len) {
                                buf2_len -= ETH_FCS_LEN;
-                       else
+                               len -= ETH_FCS_LEN;
+                       } else if (buf1_len) {
                                buf1_len -= ETH_FCS_LEN;
-
-                       len -= ETH_FCS_LEN;
+                               len -= ETH_FCS_LEN;
+                       }
                }
 
                if (!skb) {
index 232ac98..5d29f33 100644 (file)
@@ -816,7 +816,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev)
                if (ret)
                        return ret;
 
-               clk_prepare_enable(priv->plat->clk_ptp_ref);
+               stmmac_init_tstamp_counter(priv, priv->systime_flags);
        }
 
        return 0;
index e2b332b..7da2bb8 100644 (file)
@@ -31,6 +31,8 @@
 
 #define AX_MTU         236
 
+/* some arch define END as assembly function ending, just undef it */
+#undef END
 /* SLIP/KISS protocol characters. */
 #define END             0300           /* indicates end of frame       */
 #define ESC             0333           /* indicates byte stuffing      */
index cff5173..d57472e 100644 (file)
@@ -661,22 +661,6 @@ void ipa_cmd_pipeline_clear_wait(struct ipa *ipa)
        wait_for_completion(&ipa->completion);
 }
 
-void ipa_cmd_pipeline_clear(struct ipa *ipa)
-{
-       u32 count = ipa_cmd_pipeline_clear_count();
-       struct gsi_trans *trans;
-
-       trans = ipa_cmd_trans_alloc(ipa, count);
-       if (trans) {
-               ipa_cmd_pipeline_clear_add(trans);
-               gsi_trans_commit_wait(trans);
-               ipa_cmd_pipeline_clear_wait(ipa);
-       } else {
-               dev_err(&ipa->pdev->dev,
-                       "error allocating %u entry tag transaction\n", count);
-       }
-}
-
 static struct ipa_cmd_info *
 ipa_cmd_info_alloc(struct ipa_endpoint *endpoint, u32 tre_count)
 {
index 69cd085..05ed7e4 100644 (file)
@@ -164,12 +164,6 @@ u32 ipa_cmd_pipeline_clear_count(void);
 void ipa_cmd_pipeline_clear_wait(struct ipa *ipa);
 
 /**
- * ipa_cmd_pipeline_clear() - Clear the hardware pipeline
- * @ipa:       - IPA pointer
- */
-void ipa_cmd_pipeline_clear(struct ipa *ipa);
-
-/**
  * ipa_cmd_trans_alloc() - Allocate a transaction for the command TX endpoint
  * @ipa:       IPA pointer
  * @tre_count: Number of elements in the transaction
index ef790fd..03a1709 100644 (file)
@@ -1636,8 +1636,6 @@ void ipa_endpoint_suspend(struct ipa *ipa)
        if (ipa->modem_netdev)
                ipa_modem_suspend(ipa->modem_netdev);
 
-       ipa_cmd_pipeline_clear(ipa);
-
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);
        ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
 }
index cdfa98a..a448ec1 100644 (file)
@@ -28,6 +28,7 @@
 #include "ipa_reg.h"
 #include "ipa_mem.h"
 #include "ipa_table.h"
+#include "ipa_smp2p.h"
 #include "ipa_modem.h"
 #include "ipa_uc.h"
 #include "ipa_interrupt.h"
@@ -801,6 +802,11 @@ static int ipa_remove(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        int ret;
 
+       /* Prevent the modem from triggering a call to ipa_setup().  This
+        * also ensures a modem-initiated setup that's underway completes.
+        */
+       ipa_smp2p_irq_disable_setup(ipa);
+
        ret = pm_runtime_get_sync(dev);
        if (WARN_ON(ret < 0))
                goto out_power_put;
index ad116bc..d0ab4d7 100644 (file)
@@ -339,9 +339,6 @@ int ipa_modem_stop(struct ipa *ipa)
        if (state != IPA_MODEM_STATE_RUNNING)
                return -EBUSY;
 
-       /* Prevent the modem from triggering a call to ipa_setup() */
-       ipa_smp2p_disable(ipa);
-
        /* Clean up the netdev and endpoints if it was started */
        if (netdev) {
                struct ipa_priv *priv = netdev_priv(netdev);
@@ -369,6 +366,9 @@ static void ipa_modem_crashed(struct ipa *ipa)
        struct device *dev = &ipa->pdev->dev;
        int ret;
 
+       /* Prevent the modem from triggering a call to ipa_setup() */
+       ipa_smp2p_irq_disable_setup(ipa);
+
        ret = pm_runtime_get_sync(dev);
        if (ret < 0) {
                dev_err(dev, "error %d getting power to handle crash\n", ret);
index df7639c..2112336 100644 (file)
@@ -53,7 +53,7 @@
  * @setup_ready_irq:   IPA interrupt triggered by modem to signal GSI ready
  * @power_on:          Whether IPA power is on
  * @notified:          Whether modem has been notified of power state
- * @disabled:          Whether setup ready interrupt handling is disabled
+ * @setup_disabled:    Whether setup ready interrupt handler is disabled
  * @mutex:             Mutex protecting ready-interrupt/shutdown interlock
  * @panic_notifier:    Panic notifier structure
 */
@@ -67,7 +67,7 @@ struct ipa_smp2p {
        u32 setup_ready_irq;
        bool power_on;
        bool notified;
-       bool disabled;
+       bool setup_disabled;
        struct mutex mutex;
        struct notifier_block panic_notifier;
 };
@@ -155,11 +155,9 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
        struct device *dev;
        int ret;
 
-       mutex_lock(&smp2p->mutex);
-
-       if (smp2p->disabled)
-               goto out_mutex_unlock;
-       smp2p->disabled = true;         /* If any others arrive, ignore them */
+       /* Ignore any (spurious) interrupts received after the first */
+       if (smp2p->ipa->setup_complete)
+               return IRQ_HANDLED;
 
        /* Power needs to be active for setup */
        dev = &smp2p->ipa->pdev->dev;
@@ -176,8 +174,6 @@ static irqreturn_t ipa_smp2p_modem_setup_ready_isr(int irq, void *dev_id)
 out_power_put:
        pm_runtime_mark_last_busy(dev);
        (void)pm_runtime_put_autosuspend(dev);
-out_mutex_unlock:
-       mutex_unlock(&smp2p->mutex);
 
        return IRQ_HANDLED;
 }
@@ -313,7 +309,7 @@ void ipa_smp2p_exit(struct ipa *ipa)
        kfree(smp2p);
 }
 
-void ipa_smp2p_disable(struct ipa *ipa)
+void ipa_smp2p_irq_disable_setup(struct ipa *ipa)
 {
        struct ipa_smp2p *smp2p = ipa->smp2p;
 
@@ -322,7 +318,10 @@ void ipa_smp2p_disable(struct ipa *ipa)
 
        mutex_lock(&smp2p->mutex);
 
-       smp2p->disabled = true;
+       if (!smp2p->setup_disabled) {
+               disable_irq(smp2p->setup_ready_irq);
+               smp2p->setup_disabled = true;
+       }
 
        mutex_unlock(&smp2p->mutex);
 }
index 99a9567..59cee31 100644 (file)
@@ -27,13 +27,12 @@ int ipa_smp2p_init(struct ipa *ipa, bool modem_init);
 void ipa_smp2p_exit(struct ipa *ipa);
 
 /**
- * ipa_smp2p_disable() - Prevent "ipa-setup-ready" interrupt handling
+ * ipa_smp2p_irq_disable_setup() - Disable the "setup ready" interrupt
  * @ipa:       IPA pointer
  *
- * Prevent handling of the "setup ready" interrupt from the modem.
- * This is used before initiating shutdown of the driver.
+ * Disable the "ipa-setup-ready" interrupt from the modem.
  */
-void ipa_smp2p_disable(struct ipa *ipa);
+void ipa_smp2p_irq_disable_setup(struct ipa *ipa);
 
 /**
  * ipa_smp2p_notify_reset() - Reset modem notification state
index c420e59..3d7f88b 100644 (file)
@@ -40,6 +40,8 @@
                                           insmod -oslip_maxdev=nnn     */
 #define SL_MTU         296             /* 296; I am used to 600- FvK   */
 
+/* some arch define END as assembly function ending, just undef it */
+#undef END
 /* SLIP protocol characters. */
 #define END             0300           /* indicates end of frame       */
 #define ESC             0333           /* indicates byte stuffing      */
index 4a02f33..f9877a3 100644 (file)
@@ -9603,12 +9603,9 @@ static int rtl8152_probe(struct usb_interface *intf,
                netdev->hw_features &= ~NETIF_F_RXCSUM;
        }
 
-       if (le16_to_cpu(udev->descriptor.idVendor) == VENDOR_ID_LENOVO) {
-               switch (le16_to_cpu(udev->descriptor.idProduct)) {
-               case DEVICE_ID_THINKPAD_THUNDERBOLT3_DOCK_GEN2:
-               case DEVICE_ID_THINKPAD_USB_C_DOCK_GEN2:
-                       tp->lenovo_macpassthru = 1;
-               }
+       if (udev->parent &&
+                       le16_to_cpu(udev->parent->descriptor.idVendor) == VENDOR_ID_LENOVO) {
+               tp->lenovo_macpassthru = 1;
        }
 
        if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x3011 && udev->serial &&
index 20fe4cd..abe0149 100644 (file)
@@ -1050,6 +1050,14 @@ static const struct net_device_ops smsc95xx_netdev_ops = {
        .ndo_set_features       = smsc95xx_set_features,
 };
 
+static void smsc95xx_handle_link_change(struct net_device *net)
+{
+       struct usbnet *dev = netdev_priv(net);
+
+       phy_print_status(net->phydev);
+       usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
+}
+
 static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
 {
        struct smsc95xx_priv *pdata;
@@ -1154,6 +1162,17 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf)
        dev->net->min_mtu = ETH_MIN_MTU;
        dev->net->max_mtu = ETH_DATA_LEN;
        dev->hard_mtu = dev->net->mtu + dev->net->hard_header_len;
+
+       ret = phy_connect_direct(dev->net, pdata->phydev,
+                                &smsc95xx_handle_link_change,
+                                PHY_INTERFACE_MODE_MII);
+       if (ret) {
+               netdev_err(dev->net, "can't attach PHY to %s\n", pdata->mdiobus->id);
+               goto unregister_mdio;
+       }
+
+       phy_attached_info(dev->net->phydev);
+
        return 0;
 
 unregister_mdio:
@@ -1171,47 +1190,25 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf)
 {
        struct smsc95xx_priv *pdata = dev->driver_priv;
 
+       phy_disconnect(dev->net->phydev);
        mdiobus_unregister(pdata->mdiobus);
        mdiobus_free(pdata->mdiobus);
        netif_dbg(dev, ifdown, dev->net, "free pdata\n");
        kfree(pdata);
 }
 
-static void smsc95xx_handle_link_change(struct net_device *net)
-{
-       struct usbnet *dev = netdev_priv(net);
-
-       phy_print_status(net->phydev);
-       usbnet_defer_kevent(dev, EVENT_LINK_CHANGE);
-}
-
 static int smsc95xx_start_phy(struct usbnet *dev)
 {
-       struct smsc95xx_priv *pdata = dev->driver_priv;
-       struct net_device *net = dev->net;
-       int ret;
+       phy_start(dev->net->phydev);
 
-       ret = smsc95xx_reset(dev);
-       if (ret < 0)
-               return ret;
-
-       ret = phy_connect_direct(net, pdata->phydev,
-                                &smsc95xx_handle_link_change,
-                                PHY_INTERFACE_MODE_MII);
-       if (ret) {
-               netdev_err(net, "can't attach PHY to %s\n", pdata->mdiobus->id);
-               return ret;
-       }
-
-       phy_attached_info(net->phydev);
-       phy_start(net->phydev);
        return 0;
 }
 
-static int smsc95xx_disconnect_phy(struct usbnet *dev)
+static int smsc95xx_stop(struct usbnet *dev)
 {
-       phy_stop(dev->net->phydev);
-       phy_disconnect(dev->net->phydev);
+       if (dev->net->phydev)
+               phy_stop(dev->net->phydev);
+
        return 0;
 }
 
@@ -1966,7 +1963,7 @@ static const struct driver_info smsc95xx_info = {
        .unbind         = smsc95xx_unbind,
        .link_reset     = smsc95xx_link_reset,
        .reset          = smsc95xx_start_phy,
-       .stop           = smsc95xx_disconnect_phy,
+       .stop           = smsc95xx_stop,
        .rx_fixup       = smsc95xx_rx_fixup,
        .tx_fixup       = smsc95xx_tx_fixup,
        .status         = smsc95xx_status,
index 32be5a0..b10f015 100644 (file)
@@ -161,9 +161,10 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
                 * if it is then we are done, unless there is an
                 * interrupt-map which takes precedence.
                 */
+               bool intc = of_property_read_bool(ipar, "interrupt-controller");
+
                imap = of_get_property(ipar, "interrupt-map", &imaplen);
-               if (imap == NULL &&
-                   of_property_read_bool(ipar, "interrupt-controller")) {
+               if (imap == NULL && intc) {
                        pr_debug(" -> got it !\n");
                        return 0;
                }
@@ -244,8 +245,20 @@ int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq)
 
                        pr_debug(" -> imaplen=%d\n", imaplen);
                }
-               if (!match)
+               if (!match) {
+                       if (intc) {
+                               /*
+                                * The PASEMI Nemo is a known offender, so
+                                * let's only warn for anyone else.
+                                */
+                               WARN(!IS_ENABLED(CONFIG_PPC_PASEMI),
+                                    "%pOF interrupt-map failed, using interrupt-controller\n",
+                                    ipar);
+                               return 0;
+                       }
+
                        goto fail;
+               }
 
                /*
                 * Successfully parsed an interrrupt-map translation; copy new
index 07813fb..b3faf89 100644 (file)
@@ -76,6 +76,7 @@ static void of_device_make_bus_id(struct device *dev)
        struct device_node *node = dev->of_node;
        const __be32 *reg;
        u64 addr;
+       u32 mask;
 
        /* Construct the name, using parent nodes if necessary to ensure uniqueness */
        while (node->parent) {
@@ -85,8 +86,13 @@ static void of_device_make_bus_id(struct device *dev)
                 */
                reg = of_get_property(node, "reg", NULL);
                if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) {
-                       dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
-                                    addr, node, dev_name(dev));
+                       if (!of_property_read_u32(node, "mask", &mask))
+                               dev_set_name(dev, dev_name(dev) ? "%llx.%x.%pOFn:%s" : "%llx.%x.%pOFn",
+                                            addr, ffs(mask) - 1, node, dev_name(dev));
+
+                       else
+                               dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
+                                            addr, node, dev_name(dev));
                        return;
                }
 
index e917bb3..93b1411 100644 (file)
@@ -270,7 +270,8 @@ config VMD
 
 config PCIE_BRCMSTB
        tristate "Broadcom Brcmstb PCIe host controller"
-       depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCM4908 || COMPILE_TEST
+       depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCM4908 || \
+                  BMIPS_GENERIC || COMPILE_TEST
        depends on OF
        depends on PCI_MSI_IRQ_DOMAIN
        default ARCH_BRCMSTB
index dcefdb4..a89b7de 100644 (file)
@@ -57,6 +57,29 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
        return zpci_deconfigure_device(zdev);
 }
 
+static int reset_slot(struct hotplug_slot *hotplug_slot, bool probe)
+{
+       struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
+                                            hotplug_slot);
+
+       if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+               return -EIO;
+       /*
+        * We can't take the zdev->lock as reset_slot may be called during
+        * probing and/or device removal which already happens under the
+        * zdev->lock. Instead the user should use the higher level
+        * pci_reset_function() or pci_bus_reset() which hold the PCI device
+        * lock preventing concurrent removal. If not using these functions
+        * holding the PCI device lock is required.
+        */
+
+       /* As long as the function is configured we can reset */
+       if (probe)
+               return 0;
+
+       return zpci_hot_reset_device(zdev);
+}
+
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
        struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev,
@@ -76,6 +99,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
        .enable_slot =          enable_slot,
        .disable_slot =         disable_slot,
+       .reset_slot =           reset_slot,
        .get_power_status =     get_power_status,
        .get_adapter_status =   get_adapter_status,
 };
index 12e296d..48e3f4e 100644 (file)
@@ -148,6 +148,9 @@ static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 s
        raw_spinlock_t *lock = &desc->dev->msi_lock;
        unsigned long flags;
 
+       if (!desc->msi_attrib.can_mask)
+               return;
+
        raw_spin_lock_irqsave(lock, flags);
        desc->msi_mask &= ~clear;
        desc->msi_mask |= set;
@@ -181,7 +184,8 @@ static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl)
 {
        void __iomem *desc_addr = pci_msix_desc_addr(desc);
 
-       writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+       if (desc->msi_attrib.can_mask)
+               writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
 }
 
 static inline void pci_msix_mask(struct msi_desc *desc)
@@ -200,23 +204,17 @@ static inline void pci_msix_unmask(struct msi_desc *desc)
 
 static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask)
 {
-       if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
-               return;
-
        if (desc->msi_attrib.is_msix)
                pci_msix_mask(desc);
-       else if (desc->msi_attrib.maskbit)
+       else
                pci_msi_mask(desc, mask);
 }
 
 static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask)
 {
-       if (pci_msi_ignore_mask || desc->msi_attrib.is_virtual)
-               return;
-
        if (desc->msi_attrib.is_msix)
                pci_msix_unmask(desc);
-       else if (desc->msi_attrib.maskbit)
+       else
                pci_msi_unmask(desc, mask);
 }
 
@@ -370,6 +368,11 @@ static void free_msi_irqs(struct pci_dev *dev)
                        for (i = 0; i < entry->nvec_used; i++)
                                BUG_ON(irq_has_action(entry->irq + i));
 
+       if (dev->msi_irq_groups) {
+               msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
+               dev->msi_irq_groups = NULL;
+       }
+
        pci_msi_teardown_msi_irqs(dev);
 
        list_for_each_entry_safe(entry, tmp, msi_list, list) {
@@ -381,11 +384,6 @@ static void free_msi_irqs(struct pci_dev *dev)
                list_del(&entry->list);
                free_msi_entry(entry);
        }
-
-       if (dev->msi_irq_groups) {
-               msi_destroy_sysfs(&dev->dev, dev->msi_irq_groups);
-               dev->msi_irq_groups = NULL;
-       }
 }
 
 static void pci_intx_for_msi(struct pci_dev *dev, int enable)
@@ -479,12 +477,16 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
                goto out;
 
        pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
+       /* Lies, damned lies, and MSIs */
+       if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING)
+               control |= PCI_MSI_FLAGS_MASKBIT;
 
        entry->msi_attrib.is_msix       = 0;
        entry->msi_attrib.is_64         = !!(control & PCI_MSI_FLAGS_64BIT);
        entry->msi_attrib.is_virtual    = 0;
        entry->msi_attrib.entry_nr      = 0;
-       entry->msi_attrib.maskbit       = !!(control & PCI_MSI_FLAGS_MASKBIT);
+       entry->msi_attrib.can_mask      = !pci_msi_ignore_mask &&
+                                         !!(control & PCI_MSI_FLAGS_MASKBIT);
        entry->msi_attrib.default_irq   = dev->irq;     /* Save IOAPIC IRQ */
        entry->msi_attrib.multi_cap     = (control & PCI_MSI_FLAGS_QMASK) >> 1;
        entry->msi_attrib.multiple      = ilog2(__roundup_pow_of_two(nvec));
@@ -495,7 +497,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
                entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
 
        /* Save the initial mask status */
-       if (entry->msi_attrib.maskbit)
+       if (entry->msi_attrib.can_mask)
                pci_read_config_dword(dev, entry->mask_pos, &entry->msi_mask);
 
 out:
@@ -639,10 +641,13 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
                entry->msi_attrib.is_virtual =
                        entry->msi_attrib.entry_nr >= vec_count;
 
+               entry->msi_attrib.can_mask      = !pci_msi_ignore_mask &&
+                                                 !entry->msi_attrib.is_virtual;
+
                entry->msi_attrib.default_irq   = dev->irq;
                entry->mask_base                = base;
 
-               if (!entry->msi_attrib.is_virtual) {
+               if (entry->msi_attrib.can_mask) {
                        addr = pci_msix_desc_addr(entry);
                        entry->msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
                }
index 1579a37..3d2fb39 100644 (file)
@@ -5106,12 +5106,13 @@ static int pci_reset_bus_function(struct pci_dev *dev, bool probe)
        return pci_parent_bus_reset(dev, probe);
 }
 
-static void pci_dev_lock(struct pci_dev *dev)
+void pci_dev_lock(struct pci_dev *dev)
 {
        pci_cfg_access_lock(dev);
        /* block PM suspend, driver probe, etc. */
        device_lock(&dev->dev);
 }
+EXPORT_SYMBOL_GPL(pci_dev_lock);
 
 /* Return 1 on successful lock, 0 on contention */
 int pci_dev_trylock(struct pci_dev *dev)
index aedb78c..003950c 100644 (file)
@@ -5851,3 +5851,9 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_PERICOM, 0x2303,
                         pci_fixup_pericom_acs_store_forward);
 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_PERICOM, 0x2303,
                         pci_fixup_pericom_acs_store_forward);
+
+static void nvidia_ion_ahci_fixup(struct pci_dev *pdev)
+{
+       pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING;
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup);
index 34f943c..0f1b5a7 100644 (file)
@@ -1304,10 +1304,11 @@ ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r)
        if (!ext)
                return -ENOMEM;
 
-       err = -EINVAL;
        ext->mem = ptp_ocp_get_mem(bp, r);
-       if (!ext->mem)
+       if (IS_ERR(ext->mem)) {
+               err = PTR_ERR(ext->mem);
                goto out;
+       }
 
        ext->bp = bp;
        ext->info = r->extra;
@@ -1371,8 +1372,8 @@ ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r)
        void __iomem *mem;
 
        mem = ptp_ocp_get_mem(bp, r);
-       if (!mem)
-               return -EINVAL;
+       if (IS_ERR(mem))
+               return PTR_ERR(mem);
 
        bp_assign_entry(bp, r, mem);
 
index 1f5fab6..f7e75d9 100644 (file)
@@ -53,7 +53,6 @@ int
 tape_std_assign(struct tape_device *device)
 {
        int                  rc;
-       struct timer_list    timeout;
        struct tape_request *request;
 
        request = tape_alloc_request(2, 11);
@@ -70,7 +69,7 @@ tape_std_assign(struct tape_device *device)
         * So we set up a timeout for this call.
         */
        timer_setup(&request->timer, tape_std_assign_timeout, 0);
-       mod_timer(&timeout, jiffies + 2 * HZ);
+       mod_timer(&request->timer, jiffies + msecs_to_jiffies(2000));
 
        rc = tape_do_io_interruptible(device, request);
 
index 2bc55cc..ce9e751 100644 (file)
@@ -437,8 +437,8 @@ static ssize_t dev_busid_show(struct device *dev,
        struct subchannel *sch = to_subchannel(dev);
        struct pmcw *pmcw = &sch->schib.pmcw;
 
-       if ((pmcw->st == SUBCHANNEL_TYPE_IO ||
-            pmcw->st == SUBCHANNEL_TYPE_MSG) && pmcw->dnv)
+       if ((pmcw->st == SUBCHANNEL_TYPE_IO && pmcw->dnv) ||
+           (pmcw->st == SUBCHANNEL_TYPE_MSG && pmcw->w))
                return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid,
                                  pmcw->dev);
        else
index bd0fbcd..e24e220 100644 (file)
@@ -834,8 +834,10 @@ static int __init maple_bus_init(void)
 
        maple_queue_cache = KMEM_CACHE(maple_buffer, SLAB_HWCACHE_ALIGN);
 
-       if (!maple_queue_cache)
+       if (!maple_queue_cache) {
+               retval = -ENOMEM;
                goto cleanup_bothirqs;
+       }
 
        INIT_LIST_HEAD(&maple_waitq);
        INIT_LIST_HEAD(&maple_sentq);
@@ -848,6 +850,7 @@ static int __init maple_bus_init(void)
                if (!mdev[i]) {
                        while (i-- > 0)
                                maple_free_dev(mdev[i]);
+                       retval = -ENOMEM;
                        goto cleanup_cache;
                }
                baseunits[i] = mdev[i];
index 1b45116..40496e9 100644 (file)
@@ -332,13 +332,13 @@ static u8 sticon_build_attr(struct vc_data *conp, u8 color,
                            bool blink, bool underline, bool reverse,
                            bool italic)
 {
-    u8 attr = ((color & 0x70) >> 1) | ((color & 7));
+       u8 fg = color & 7;
+       u8 bg = (color & 0x70) >> 4;
 
-    if (reverse) {
-       color = ((color >> 3) & 0x7) | ((color & 0x7) << 3);
-    }
-
-    return attr;
+       if (reverse)
+               return (fg << 3) | bg;
+       else
+               return (bg << 3) | fg;
 }
 
 static void sticon_invert_region(struct vc_data *conp, u16 *p, int count)
index 0da0af2..96e5a87 100644 (file)
@@ -2889,6 +2889,7 @@ static unsigned int virtio_mem_features[] = {
 #if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA)
        VIRTIO_MEM_F_ACPI_PXM,
 #endif
+       VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE,
 };
 
 static const struct virtio_device_id virtio_mem_id_table[] = {
index adafdf8..fac918c 100644 (file)
@@ -108,7 +108,9 @@ static const struct netfs_read_request_ops v9fs_req_ops = {
  */
 static int v9fs_vfs_readpage(struct file *file, struct page *page)
 {
-       return netfs_readpage(file, page, &v9fs_req_ops, NULL);
+       struct folio *folio = page_folio(page);
+
+       return netfs_readpage(file, folio, &v9fs_req_ops, NULL);
 }
 
 /**
@@ -130,13 +132,15 @@ static void v9fs_vfs_readahead(struct readahead_control *ractl)
 
 static int v9fs_release_page(struct page *page, gfp_t gfp)
 {
-       if (PagePrivate(page))
+       struct folio *folio = page_folio(page);
+
+       if (folio_test_private(folio))
                return 0;
 #ifdef CONFIG_9P_FSCACHE
-       if (PageFsCache(page)) {
+       if (folio_test_fscache(folio)) {
                if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
                        return 0;
-               wait_on_page_fscache(page);
+               folio_wait_fscache(folio);
        }
 #endif
        return 1;
@@ -152,55 +156,58 @@ static int v9fs_release_page(struct page *page, gfp_t gfp)
 static void v9fs_invalidate_page(struct page *page, unsigned int offset,
                                 unsigned int length)
 {
-       wait_on_page_fscache(page);
+       struct folio *folio = page_folio(page);
+
+       folio_wait_fscache(folio);
 }
 
-static int v9fs_vfs_writepage_locked(struct page *page)
+static int v9fs_vfs_write_folio_locked(struct folio *folio)
 {
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = folio_inode(folio);
        struct v9fs_inode *v9inode = V9FS_I(inode);
-       loff_t start = page_offset(page);
-       loff_t size = i_size_read(inode);
+       loff_t start = folio_pos(folio);
+       loff_t i_size = i_size_read(inode);
        struct iov_iter from;
-       int err, len;
+       size_t len = folio_size(folio);
+       int err;
+
+       if (start >= i_size)
+               return 0; /* Simultaneous truncation occurred */
 
-       if (page->index == size >> PAGE_SHIFT)
-               len = size & ~PAGE_MASK;
-       else
-               len = PAGE_SIZE;
+       len = min_t(loff_t, i_size - start, len);
 
-       iov_iter_xarray(&from, WRITE, &page->mapping->i_pages, start, len);
+       iov_iter_xarray(&from, WRITE, &folio_mapping(folio)->i_pages, start, len);
 
        /* We should have writeback_fid always set */
        BUG_ON(!v9inode->writeback_fid);
 
-       set_page_writeback(page);
+       folio_start_writeback(folio);
 
        p9_client_write(v9inode->writeback_fid, start, &from, &err);
 
-       end_page_writeback(page);
+       folio_end_writeback(folio);
        return err;
 }
 
 static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
 {
+       struct folio *folio = page_folio(page);
        int retval;
 
-       p9_debug(P9_DEBUG_VFS, "page %p\n", page);
+       p9_debug(P9_DEBUG_VFS, "folio %p\n", folio);
 
-       retval = v9fs_vfs_writepage_locked(page);
+       retval = v9fs_vfs_write_folio_locked(folio);
        if (retval < 0) {
                if (retval == -EAGAIN) {
-                       redirty_page_for_writepage(wbc, page);
+                       folio_redirty_for_writepage(wbc, folio);
                        retval = 0;
                } else {
-                       SetPageError(page);
-                       mapping_set_error(page->mapping, retval);
+                       mapping_set_error(folio_mapping(folio), retval);
                }
        } else
                retval = 0;
 
-       unlock_page(page);
+       folio_unlock(folio);
        return retval;
 }
 
@@ -213,14 +220,15 @@ static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc)
 
 static int v9fs_launder_page(struct page *page)
 {
+       struct folio *folio = page_folio(page);
        int retval;
 
-       if (clear_page_dirty_for_io(page)) {
-               retval = v9fs_vfs_writepage_locked(page);
+       if (folio_clear_dirty_for_io(folio)) {
+               retval = v9fs_vfs_write_folio_locked(folio);
                if (retval)
                        return retval;
        }
-       wait_on_page_fscache(page);
+       folio_wait_fscache(folio);
        return 0;
 }
 
@@ -265,10 +273,10 @@ v9fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 
 static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
                            loff_t pos, unsigned int len, unsigned int flags,
-                           struct page **pagep, void **fsdata)
+                           struct page **subpagep, void **fsdata)
 {
        int retval;
-       struct page *page;
+       struct folio *folio;
        struct v9fs_inode *v9inode = V9FS_I(mapping->host);
 
        p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
@@ -279,31 +287,32 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
         * file.  We need to do this before we get a lock on the page in case
         * there's more than one writer competing for the same cache block.
         */
-       retval = netfs_write_begin(filp, mapping, pos, len, flags, &page, fsdata,
+       retval = netfs_write_begin(filp, mapping, pos, len, flags, &folio, fsdata,
                                   &v9fs_req_ops, NULL);
        if (retval < 0)
                return retval;
 
-       *pagep = find_subpage(page, pos / PAGE_SIZE);
+       *subpagep = &folio->page;
        return retval;
 }
 
 static int v9fs_write_end(struct file *filp, struct address_space *mapping,
                          loff_t pos, unsigned int len, unsigned int copied,
-                         struct page *page, void *fsdata)
+                         struct page *subpage, void *fsdata)
 {
        loff_t last_pos = pos + copied;
-       struct inode *inode = page->mapping->host;
+       struct folio *folio = page_folio(subpage);
+       struct inode *inode = mapping->host;
 
        p9_debug(P9_DEBUG_VFS, "filp %p, mapping %p\n", filp, mapping);
 
-       if (!PageUptodate(page)) {
+       if (!folio_test_uptodate(folio)) {
                if (unlikely(copied < len)) {
                        copied = 0;
                        goto out;
                }
 
-               SetPageUptodate(page);
+               folio_mark_uptodate(folio);
        }
 
        /*
@@ -314,10 +323,10 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
                inode_add_bytes(inode, last_pos - inode->i_size);
                i_size_write(inode, last_pos);
        }
-       set_page_dirty(page);
+       folio_mark_dirty(folio);
 out:
-       unlock_page(page);
-       put_page(page);
+       folio_unlock(folio);
+       folio_put(folio);
 
        return copied;
 }
index 4244d48..612e297 100644 (file)
@@ -528,13 +528,13 @@ static vm_fault_t
 v9fs_vm_page_mkwrite(struct vm_fault *vmf)
 {
        struct v9fs_inode *v9inode;
-       struct page *page = vmf->page;
+       struct folio *folio = page_folio(vmf->page);
        struct file *filp = vmf->vma->vm_file;
        struct inode *inode = file_inode(filp);
 
 
-       p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
-                page, (unsigned long)filp->private_data);
+       p9_debug(P9_DEBUG_VFS, "folio %p fid %lx\n",
+                folio, (unsigned long)filp->private_data);
 
        v9inode = V9FS_I(inode);
 
@@ -542,24 +542,24 @@ v9fs_vm_page_mkwrite(struct vm_fault *vmf)
         * be modified.  We then assume the entire page will need writing back.
         */
 #ifdef CONFIG_9P_FSCACHE
-       if (PageFsCache(page) &&
-           wait_on_page_fscache_killable(page) < 0)
-               return VM_FAULT_RETRY;
+       if (folio_test_fscache(folio) &&
+           folio_wait_fscache_killable(folio) < 0)
+               return VM_FAULT_NOPAGE;
 #endif
 
        /* Update file times before taking page lock */
        file_update_time(filp);
 
        BUG_ON(!v9inode->writeback_fid);
-       if (lock_page_killable(page) < 0)
+       if (folio_lock_killable(folio) < 0)
                return VM_FAULT_RETRY;
-       if (page->mapping != inode->i_mapping)
+       if (folio_mapping(folio) != inode->i_mapping)
                goto out_unlock;
-       wait_for_stable_page(page);
+       folio_wait_stable(folio);
 
        return VM_FAULT_LOCKED;
 out_unlock:
-       unlock_page(page);
+       folio_unlock(folio);
        return VM_FAULT_NOPAGE;
 }
 
index 4579bbd..da9b4f8 100644 (file)
@@ -103,13 +103,13 @@ struct afs_lookup_cookie {
 };
 
 /*
- * Drop the refs that we're holding on the pages we were reading into.  We've
+ * Drop the refs that we're holding on the folios we were reading into.  We've
  * got refs on the first nr_pages pages.
  */
 static void afs_dir_read_cleanup(struct afs_read *req)
 {
        struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
-       struct page *page;
+       struct folio *folio;
        pgoff_t last = req->nr_pages - 1;
 
        XA_STATE(xas, &mapping->i_pages, 0);
@@ -118,65 +118,56 @@ static void afs_dir_read_cleanup(struct afs_read *req)
                return;
 
        rcu_read_lock();
-       xas_for_each(&xas, page, last) {
-               if (xas_retry(&xas, page))
+       xas_for_each(&xas, folio, last) {
+               if (xas_retry(&xas, folio))
                        continue;
-               BUG_ON(xa_is_value(page));
-               BUG_ON(PageCompound(page));
-               ASSERTCMP(page->mapping, ==, mapping);
+               BUG_ON(xa_is_value(folio));
+               ASSERTCMP(folio_file_mapping(folio), ==, mapping);
 
-               put_page(page);
+               folio_put(folio);
        }
 
        rcu_read_unlock();
 }
 
 /*
- * check that a directory page is valid
+ * check that a directory folio is valid
  */
-static bool afs_dir_check_page(struct afs_vnode *dvnode, struct page *page,
-                              loff_t i_size)
+static bool afs_dir_check_folio(struct afs_vnode *dvnode, struct folio *folio,
+                               loff_t i_size)
 {
-       struct afs_xdr_dir_page *dbuf;
-       loff_t latter, off;
-       int tmp, qty;
+       union afs_xdr_dir_block *block;
+       size_t offset, size;
+       loff_t pos;
 
-       /* Determine how many magic numbers there should be in this page, but
+       /* Determine how many magic numbers there should be in this folio, but
         * we must take care because the directory may change size under us.
         */
-       off = page_offset(page);
-       if (i_size <= off)
+       pos = folio_pos(folio);
+       if (i_size <= pos)
                goto checked;
 
-       latter = i_size - off;
-       if (latter >= PAGE_SIZE)
-               qty = PAGE_SIZE;
-       else
-               qty = latter;
-       qty /= sizeof(union afs_xdr_dir_block);
-
-       /* check them */
-       dbuf = kmap_atomic(page);
-       for (tmp = 0; tmp < qty; tmp++) {
-               if (dbuf->blocks[tmp].hdr.magic != AFS_DIR_MAGIC) {
-                       printk("kAFS: %s(%lx): bad magic %d/%d is %04hx\n",
-                              __func__, dvnode->vfs_inode.i_ino, tmp, qty,
-                              ntohs(dbuf->blocks[tmp].hdr.magic));
-                       trace_afs_dir_check_failed(dvnode, off, i_size);
-                       kunmap(page);
+       size = min_t(loff_t, folio_size(folio), i_size - pos);
+       for (offset = 0; offset < size; offset += sizeof(*block)) {
+               block = kmap_local_folio(folio, offset);
+               if (block->hdr.magic != AFS_DIR_MAGIC) {
+                       printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
+                              __func__, dvnode->vfs_inode.i_ino,
+                              pos, offset, size, ntohs(block->hdr.magic));
+                       trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
+                       kunmap_local(block);
                        trace_afs_file_error(dvnode, -EIO, afs_file_error_dir_bad_magic);
                        goto error;
                }
 
                /* Make sure each block is NUL terminated so we can reasonably
-                * use string functions on it.  The filenames in the page
+                * use string functions on it.  The filenames in the folio
                 * *should* be NUL-terminated anyway.
                 */
-               ((u8 *)&dbuf->blocks[tmp])[AFS_DIR_BLOCK_SIZE - 1] = 0;
-       }
-
-       kunmap_atomic(dbuf);
+               ((u8 *)block)[AFS_DIR_BLOCK_SIZE - 1] = 0;
 
+               kunmap_local(block);
+       }
 checked:
        afs_stat_v(dvnode, n_read_dir);
        return true;
@@ -190,11 +181,11 @@ error:
  */
 static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
 {
-       struct afs_xdr_dir_page *dbuf;
+       union afs_xdr_dir_block *block;
        struct address_space *mapping = dvnode->vfs_inode.i_mapping;
-       struct page *page;
-       unsigned int i, qty = PAGE_SIZE / sizeof(union afs_xdr_dir_block);
+       struct folio *folio;
        pgoff_t last = req->nr_pages - 1;
+       size_t offset, size;
 
        XA_STATE(xas, &mapping->i_pages, 0);
 
@@ -205,30 +196,28 @@ static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
                req->pos, req->nr_pages,
                req->iter->iov_offset,  iov_iter_count(req->iter));
 
-       xas_for_each(&xas, page, last) {
-               if (xas_retry(&xas, page))
+       xas_for_each(&xas, folio, last) {
+               if (xas_retry(&xas, folio))
                        continue;
 
-               BUG_ON(PageCompound(page));
-               BUG_ON(page->mapping != mapping);
-
-               dbuf = kmap_atomic(page);
-               for (i = 0; i < qty; i++) {
-                       union afs_xdr_dir_block *block = &dbuf->blocks[i];
+               BUG_ON(folio_file_mapping(folio) != mapping);
 
-                       pr_warn("[%02lx] %32phN\n", page->index * qty + i, block);
+               size = min_t(loff_t, folio_size(folio), req->actual_len - folio_pos(folio));
+               for (offset = 0; offset < size; offset += sizeof(*block)) {
+                       block = kmap_local_folio(folio, offset);
+                       pr_warn("[%02lx] %32phN\n", folio_index(folio) + offset, block);
+                       kunmap_local(block);
                }
-               kunmap_atomic(dbuf);
        }
 }
 
 /*
- * Check all the pages in a directory.  All the pages are held pinned.
+ * Check all the blocks in a directory.  All the folios are held pinned.
  */
 static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
 {
        struct address_space *mapping = dvnode->vfs_inode.i_mapping;
-       struct page *page;
+       struct folio *folio;
        pgoff_t last = req->nr_pages - 1;
        int ret = 0;
 
@@ -238,14 +227,13 @@ static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
                return 0;
 
        rcu_read_lock();
-       xas_for_each(&xas, page, last) {
-               if (xas_retry(&xas, page))
+       xas_for_each(&xas, folio, last) {
+               if (xas_retry(&xas, folio))
                        continue;
 
-               BUG_ON(PageCompound(page));
-               BUG_ON(page->mapping != mapping);
+               BUG_ON(folio_file_mapping(folio) != mapping);
 
-               if (!afs_dir_check_page(dvnode, page, req->file_size)) {
+               if (!afs_dir_check_folio(dvnode, folio, req->actual_len)) {
                        afs_dir_dump(dvnode, req);
                        ret = -EIO;
                        break;
@@ -274,15 +262,16 @@ static int afs_dir_open(struct inode *inode, struct file *file)
 
 /*
  * Read the directory into the pagecache in one go, scrubbing the previous
- * contents.  The list of pages is returned, pinning them so that they don't
+ * contents.  The list of folios is returned, pinning them so that they don't
  * get reclaimed during the iteration.
  */
 static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
        __acquires(&dvnode->validate_lock)
 {
+       struct address_space *mapping = dvnode->vfs_inode.i_mapping;
        struct afs_read *req;
        loff_t i_size;
-       int nr_pages, i, n;
+       int nr_pages, i;
        int ret;
 
        _enter("");
@@ -320,43 +309,30 @@ expand:
        req->iter = &req->def_iter;
 
        /* Fill in any gaps that we might find where the memory reclaimer has
-        * been at work and pin all the pages.  If there are any gaps, we will
+        * been at work and pin all the folios.  If there are any gaps, we will
         * need to reread the entire directory contents.
         */
        i = req->nr_pages;
        while (i < nr_pages) {
-               struct page *pages[8], *page;
-
-               n = find_get_pages_contig(dvnode->vfs_inode.i_mapping, i,
-                                         min_t(unsigned int, nr_pages - i,
-                                               ARRAY_SIZE(pages)),
-                                         pages);
-               _debug("find %u at %u/%u", n, i, nr_pages);
-
-               if (n == 0) {
-                       gfp_t gfp = dvnode->vfs_inode.i_mapping->gfp_mask;
+               struct folio *folio;
 
+               folio = filemap_get_folio(mapping, i);
+               if (!folio) {
                        if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
                                afs_stat_v(dvnode, n_inval);
 
                        ret = -ENOMEM;
-                       page = __page_cache_alloc(gfp);
-                       if (!page)
+                       folio = __filemap_get_folio(mapping,
+                                                   i, FGP_LOCK | FGP_CREAT,
+                                                   mapping->gfp_mask);
+                       if (!folio)
                                goto error;
-                       ret = add_to_page_cache_lru(page,
-                                                   dvnode->vfs_inode.i_mapping,
-                                                   i, gfp);
-                       if (ret < 0)
-                               goto error;
-
-                       attach_page_private(page, (void *)1);
-                       unlock_page(page);
-                       req->nr_pages++;
-                       i++;
-               } else {
-                       req->nr_pages += n;
-                       i += n;
+                       folio_attach_private(folio, (void *)1);
+                       folio_unlock(folio);
                }
+
+               req->nr_pages += folio_nr_pages(folio);
+               i += folio_nr_pages(folio);
        }
 
        /* If we're going to reload, we need to lock all the pages to prevent
@@ -424,7 +400,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
        size_t nlen;
        int tmp;
 
-       _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block);
+       _enter("%llx,%x", ctx->pos, blkoff);
 
        curr = (ctx->pos - blkoff) / sizeof(union afs_xdr_dirent);
 
@@ -513,12 +489,10 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
                           struct key *key, afs_dataversion_t *_dir_version)
 {
        struct afs_vnode *dvnode = AFS_FS_I(dir);
-       struct afs_xdr_dir_page *dbuf;
        union afs_xdr_dir_block *dblock;
        struct afs_read *req;
-       struct page *page;
-       unsigned blkoff, limit;
-       void __rcu **slot;
+       struct folio *folio;
+       unsigned offset, size;
        int ret;
 
        _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos);
@@ -540,43 +514,30 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
        /* walk through the blocks in sequence */
        ret = 0;
        while (ctx->pos < req->actual_len) {
-               blkoff = ctx->pos & ~(sizeof(union afs_xdr_dir_block) - 1);
-
-               /* Fetch the appropriate page from the directory and re-add it
+               /* Fetch the appropriate folio from the directory and re-add it
                 * to the LRU.  We have all the pages pinned with an extra ref.
                 */
-               rcu_read_lock();
-               page = NULL;
-               slot = radix_tree_lookup_slot(&dvnode->vfs_inode.i_mapping->i_pages,
-                                             blkoff / PAGE_SIZE);
-               if (slot)
-                       page = radix_tree_deref_slot(slot);
-               rcu_read_unlock();
-               if (!page) {
+               folio = __filemap_get_folio(dir->i_mapping, ctx->pos / PAGE_SIZE,
+                                           FGP_ACCESSED, 0);
+               if (!folio) {
                        ret = afs_bad(dvnode, afs_file_error_dir_missing_page);
                        break;
                }
-               mark_page_accessed(page);
 
-               limit = blkoff & ~(PAGE_SIZE - 1);
+               offset = round_down(ctx->pos, sizeof(*dblock)) - folio_file_pos(folio);
+               size = min_t(loff_t, folio_size(folio),
+                            req->actual_len - folio_file_pos(folio));
 
-               dbuf = kmap(page);
-
-               /* deal with the individual blocks stashed on this page */
                do {
-                       dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) /
-                                              sizeof(union afs_xdr_dir_block)];
-                       ret = afs_dir_iterate_block(dvnode, ctx, dblock, blkoff);
-                       if (ret != 1) {
-                               kunmap(page);
+                       dblock = kmap_local_folio(folio, offset);
+                       ret = afs_dir_iterate_block(dvnode, ctx, dblock,
+                                                   folio_file_pos(folio) + offset);
+                       kunmap_local(dblock);
+                       if (ret != 1)
                                goto out;
-                       }
 
-                       blkoff += sizeof(union afs_xdr_dir_block);
+               } while (offset += sizeof(*dblock), offset < size);
 
-               } while (ctx->pos < dir->i_size && blkoff < limit);
-
-               kunmap(page);
                ret = 0;
        }
 
@@ -2037,42 +1998,42 @@ error:
 }
 
 /*
- * Release a directory page and clean up its private state if it's not busy
- * - return true if the page can now be released, false if not
+ * Release a directory folio and clean up its private state if it's not busy
+ * - return true if the folio can now be released, false if not
  */
-static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
+static int afs_dir_releasepage(struct page *subpage, gfp_t gfp_flags)
 {
-       struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
+       struct folio *folio = page_folio(subpage);
+       struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
 
-       _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
+       _enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, folio_index(folio));
 
-       detach_page_private(page);
+       folio_detach_private(folio);
 
        /* The directory will need reloading. */
        if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
                afs_stat_v(dvnode, n_relpg);
-       return 1;
+       return true;
 }
 
 /*
- * invalidate part or all of a page
- * - release a page and clean up its private data if offset is 0 (indicating
- *   the entire page)
+ * Invalidate part or all of a folio.
  */
-static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
+static void afs_dir_invalidatepage(struct page *subpage, unsigned int offset,
                                   unsigned int length)
 {
-       struct afs_vnode *dvnode = AFS_FS_I(page->mapping->host);
+       struct folio *folio = page_folio(subpage);
+       struct afs_vnode *dvnode = AFS_FS_I(folio_inode(folio));
 
-       _enter("{%lu},%u,%u", page->index, offset, length);
+       _enter("{%lu},%u,%u", folio_index(folio), offset, length);
 
-       BUG_ON(!PageLocked(page));
+       BUG_ON(!folio_test_locked(folio));
 
        /* The directory will need reloading. */
        if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
                afs_stat_v(dvnode, n_inval);
 
-       /* we clean up only if the entire page is being invalidated */
-       if (offset == 0 && length == thp_size(page))
-               detach_page_private(page);
+       /* we clean up only if the entire folio is being invalidated */
+       if (offset == 0 && length == folio_size(folio))
+               folio_detach_private(folio);
 }
index 540b9fc..d98e109 100644 (file)
@@ -105,6 +105,25 @@ static void afs_clear_contig_bits(union afs_xdr_dir_block *block,
 }
 
 /*
+ * Get a new directory folio.
+ */
+static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
+{
+       struct address_space *mapping = vnode->vfs_inode.i_mapping;
+       struct folio *folio;
+
+       folio = __filemap_get_folio(mapping, index,
+                                   FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+                                   mapping->gfp_mask);
+       if (!folio)
+               clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+       else if (folio && !folio_test_private(folio))
+               folio_attach_private(folio, (void *)1);
+
+       return folio;
+}
+
+/*
  * Scan a directory block looking for a dirent of the right name.
  */
 static int afs_dir_scan_block(union afs_xdr_dir_block *block, struct qstr *name,
@@ -188,13 +207,11 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
                      enum afs_edit_dir_reason why)
 {
        union afs_xdr_dir_block *meta, *block;
-       struct afs_xdr_dir_page *meta_page, *dir_page;
        union afs_xdr_dirent *de;
-       struct page *page0, *page;
+       struct folio *folio0, *folio;
        unsigned int need_slots, nr_blocks, b;
        pgoff_t index;
        loff_t i_size;
-       gfp_t gfp;
        int slot;
 
        _enter(",,{%d,%s},", name->len, name->name);
@@ -206,10 +223,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
                return;
        }
 
-       gfp = vnode->vfs_inode.i_mapping->gfp_mask;
-       page0 = find_or_create_page(vnode->vfs_inode.i_mapping, 0, gfp);
-       if (!page0) {
-               clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+       folio0 = afs_dir_get_folio(vnode, 0);
+       if (!folio0) {
                _leave(" [fgp]");
                return;
        }
@@ -217,42 +232,35 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
        /* Work out how many slots we're going to need. */
        need_slots = afs_dir_calc_slots(name->len);
 
-       meta_page = kmap(page0);
-       meta = &meta_page->blocks[0];
+       meta = kmap_local_folio(folio0, 0);
        if (i_size == 0)
                goto new_directory;
        nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
 
-       /* Find a block that has sufficient slots available.  Each VM page
+       /* Find a block that has sufficient slots available.  Each folio
         * contains two or more directory blocks.
         */
        for (b = 0; b < nr_blocks + 1; b++) {
-               /* If the directory extended into a new page, then we need to
-                * tack a new page on the end.
+               /* If the directory extended into a new folio, then we need to
+                * tack a new folio on the end.
                 */
                index = b / AFS_DIR_BLOCKS_PER_PAGE;
-               if (index == 0) {
-                       page = page0;
-                       dir_page = meta_page;
-               } else {
-                       if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
-                               goto error;
-                       gfp = vnode->vfs_inode.i_mapping->gfp_mask;
-                       page = find_or_create_page(vnode->vfs_inode.i_mapping,
-                                                  index, gfp);
-                       if (!page)
+               if (nr_blocks >= AFS_DIR_MAX_BLOCKS)
+                       goto error;
+               if (index >= folio_nr_pages(folio0)) {
+                       folio = afs_dir_get_folio(vnode, index);
+                       if (!folio)
                                goto error;
-                       if (!PagePrivate(page))
-                               attach_page_private(page, (void *)1);
-                       dir_page = kmap(page);
+               } else {
+                       folio = folio0;
                }
 
+               block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+
                /* Abandon the edit if we got a callback break. */
                if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
                        goto invalidated;
 
-               block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
-
                _debug("block %u: %2u %3u %u",
                       b,
                       (b < AFS_DIR_BLOCKS_WITH_CTR) ? meta->meta.alloc_ctrs[b] : 99,
@@ -266,7 +274,7 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
                        afs_set_i_size(vnode, (b + 1) * AFS_DIR_BLOCK_SIZE);
                }
 
-               /* Only lower dir pages have a counter in the header. */
+               /* Only lower dir blocks have a counter in the header. */
                if (b >= AFS_DIR_BLOCKS_WITH_CTR ||
                    meta->meta.alloc_ctrs[b] >= need_slots) {
                        /* We need to try and find one or more consecutive
@@ -279,10 +287,10 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
                        }
                }
 
-               if (page != page0) {
-                       unlock_page(page);
-                       kunmap(page);
-                       put_page(page);
+               kunmap_local(block);
+               if (folio != folio0) {
+                       folio_unlock(folio);
+                       folio_put(folio);
                }
        }
 
@@ -298,8 +306,8 @@ new_directory:
        i_size = AFS_DIR_BLOCK_SIZE;
        afs_set_i_size(vnode, i_size);
        slot = AFS_DIR_RESV_BLOCKS0;
-       page = page0;
-       block = meta;
+       folio = folio0;
+       block = kmap_local_folio(folio, 0);
        nr_blocks = 1;
        b = 0;
 
@@ -318,10 +326,10 @@ found_space:
 
        /* Adjust the bitmap. */
        afs_set_contig_bits(block, slot, need_slots);
-       if (page != page0) {
-               unlock_page(page);
-               kunmap(page);
-               put_page(page);
+       kunmap_local(block);
+       if (folio != folio0) {
+               folio_unlock(folio);
+               folio_put(folio);
        }
 
        /* Adjust the allocation counter. */
@@ -333,18 +341,19 @@ found_space:
        _debug("Insert %s in %u[%u]", name->name, b, slot);
 
 out_unmap:
-       unlock_page(page0);
-       kunmap(page0);
-       put_page(page0);
+       kunmap_local(meta);
+       folio_unlock(folio0);
+       folio_put(folio0);
        _leave("");
        return;
 
 invalidated:
        trace_afs_edit_dir(vnode, why, afs_edit_dir_create_inval, 0, 0, 0, 0, name->name);
        clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
-       if (page != page0) {
-               kunmap(page);
-               put_page(page);
+       kunmap_local(block);
+       if (folio != folio0) {
+               folio_unlock(folio);
+               folio_put(folio);
        }
        goto out_unmap;
 
@@ -364,10 +373,9 @@ error:
 void afs_edit_dir_remove(struct afs_vnode *vnode,
                         struct qstr *name, enum afs_edit_dir_reason why)
 {
-       struct afs_xdr_dir_page *meta_page, *dir_page;
        union afs_xdr_dir_block *meta, *block;
        union afs_xdr_dirent *de;
-       struct page *page0, *page;
+       struct folio *folio0, *folio;
        unsigned int need_slots, nr_blocks, b;
        pgoff_t index;
        loff_t i_size;
@@ -384,9 +392,8 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
        }
        nr_blocks = i_size / AFS_DIR_BLOCK_SIZE;
 
-       page0 = find_lock_page(vnode->vfs_inode.i_mapping, 0);
-       if (!page0) {
-               clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
+       folio0 = afs_dir_get_folio(vnode, 0);
+       if (!folio0) {
                _leave(" [fgp]");
                return;
        }
@@ -394,30 +401,27 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
        /* Work out how many slots we're going to discard. */
        need_slots = afs_dir_calc_slots(name->len);
 
-       meta_page = kmap(page0);
-       meta = &meta_page->blocks[0];
+       meta = kmap_local_folio(folio0, 0);
 
-       /* Find a page that has sufficient slots available.  Each VM page
+       /* Find a block that has sufficient slots available.  Each folio
         * contains two or more directory blocks.
         */
        for (b = 0; b < nr_blocks; b++) {
                index = b / AFS_DIR_BLOCKS_PER_PAGE;
-               if (index != 0) {
-                       page = find_lock_page(vnode->vfs_inode.i_mapping, index);
-                       if (!page)
+               if (index >= folio_nr_pages(folio0)) {
+                       folio = afs_dir_get_folio(vnode, index);
+                       if (!folio)
                                goto error;
-                       dir_page = kmap(page);
                } else {
-                       page = page0;
-                       dir_page = meta_page;
+                       folio = folio0;
                }
 
+               block = kmap_local_folio(folio, b * AFS_DIR_BLOCK_SIZE - folio_file_pos(folio));
+
                /* Abandon the edit if we got a callback break. */
                if (!test_bit(AFS_VNODE_DIR_VALID, &vnode->flags))
                        goto invalidated;
 
-               block = &dir_page->blocks[b % AFS_DIR_BLOCKS_PER_PAGE];
-
                if (b > AFS_DIR_BLOCKS_WITH_CTR ||
                    meta->meta.alloc_ctrs[b] <= AFS_DIR_SLOTS_PER_BLOCK - 1 - need_slots) {
                        slot = afs_dir_scan_block(block, name, b);
@@ -425,10 +429,10 @@ void afs_edit_dir_remove(struct afs_vnode *vnode,
                                goto found_dirent;
                }
 
-               if (page != page0) {
-                       unlock_page(page);
-                       kunmap(page);
-                       put_page(page);
+               kunmap_local(block);
+               if (folio != folio0) {
+                       folio_unlock(folio);
+                       folio_put(folio);
                }
        }
 
@@ -449,10 +453,10 @@ found_dirent:
 
        /* Adjust the bitmap. */
        afs_clear_contig_bits(block, slot, need_slots);
-       if (page != page0) {
-               unlock_page(page);
-               kunmap(page);
-               put_page(page);
+       kunmap_local(block);
+       if (folio != folio0) {
+               folio_unlock(folio);
+               folio_put(folio);
        }
 
        /* Adjust the allocation counter. */
@@ -464,9 +468,9 @@ found_dirent:
        _debug("Remove %s from %u[%u]", name->name, b, slot);
 
 out_unmap:
-       unlock_page(page0);
-       kunmap(page0);
-       put_page(page0);
+       kunmap_local(meta);
+       folio_unlock(folio0);
+       folio_put(folio0);
        _leave("");
        return;
 
@@ -474,10 +478,10 @@ invalidated:
        trace_afs_edit_dir(vnode, why, afs_edit_dir_delete_inval,
                           0, 0, 0, 0, name->name);
        clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
-       if (page != page0) {
-               unlock_page(page);
-               kunmap(page);
-               put_page(page);
+       kunmap_local(block);
+       if (folio != folio0) {
+               folio_unlock(folio);
+               folio_put(folio);
        }
        goto out_unmap;
 
index eb11d04..cb6ad61 100644 (file)
@@ -324,21 +324,24 @@ static int afs_symlink_readpage(struct file *file, struct page *page)
 {
        struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
        struct afs_read *fsreq;
+       struct folio *folio = page_folio(page);
        int ret;
 
        fsreq = afs_alloc_read(GFP_NOFS);
        if (!fsreq)
                return -ENOMEM;
 
-       fsreq->pos      = page->index * PAGE_SIZE;
-       fsreq->len      = PAGE_SIZE;
+       fsreq->pos      = folio_pos(folio);
+       fsreq->len      = folio_size(folio);
        fsreq->vnode    = vnode;
        fsreq->iter     = &fsreq->def_iter;
        iov_iter_xarray(&fsreq->def_iter, READ, &page->mapping->i_pages,
                        fsreq->pos, fsreq->len);
 
        ret = afs_fetch_data(fsreq->vnode, fsreq);
-       page_endio(page, false, ret);
+       if (ret == 0)
+               SetPageUptodate(page);
+       unlock_page(page);
        return ret;
 }
 
@@ -362,7 +365,7 @@ static int afs_begin_cache_operation(struct netfs_read_request *rreq)
 }
 
 static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
-                                struct page *page, void **_fsdata)
+                                struct folio *folio, void **_fsdata)
 {
        struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
 
@@ -385,7 +388,9 @@ const struct netfs_read_request_ops afs_req_ops = {
 
 static int afs_readpage(struct file *file, struct page *page)
 {
-       return netfs_readpage(file, page, &afs_req_ops, NULL);
+       struct folio *folio = page_folio(page);
+
+       return netfs_readpage(file, folio, &afs_req_ops, NULL);
 }
 
 static void afs_readahead(struct readahead_control *ractl)
@@ -397,29 +402,29 @@ static void afs_readahead(struct readahead_control *ractl)
  * Adjust the dirty region of the page on truncation or full invalidation,
  * getting rid of the markers altogether if the region is entirely invalidated.
  */
-static void afs_invalidate_dirty(struct page *page, unsigned int offset,
+static void afs_invalidate_dirty(struct folio *folio, unsigned int offset,
                                 unsigned int length)
 {
-       struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+       struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
        unsigned long priv;
        unsigned int f, t, end = offset + length;
 
-       priv = page_private(page);
+       priv = (unsigned long)folio_get_private(folio);
 
        /* we clean up only if the entire page is being invalidated */
-       if (offset == 0 && length == thp_size(page))
+       if (offset == 0 && length == folio_size(folio))
                goto full_invalidate;
 
         /* If the page was dirtied by page_mkwrite(), the PTE stays writable
          * and we don't get another notification to tell us to expand it
          * again.
          */
-       if (afs_is_page_dirty_mmapped(priv))
+       if (afs_is_folio_dirty_mmapped(priv))
                return;
 
        /* We may need to shorten the dirty region */
-       f = afs_page_dirty_from(page, priv);
-       t = afs_page_dirty_to(page, priv);
+       f = afs_folio_dirty_from(folio, priv);
+       t = afs_folio_dirty_to(folio, priv);
 
        if (t <= offset || f >= end)
                return; /* Doesn't overlap */
@@ -437,17 +442,17 @@ static void afs_invalidate_dirty(struct page *page, unsigned int offset,
        if (f == t)
                goto undirty;
 
-       priv = afs_page_dirty(page, f, t);
-       set_page_private(page, priv);
-       trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page);
+       priv = afs_folio_dirty(folio, f, t);
+       folio_change_private(folio, (void *)priv);
+       trace_afs_folio_dirty(vnode, tracepoint_string("trunc"), folio);
        return;
 
 undirty:
-       trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page);
-       clear_page_dirty_for_io(page);
+       trace_afs_folio_dirty(vnode, tracepoint_string("undirty"), folio);
+       folio_clear_dirty_for_io(folio);
 full_invalidate:
-       trace_afs_page_dirty(vnode, tracepoint_string("inval"), page);
-       detach_page_private(page);
+       trace_afs_folio_dirty(vnode, tracepoint_string("inval"), folio);
+       folio_detach_private(folio);
 }
 
 /*
@@ -458,14 +463,16 @@ full_invalidate:
 static void afs_invalidatepage(struct page *page, unsigned int offset,
                               unsigned int length)
 {
-       _enter("{%lu},%u,%u", page->index, offset, length);
+       struct folio *folio = page_folio(page);
+
+       _enter("{%lu},%u,%u", folio_index(folio), offset, length);
 
        BUG_ON(!PageLocked(page));
 
        if (PagePrivate(page))
-               afs_invalidate_dirty(page, offset, length);
+               afs_invalidate_dirty(folio, offset, length);
 
-       wait_on_page_fscache(page);
+       folio_wait_fscache(folio);
        _leave("");
 }
 
@@ -475,30 +482,31 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
  */
 static int afs_releasepage(struct page *page, gfp_t gfp_flags)
 {
-       struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+       struct folio *folio = page_folio(page);
+       struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
 
        _enter("{{%llx:%llu}[%lu],%lx},%x",
-              vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
+              vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags,
               gfp_flags);
 
        /* deny if page is being written to the cache and the caller hasn't
         * elected to wait */
 #ifdef CONFIG_AFS_FSCACHE
-       if (PageFsCache(page)) {
+       if (folio_test_fscache(folio)) {
                if (!(gfp_flags & __GFP_DIRECT_RECLAIM) || !(gfp_flags & __GFP_FS))
                        return false;
-               wait_on_page_fscache(page);
+               folio_wait_fscache(folio);
        }
 #endif
 
-       if (PagePrivate(page)) {
-               trace_afs_page_dirty(vnode, tracepoint_string("rel"), page);
-               detach_page_private(page);
+       if (folio_test_private(folio)) {
+               trace_afs_folio_dirty(vnode, tracepoint_string("rel"), folio);
+               folio_detach_private(folio);
        }
 
-       /* indicate that the page can be released */
+       /* Indicate that the folio can be released */
        _leave(" = T");
-       return 1;
+       return true;
 }
 
 static void afs_add_open_mmap(struct afs_vnode *vnode)
index 9357c53..aa4c0d6 100644 (file)
@@ -876,59 +876,59 @@ struct afs_vnode_cache_aux {
 } __packed;
 
 /*
- * We use page->private to hold the amount of the page that we've written to,
+ * We use folio->private to hold the amount of the folio that we've written to,
  * splitting the field into two parts.  However, we need to represent a range
- * 0...PAGE_SIZE, so we reduce the resolution if the size of the page
+ * 0...FOLIO_SIZE, so we reduce the resolution if the size of the folio
  * exceeds what we can encode.
  */
 #ifdef CONFIG_64BIT
-#define __AFS_PAGE_PRIV_MASK   0x7fffffffUL
-#define __AFS_PAGE_PRIV_SHIFT  32
-#define __AFS_PAGE_PRIV_MMAPPED        0x80000000UL
+#define __AFS_FOLIO_PRIV_MASK          0x7fffffffUL
+#define __AFS_FOLIO_PRIV_SHIFT         32
+#define __AFS_FOLIO_PRIV_MMAPPED       0x80000000UL
 #else
-#define __AFS_PAGE_PRIV_MASK   0x7fffUL
-#define __AFS_PAGE_PRIV_SHIFT  16
-#define __AFS_PAGE_PRIV_MMAPPED        0x8000UL
+#define __AFS_FOLIO_PRIV_MASK          0x7fffUL
+#define __AFS_FOLIO_PRIV_SHIFT         16
+#define __AFS_FOLIO_PRIV_MMAPPED       0x8000UL
 #endif
 
-static inline unsigned int afs_page_dirty_resolution(struct page *page)
+static inline unsigned int afs_folio_dirty_resolution(struct folio *folio)
 {
-       int shift = thp_order(page) + PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
+       int shift = folio_shift(folio) - (__AFS_FOLIO_PRIV_SHIFT - 1);
        return (shift > 0) ? shift : 0;
 }
 
-static inline size_t afs_page_dirty_from(struct page *page, unsigned long priv)
+static inline size_t afs_folio_dirty_from(struct folio *folio, unsigned long priv)
 {
-       unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
+       unsigned long x = priv & __AFS_FOLIO_PRIV_MASK;
 
        /* The lower bound is inclusive */
-       return x << afs_page_dirty_resolution(page);
+       return x << afs_folio_dirty_resolution(folio);
 }
 
-static inline size_t afs_page_dirty_to(struct page *page, unsigned long priv)
+static inline size_t afs_folio_dirty_to(struct folio *folio, unsigned long priv)
 {
-       unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
+       unsigned long x = (priv >> __AFS_FOLIO_PRIV_SHIFT) & __AFS_FOLIO_PRIV_MASK;
 
        /* The upper bound is immediately beyond the region */
-       return (x + 1) << afs_page_dirty_resolution(page);
+       return (x + 1) << afs_folio_dirty_resolution(folio);
 }
 
-static inline unsigned long afs_page_dirty(struct page *page, size_t from, size_t to)
+static inline unsigned long afs_folio_dirty(struct folio *folio, size_t from, size_t to)
 {
-       unsigned int res = afs_page_dirty_resolution(page);
+       unsigned int res = afs_folio_dirty_resolution(folio);
        from >>= res;
        to = (to - 1) >> res;
-       return (to << __AFS_PAGE_PRIV_SHIFT) | from;
+       return (to << __AFS_FOLIO_PRIV_SHIFT) | from;
 }
 
-static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
+static inline unsigned long afs_folio_dirty_mmapped(unsigned long priv)
 {
-       return priv | __AFS_PAGE_PRIV_MMAPPED;
+       return priv | __AFS_FOLIO_PRIV_MMAPPED;
 }
 
-static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
+static inline bool afs_is_folio_dirty_mmapped(unsigned long priv)
 {
-       return priv & __AFS_PAGE_PRIV_MMAPPED;
+       return priv & __AFS_FOLIO_PRIV_MMAPPED;
 }
 
 #include <trace/events/afs.h>
index 8b1d9c2..ca4909b 100644 (file)
@@ -32,7 +32,7 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
                    struct page **_page, void **fsdata)
 {
        struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
-       struct page *page;
+       struct folio *folio;
        unsigned long priv;
        unsigned f, from;
        unsigned t, to;
@@ -46,12 +46,12 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
         * file.  We need to do this before we get a lock on the page in case
         * there's more than one writer competing for the same cache block.
         */
-       ret = netfs_write_begin(file, mapping, pos, len, flags, &page, fsdata,
+       ret = netfs_write_begin(file, mapping, pos, len, flags, &folio, fsdata,
                                &afs_req_ops, NULL);
        if (ret < 0)
                return ret;
 
-       index = page->index;
+       index = folio_index(folio);
        from = pos - index * PAGE_SIZE;
        to = from + len;
 
@@ -59,14 +59,14 @@ try_again:
        /* See if this page is already partially written in a way that we can
         * merge the new write with.
         */
-       if (PagePrivate(page)) {
-               priv = page_private(page);
-               f = afs_page_dirty_from(page, priv);
-               t = afs_page_dirty_to(page, priv);
+       if (folio_test_private(folio)) {
+               priv = (unsigned long)folio_get_private(folio);
+               f = afs_folio_dirty_from(folio, priv);
+               t = afs_folio_dirty_to(folio, priv);
                ASSERTCMP(f, <=, t);
 
-               if (PageWriteback(page)) {
-                       trace_afs_page_dirty(vnode, tracepoint_string("alrdy"), page);
+               if (folio_test_writeback(folio)) {
+                       trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
                        goto flush_conflicting_write;
                }
                /* If the file is being filled locally, allow inter-write
@@ -78,7 +78,7 @@ try_again:
                        goto flush_conflicting_write;
        }
 
-       *_page = page;
+       *_page = &folio->page;
        _leave(" = 0");
        return 0;
 
@@ -87,17 +87,17 @@ try_again:
         */
 flush_conflicting_write:
        _debug("flush conflict");
-       ret = write_one_page(page);
+       ret = folio_write_one(folio);
        if (ret < 0)
                goto error;
 
-       ret = lock_page_killable(page);
+       ret = folio_lock_killable(folio);
        if (ret < 0)
                goto error;
        goto try_again;
 
 error:
-       put_page(page);
+       folio_put(folio);
        _leave(" = %d", ret);
        return ret;
 }
@@ -107,24 +107,25 @@ error:
  */
 int afs_write_end(struct file *file, struct address_space *mapping,
                  loff_t pos, unsigned len, unsigned copied,
-                 struct page *page, void *fsdata)
+                 struct page *subpage, void *fsdata)
 {
+       struct folio *folio = page_folio(subpage);
        struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
        unsigned long priv;
-       unsigned int f, from = pos & (thp_size(page) - 1);
+       unsigned int f, from = offset_in_folio(folio, pos);
        unsigned int t, to = from + copied;
        loff_t i_size, maybe_i_size;
 
        _enter("{%llx:%llu},{%lx}",
-              vnode->fid.vid, vnode->fid.vnode, page->index);
+              vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
 
-       if (!PageUptodate(page)) {
+       if (!folio_test_uptodate(folio)) {
                if (copied < len) {
                        copied = 0;
                        goto out;
                }
 
-               SetPageUptodate(page);
+               folio_mark_uptodate(folio);
        }
 
        if (copied == 0)
@@ -141,29 +142,29 @@ int afs_write_end(struct file *file, struct address_space *mapping,
                write_sequnlock(&vnode->cb_lock);
        }
 
-       if (PagePrivate(page)) {
-               priv = page_private(page);
-               f = afs_page_dirty_from(page, priv);
-               t = afs_page_dirty_to(page, priv);
+       if (folio_test_private(folio)) {
+               priv = (unsigned long)folio_get_private(folio);
+               f = afs_folio_dirty_from(folio, priv);
+               t = afs_folio_dirty_to(folio, priv);
                if (from < f)
                        f = from;
                if (to > t)
                        t = to;
-               priv = afs_page_dirty(page, f, t);
-               set_page_private(page, priv);
-               trace_afs_page_dirty(vnode, tracepoint_string("dirty+"), page);
+               priv = afs_folio_dirty(folio, f, t);
+               folio_change_private(folio, (void *)priv);
+               trace_afs_folio_dirty(vnode, tracepoint_string("dirty+"), folio);
        } else {
-               priv = afs_page_dirty(page, from, to);
-               attach_page_private(page, (void *)priv);
-               trace_afs_page_dirty(vnode, tracepoint_string("dirty"), page);
+               priv = afs_folio_dirty(folio, from, to);
+               folio_attach_private(folio, (void *)priv);
+               trace_afs_folio_dirty(vnode, tracepoint_string("dirty"), folio);
        }
 
-       if (set_page_dirty(page))
-               _debug("dirtied %lx", page->index);
+       if (folio_mark_dirty(folio))
+               _debug("dirtied %lx", folio_index(folio));
 
 out:
-       unlock_page(page);
-       put_page(page);
+       folio_unlock(folio);
+       folio_put(folio);
        return copied;
 }
 
@@ -174,40 +175,32 @@ static void afs_kill_pages(struct address_space *mapping,
                           loff_t start, loff_t len)
 {
        struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-       struct pagevec pv;
-       unsigned int loop, psize;
+       struct folio *folio;
+       pgoff_t index = start / PAGE_SIZE;
+       pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
 
        _enter("{%llx:%llu},%llx @%llx",
               vnode->fid.vid, vnode->fid.vnode, len, start);
 
-       pagevec_init(&pv);
-
        do {
-               _debug("kill %llx @%llx", len, start);
-
-               pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
-                                             PAGEVEC_SIZE, pv.pages);
-               if (pv.nr == 0)
-                       break;
+               _debug("kill %lx (to %lx)", index, last);
 
-               for (loop = 0; loop < pv.nr; loop++) {
-                       struct page *page = pv.pages[loop];
+               folio = filemap_get_folio(mapping, index);
+               if (!folio) {
+                       next = index + 1;
+                       continue;
+               }
 
-                       if (page->index * PAGE_SIZE >= start + len)
-                               break;
+               next = folio_next_index(folio);
 
-                       psize = thp_size(page);
-                       start += psize;
-                       len -= psize;
-                       ClearPageUptodate(page);
-                       end_page_writeback(page);
-                       lock_page(page);
-                       generic_error_remove_page(mapping, page);
-                       unlock_page(page);
-               }
+               folio_clear_uptodate(folio);
+               folio_end_writeback(folio);
+               folio_lock(folio);
+               generic_error_remove_page(mapping, &folio->page);
+               folio_unlock(folio);
+               folio_put(folio);
 
-               __pagevec_release(&pv);
-       } while (len > 0);
+       } while (index = next, index <= last);
 
        _leave("");
 }
@@ -220,37 +213,27 @@ static void afs_redirty_pages(struct writeback_control *wbc,
                              loff_t start, loff_t len)
 {
        struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-       struct pagevec pv;
-       unsigned int loop, psize;
+       struct folio *folio;
+       pgoff_t index = start / PAGE_SIZE;
+       pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
 
        _enter("{%llx:%llu},%llx @%llx",
               vnode->fid.vid, vnode->fid.vnode, len, start);
 
-       pagevec_init(&pv);
-
        do {
                _debug("redirty %llx @%llx", len, start);
 
-               pv.nr = find_get_pages_contig(mapping, start / PAGE_SIZE,
-                                             PAGEVEC_SIZE, pv.pages);
-               if (pv.nr == 0)
-                       break;
-
-               for (loop = 0; loop < pv.nr; loop++) {
-                       struct page *page = pv.pages[loop];
-
-                       if (page->index * PAGE_SIZE >= start + len)
-                               break;
-
-                       psize = thp_size(page);
-                       start += psize;
-                       len -= psize;
-                       redirty_page_for_writepage(wbc, page);
-                       end_page_writeback(page);
+               folio = filemap_get_folio(mapping, index);
+               if (!folio) {
+                       next = index + 1;
+                       continue;
                }
 
-               __pagevec_release(&pv);
-       } while (len > 0);
+               next = index + folio_nr_pages(folio);
+               folio_redirty_for_writepage(wbc, folio);
+               folio_end_writeback(folio);
+               folio_put(folio);
+       } while (index = next, index <= last);
 
        _leave("");
 }
@@ -261,7 +244,7 @@ static void afs_redirty_pages(struct writeback_control *wbc,
 static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
 {
        struct address_space *mapping = vnode->vfs_inode.i_mapping;
-       struct page *page;
+       struct folio *folio;
        pgoff_t end;
 
        XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
@@ -272,15 +255,16 @@ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsign
        rcu_read_lock();
 
        end = (start + len - 1) / PAGE_SIZE;
-       xas_for_each(&xas, page, end) {
-               if (!PageWriteback(page)) {
-                       kdebug("bad %x @%llx page %lx %lx", len, start, page->index, end);
-                       ASSERT(PageWriteback(page));
+       xas_for_each(&xas, folio, end) {
+               if (!folio_test_writeback(folio)) {
+                       kdebug("bad %x @%llx page %lx %lx",
+                              len, start, folio_index(folio), end);
+                       ASSERT(folio_test_writeback(folio));
                }
 
-               trace_afs_page_dirty(vnode, tracepoint_string("clear"), page);
-               detach_page_private(page);
-               page_endio(page, true, 0);
+               trace_afs_folio_dirty(vnode, tracepoint_string("clear"), folio);
+               folio_detach_private(folio);
+               folio_end_writeback(folio);
        }
 
        rcu_read_unlock();
@@ -437,7 +421,7 @@ static void afs_extend_writeback(struct address_space *mapping,
                                 unsigned int *_len)
 {
        struct pagevec pvec;
-       struct page *page;
+       struct folio *folio;
        unsigned long priv;
        unsigned int psize, filler = 0;
        unsigned int f, t;
@@ -456,43 +440,43 @@ static void afs_extend_writeback(struct address_space *mapping,
                 */
                rcu_read_lock();
 
-               xas_for_each(&xas, page, ULONG_MAX) {
+               xas_for_each(&xas, folio, ULONG_MAX) {
                        stop = true;
-                       if (xas_retry(&xas, page))
+                       if (xas_retry(&xas, folio))
                                continue;
-                       if (xa_is_value(page))
+                       if (xa_is_value(folio))
                                break;
-                       if (page->index != index)
+                       if (folio_index(folio) != index)
                                break;
 
-                       if (!page_cache_get_speculative(page)) {
+                       if (!folio_try_get_rcu(folio)) {
                                xas_reset(&xas);
                                continue;
                        }
 
                        /* Has the page moved or been split? */
-                       if (unlikely(page != xas_reload(&xas))) {
-                               put_page(page);
+                       if (unlikely(folio != xas_reload(&xas))) {
+                               folio_put(folio);
                                break;
                        }
 
-                       if (!trylock_page(page)) {
-                               put_page(page);
+                       if (!folio_trylock(folio)) {
+                               folio_put(folio);
                                break;
                        }
-                       if (!PageDirty(page) || PageWriteback(page)) {
-                               unlock_page(page);
-                               put_page(page);
+                       if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
+                               folio_unlock(folio);
+                               folio_put(folio);
                                break;
                        }
 
-                       psize = thp_size(page);
-                       priv = page_private(page);
-                       f = afs_page_dirty_from(page, priv);
-                       t = afs_page_dirty_to(page, priv);
+                       psize = folio_size(folio);
+                       priv = (unsigned long)folio_get_private(folio);
+                       f = afs_folio_dirty_from(folio, priv);
+                       t = afs_folio_dirty_to(folio, priv);
                        if (f != 0 && !new_content) {
-                               unlock_page(page);
-                               put_page(page);
+                               folio_unlock(folio);
+                               folio_put(folio);
                                break;
                        }
 
@@ -503,8 +487,8 @@ static void afs_extend_writeback(struct address_space *mapping,
                        else if (t == psize || new_content)
                                stop = false;
 
-                       index += thp_nr_pages(page);
-                       if (!pagevec_add(&pvec, page))
+                       index += folio_nr_pages(folio);
+                       if (!pagevec_add(&pvec, &folio->page))
                                break;
                        if (stop)
                                break;
@@ -521,16 +505,16 @@ static void afs_extend_writeback(struct address_space *mapping,
                        break;
 
                for (i = 0; i < pagevec_count(&pvec); i++) {
-                       page = pvec.pages[i];
-                       trace_afs_page_dirty(vnode, tracepoint_string("store+"), page);
+                       folio = page_folio(pvec.pages[i]);
+                       trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
 
-                       if (!clear_page_dirty_for_io(page))
+                       if (!folio_clear_dirty_for_io(folio))
                                BUG();
-                       if (test_set_page_writeback(page))
+                       if (folio_start_writeback(folio))
                                BUG();
 
-                       *_count -= thp_nr_pages(page);
-                       unlock_page(page);
+                       *_count -= folio_nr_pages(folio);
+                       folio_unlock(folio);
                }
 
                pagevec_release(&pvec);
@@ -544,10 +528,10 @@ static void afs_extend_writeback(struct address_space *mapping,
  * Synchronously write back the locked page and any subsequent non-locked dirty
  * pages.
  */
-static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
-                                              struct writeback_control *wbc,
-                                              struct page *page,
-                                              loff_t start, loff_t end)
+static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
+                                               struct writeback_control *wbc,
+                                               struct folio *folio,
+                                               loff_t start, loff_t end)
 {
        struct afs_vnode *vnode = AFS_FS_I(mapping->host);
        struct iov_iter iter;
@@ -558,22 +542,22 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
        long count = wbc->nr_to_write;
        int ret;
 
-       _enter(",%lx,%llx-%llx", page->index, start, end);
+       _enter(",%lx,%llx-%llx", folio_index(folio), start, end);
 
-       if (test_set_page_writeback(page))
+       if (folio_start_writeback(folio))
                BUG();
 
-       count -= thp_nr_pages(page);
+       count -= folio_nr_pages(folio);
 
        /* Find all consecutive lockable dirty pages that have contiguous
         * written regions, stopping when we find a page that is not
         * immediately lockable, is not dirty or is missing, or we reach the
         * end of the range.
         */
-       priv = page_private(page);
-       offset = afs_page_dirty_from(page, priv);
-       to = afs_page_dirty_to(page, priv);
-       trace_afs_page_dirty(vnode, tracepoint_string("store"), page);
+       priv = (unsigned long)folio_get_private(folio);
+       offset = afs_folio_dirty_from(folio, priv);
+       to = afs_folio_dirty_to(folio, priv);
+       trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
 
        len = to - offset;
        start += offset;
@@ -586,7 +570,7 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
                max_len = min_t(unsigned long long, max_len, i_size - start);
 
                if (len < max_len &&
-                   (to == thp_size(page) || new_content))
+                   (to == folio_size(folio) || new_content))
                        afs_extend_writeback(mapping, vnode, &count,
                                             start, max_len, new_content, &len);
                len = min_t(loff_t, len, max_len);
@@ -596,7 +580,7 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
         * set; the first page is still locked at this point, but all the rest
         * have been unlocked.
         */
-       unlock_page(page);
+       folio_unlock(folio);
 
        if (start < i_size) {
                _debug("write back %x @%llx [%llx]", len, start, i_size);
@@ -657,16 +641,17 @@ static ssize_t afs_write_back_from_locked_page(struct address_space *mapping,
  * write a page back to the server
  * - the caller locked the page for us
  */
-int afs_writepage(struct page *page, struct writeback_control *wbc)
+int afs_writepage(struct page *subpage, struct writeback_control *wbc)
 {
+       struct folio *folio = page_folio(subpage);
        ssize_t ret;
        loff_t start;
 
-       _enter("{%lx},", page->index);
+       _enter("{%lx},", folio_index(folio));
 
-       start = page->index * PAGE_SIZE;
-       ret = afs_write_back_from_locked_page(page->mapping, wbc, page,
-                                             start, LLONG_MAX - start);
+       start = folio_index(folio) * PAGE_SIZE;
+       ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
+                                              folio, start, LLONG_MAX - start);
        if (ret < 0) {
                _leave(" = %zd", ret);
                return ret;
@@ -683,7 +668,8 @@ static int afs_writepages_region(struct address_space *mapping,
                                 struct writeback_control *wbc,
                                 loff_t start, loff_t end, loff_t *_next)
 {
-       struct page *page;
+       struct folio *folio;
+       struct page *head_page;
        ssize_t ret;
        int n;
 
@@ -693,13 +679,14 @@ static int afs_writepages_region(struct address_space *mapping,
                pgoff_t index = start / PAGE_SIZE;
 
                n = find_get_pages_range_tag(mapping, &index, end / PAGE_SIZE,
-                                            PAGECACHE_TAG_DIRTY, 1, &page);
+                                            PAGECACHE_TAG_DIRTY, 1, &head_page);
                if (!n)
                        break;
 
-               start = (loff_t)page->index * PAGE_SIZE; /* May regress with THPs */
+               folio = page_folio(head_page);
+               start = folio_pos(folio); /* May regress with THPs */
 
-               _debug("wback %lx", page->index);
+               _debug("wback %lx", folio_index(folio));
 
                /* At this point we hold neither the i_pages lock nor the
                 * page lock: the page may be truncated or invalidated
@@ -707,37 +694,38 @@ static int afs_writepages_region(struct address_space *mapping,
                 * back from swapper_space to tmpfs file mapping
                 */
                if (wbc->sync_mode != WB_SYNC_NONE) {
-                       ret = lock_page_killable(page);
+                       ret = folio_lock_killable(folio);
                        if (ret < 0) {
-                               put_page(page);
+                               folio_put(folio);
                                return ret;
                        }
                } else {
-                       if (!trylock_page(page)) {
-                               put_page(page);
+                       if (!folio_trylock(folio)) {
+                               folio_put(folio);
                                return 0;
                        }
                }
 
-               if (page->mapping != mapping || !PageDirty(page)) {
-                       start += thp_size(page);
-                       unlock_page(page);
-                       put_page(page);
+               if (folio_mapping(folio) != mapping ||
+                   !folio_test_dirty(folio)) {
+                       start += folio_size(folio);
+                       folio_unlock(folio);
+                       folio_put(folio);
                        continue;
                }
 
-               if (PageWriteback(page)) {
-                       unlock_page(page);
+               if (folio_test_writeback(folio)) {
+                       folio_unlock(folio);
                        if (wbc->sync_mode != WB_SYNC_NONE)
-                               wait_on_page_writeback(page);
-                       put_page(page);
+                               folio_wait_writeback(folio);
+                       folio_put(folio);
                        continue;
                }
 
-               if (!clear_page_dirty_for_io(page))
+               if (!folio_clear_dirty_for_io(folio))
                        BUG();
-               ret = afs_write_back_from_locked_page(mapping, wbc, page, start, end);
-               put_page(page);
+               ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+               folio_put(folio);
                if (ret < 0) {
                        _leave(" = %zd", ret);
                        return ret;
@@ -862,7 +850,6 @@ int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
 {
        struct folio *folio = page_folio(vmf->page);
-       struct page *page = &folio->page;
        struct file *file = vmf->vma->vm_file;
        struct inode *inode = file_inode(file);
        struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -870,7 +857,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        unsigned long priv;
        vm_fault_t ret = VM_FAULT_RETRY;
 
-       _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index);
+       _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, folio_index(folio));
 
        afs_validate(vnode, af->key);
 
@@ -880,18 +867,18 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
         * be modified.  We then assume the entire page will need writing back.
         */
 #ifdef CONFIG_AFS_FSCACHE
-       if (PageFsCache(page) &&
-           wait_on_page_fscache_killable(page) < 0)
+       if (folio_test_fscache(folio) &&
+           folio_wait_fscache_killable(folio) < 0)
                goto out;
 #endif
 
        if (folio_wait_writeback_killable(folio))
                goto out;
 
-       if (lock_page_killable(page) < 0)
+       if (folio_lock_killable(folio) < 0)
                goto out;
 
-       /* We mustn't change page->private until writeback is complete as that
+       /* We mustn't change folio->private until writeback is complete as that
         * details the portion of the page we need to write back and we might
         * need to redirty the page if there's a problem.
         */
@@ -900,14 +887,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
                goto out;
        }
 
-       priv = afs_page_dirty(page, 0, thp_size(page));
-       priv = afs_page_dirty_mmapped(priv);
-       if (PagePrivate(page)) {
-               set_page_private(page, priv);
-               trace_afs_page_dirty(vnode, tracepoint_string("mkwrite+"), page);
+       priv = afs_folio_dirty(folio, 0, folio_size(folio));
+       priv = afs_folio_dirty_mmapped(priv);
+       if (folio_test_private(folio)) {
+               folio_change_private(folio, (void *)priv);
+               trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite+"), folio);
        } else {
-               attach_page_private(page, (void *)priv);
-               trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"), page);
+               folio_attach_private(folio, (void *)priv);
+               trace_afs_folio_dirty(vnode, tracepoint_string("mkwrite"), folio);
        }
        file_update_time(file);
 
@@ -948,38 +935,38 @@ void afs_prune_wb_keys(struct afs_vnode *vnode)
 /*
  * Clean up a page during invalidation.
  */
-int afs_launder_page(struct page *page)
+int afs_launder_page(struct page *subpage)
 {
-       struct address_space *mapping = page->mapping;
-       struct afs_vnode *vnode = AFS_FS_I(mapping->host);
+       struct folio *folio = page_folio(subpage);
+       struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio));
        struct iov_iter iter;
        struct bio_vec bv[1];
        unsigned long priv;
        unsigned int f, t;
        int ret = 0;
 
-       _enter("{%lx}", page->index);
+       _enter("{%lx}", folio_index(folio));
 
-       priv = page_private(page);
-       if (clear_page_dirty_for_io(page)) {
+       priv = (unsigned long)folio_get_private(folio);
+       if (folio_clear_dirty_for_io(folio)) {
                f = 0;
-               t = thp_size(page);
-               if (PagePrivate(page)) {
-                       f = afs_page_dirty_from(page, priv);
-                       t = afs_page_dirty_to(page, priv);
+               t = folio_size(folio);
+               if (folio_test_private(folio)) {
+                       f = afs_folio_dirty_from(folio, priv);
+                       t = afs_folio_dirty_to(folio, priv);
                }
 
-               bv[0].bv_page = page;
+               bv[0].bv_page = &folio->page;
                bv[0].bv_offset = f;
                bv[0].bv_len = t - f;
                iov_iter_bvec(&iter, WRITE, bv, 1, bv[0].bv_len);
 
-               trace_afs_page_dirty(vnode, tracepoint_string("launder"), page);
-               ret = afs_store_data(vnode, &iter, page_offset(page) + f, true);
+               trace_afs_folio_dirty(vnode, tracepoint_string("launder"), folio);
+               ret = afs_store_data(vnode, &iter, folio_pos(folio) + f, true);
        }
 
-       trace_afs_page_dirty(vnode, tracepoint_string("laundered"), page);
-       detach_page_private(page);
-       wait_on_page_fscache(page);
+       trace_afs_folio_dirty(vnode, tracepoint_string("laundered"), folio);
+       folio_detach_private(folio);
+       folio_wait_fscache(folio);
        return ret;
 }
index 473d21b..66899b6 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -35,7 +35,7 @@ static bool chown_ok(struct user_namespace *mnt_userns,
                     kuid_t uid)
 {
        kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
-       if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, kuid))
+       if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, inode->i_uid))
                return true;
        if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
                return true;
@@ -62,7 +62,7 @@ static bool chgrp_ok(struct user_namespace *mnt_userns,
 {
        kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
        if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
-           (in_group_p(gid) || gid_eq(gid, kgid)))
+           (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
                return true;
        if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
                return true;
index 309516e..43c8995 100644 (file)
@@ -234,6 +234,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq,
                                  ordered_list);
                if (!test_bit(WORK_DONE_BIT, &work->flags))
                        break;
+               /*
+                * Orders all subsequent loads after reading WORK_DONE_BIT,
+                * paired with the smp_mb__before_atomic in btrfs_work_helper
+                * this guarantees that the ordered function will see all
+                * updates from ordinary work function.
+                */
+               smp_rmb();
 
                /*
                 * we are going to call the ordered done function, but
@@ -317,6 +324,13 @@ static void btrfs_work_helper(struct work_struct *normal_work)
        thresh_exec_hook(wq);
        work->func(work);
        if (need_order) {
+               /*
+                * Ensures all memory accesses done in the work function are
+                * ordered before setting the WORK_DONE_BIT. Ensuring the thread
+                * which is going to executed the ordered work sees them.
+                * Pairs with the smp_rmb in run_ordered_work.
+                */
+               smp_mb__before_atomic();
                set_bit(WORK_DONE_BIT, &work->flags);
                run_ordered_work(wq, work);
        } else {
index 59c3be8..514ead6 100644 (file)
@@ -3978,11 +3978,23 @@ static void btrfs_end_empty_barrier(struct bio *bio)
  */
 static void write_dev_flush(struct btrfs_device *device)
 {
-       struct request_queue *q = bdev_get_queue(device->bdev);
        struct bio *bio = device->flush_bio;
 
+#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+       /*
+        * When a disk has write caching disabled, we skip submission of a bio
+        * with flush and sync requests before writing the superblock, since
+        * it's not needed. However when the integrity checker is enabled, this
+        * results in reports that there are metadata blocks referred by a
+        * superblock that were not properly flushed. So don't skip the bio
+        * submission only when the integrity checker is enabled for the sake
+        * of simplicity, since this is a debug tool and not meant for use in
+        * non-debug builds.
+        */
+       struct request_queue *q = bdev_get_queue(device->bdev);
        if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags))
                return;
+#endif
 
        bio_reset(bio);
        bio->bi_end_io = btrfs_end_empty_barrier;
index fb8cc96..92138ac 100644 (file)
@@ -3985,6 +3985,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
        bool need_unlock; /* for mut. excl. ops lock */
        int ret;
 
+       if (!arg)
+               btrfs_warn(fs_info,
+       "IOC_BALANCE ioctl (v1) is deprecated and will be removed in kernel 5.18");
+
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
index 65cb076..9febb80 100644 (file)
@@ -125,6 +125,7 @@ static inline size_t read_compress_length(const char *buf)
 static int copy_compressed_data_to_page(char *compressed_data,
                                        size_t compressed_size,
                                        struct page **out_pages,
+                                       unsigned long max_nr_page,
                                        u32 *cur_out,
                                        const u32 sectorsize)
 {
@@ -133,6 +134,9 @@ static int copy_compressed_data_to_page(char *compressed_data,
        struct page *cur_page;
        char *kaddr;
 
+       if ((*cur_out / PAGE_SIZE) >= max_nr_page)
+               return -E2BIG;
+
        /*
         * We never allow a segment header crossing sector boundary, previous
         * run should ensure we have enough space left inside the sector.
@@ -161,6 +165,10 @@ static int copy_compressed_data_to_page(char *compressed_data,
                                     orig_out + compressed_size - *cur_out);
 
                kunmap(cur_page);
+
+               if ((*cur_out / PAGE_SIZE) >= max_nr_page)
+                       return -E2BIG;
+
                cur_page = out_pages[*cur_out / PAGE_SIZE];
                /* Allocate a new page */
                if (!cur_page) {
@@ -203,6 +211,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
        const u32 sectorsize = btrfs_sb(mapping->host->i_sb)->sectorsize;
        struct page *page_in = NULL;
        char *sizes_ptr;
+       const unsigned long max_nr_page = *out_pages;
        int ret = 0;
        /* Points to the file offset of input data */
        u64 cur_in = start;
@@ -210,6 +219,7 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
        u32 cur_out = 0;
        u32 len = *total_out;
 
+       ASSERT(max_nr_page > 0);
        *out_pages = 0;
        *total_out = 0;
        *total_in = 0;
@@ -248,7 +258,8 @@ int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
                }
 
                ret = copy_compressed_data_to_page(workspace->cbuf, out_len,
-                                                  pages, &cur_out, sectorsize);
+                                                  pages, max_nr_page,
+                                                  &cur_out, sectorsize);
                if (ret < 0)
                        goto out;
 
index cf82ea6..8f6ceea 100644 (file)
@@ -73,8 +73,8 @@ struct scrub_page {
        u64                     physical_for_dev_replace;
        atomic_t                refs;
        u8                      mirror_num;
-       int                     have_csum:1;
-       int                     io_error:1;
+       unsigned int            have_csum:1;
+       unsigned int            io_error:1;
        u8                      csum[BTRFS_CSUM_SIZE];
 
        struct scrub_recover    *recover;
index 61ac57b..0997e3c 100644 (file)
@@ -7559,6 +7559,19 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
        fs_info->fs_devices->total_rw_bytes = 0;
 
        /*
+        * Lockdep complains about possible circular locking dependency between
+        * a disk's open_mutex (struct gendisk.open_mutex), the rw semaphores
+        * used for freeze procection of a fs (struct super_block.s_writers),
+        * which we take when starting a transaction, and extent buffers of the
+        * chunk tree if we call read_one_dev() while holding a lock on an
+        * extent buffer of the chunk tree. Since we are mounting the filesystem
+        * and at this point there can't be any concurrent task modifying the
+        * chunk tree, to keep it simple, just skip locking on the chunk tree.
+        */
+       ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
+       path->skip_locking = 1;
+
+       /*
         * Read all device items, and then all the chunk items. All
         * device items are found before any chunk item (their object id
         * is smaller than the lowest possible object id for a chunk
@@ -7583,10 +7596,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
                                goto error;
                        break;
                }
-               /*
-                * The nodes on level 1 are not locked but we don't need to do
-                * that during mount time as nothing else can access the tree
-                */
                node = path->nodes[1];
                if (node) {
                        if (last_ra_node != node->start) {
@@ -7614,7 +7623,6 @@ int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
                         * requirement for chunk allocation, see the comment on
                         * top of btrfs_chunk_alloc() for details.
                         */
-                       ASSERT(!test_bit(BTRFS_FS_OPEN, &fs_info->flags));
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
                        ret = read_one_chunk(&found_key, leaf, chunk);
                        if (ret)
index f06b680..fc42dd0 100644 (file)
 /* 307s to avoid pathologically clashing with transaction commit */
 #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)
 
-static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level,
+static zstd_parameters zstd_get_btrfs_parameters(unsigned int level,
                                                 size_t src_len)
 {
-       ZSTD_parameters params = ZSTD_getParams(level, src_len, 0);
+       zstd_parameters params = zstd_get_params(level, src_len);
 
        if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
                params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
@@ -48,8 +48,8 @@ struct workspace {
        unsigned long last_used; /* jiffies */
        struct list_head list;
        struct list_head lru_list;
-       ZSTD_inBuffer in_buf;
-       ZSTD_outBuffer out_buf;
+       zstd_in_buffer in_buf;
+       zstd_out_buffer out_buf;
 };
 
 /*
@@ -155,12 +155,12 @@ static void zstd_calc_ws_mem_sizes(void)
        unsigned int level;
 
        for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
-               ZSTD_parameters params =
+               zstd_parameters params =
                        zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
                size_t level_size =
                        max_t(size_t,
-                             ZSTD_CStreamWorkspaceBound(params.cParams),
-                             ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
+                             zstd_cstream_workspace_bound(&params.cParams),
+                             zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));
 
                max_size = max_t(size_t, max_size, level_size);
                zstd_ws_mem_sizes[level - 1] = max_size;
@@ -371,7 +371,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
                unsigned long *total_in, unsigned long *total_out)
 {
        struct workspace *workspace = list_entry(ws, struct workspace, list);
-       ZSTD_CStream *stream;
+       zstd_cstream *stream;
        int ret = 0;
        int nr_pages = 0;
        struct page *in_page = NULL;  /* The current page to read */
@@ -381,7 +381,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
        unsigned long len = *total_out;
        const unsigned long nr_dest_pages = *out_pages;
        unsigned long max_out = nr_dest_pages * PAGE_SIZE;
-       ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
+       zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level,
                                                           len);
 
        *out_pages = 0;
@@ -389,10 +389,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
        *total_in = 0;
 
        /* Initialize the stream */
-       stream = ZSTD_initCStream(params, len, workspace->mem,
+       stream = zstd_init_cstream(&params, len, workspace->mem,
                        workspace->size);
        if (!stream) {
-               pr_warn("BTRFS: ZSTD_initCStream failed\n");
+               pr_warn("BTRFS: zstd_init_cstream failed\n");
                ret = -EIO;
                goto out;
        }
@@ -418,11 +418,11 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
        while (1) {
                size_t ret2;
 
-               ret2 = ZSTD_compressStream(stream, &workspace->out_buf,
+               ret2 = zstd_compress_stream(stream, &workspace->out_buf,
                                &workspace->in_buf);
-               if (ZSTD_isError(ret2)) {
-                       pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
-                                       ZSTD_getErrorCode(ret2));
+               if (zstd_is_error(ret2)) {
+                       pr_debug("BTRFS: zstd_compress_stream returned %d\n",
+                                       zstd_get_error_code(ret2));
                        ret = -EIO;
                        goto out;
                }
@@ -487,10 +487,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
        while (1) {
                size_t ret2;
 
-               ret2 = ZSTD_endStream(stream, &workspace->out_buf);
-               if (ZSTD_isError(ret2)) {
-                       pr_debug("BTRFS: ZSTD_endStream returned %d\n",
-                                       ZSTD_getErrorCode(ret2));
+               ret2 = zstd_end_stream(stream, &workspace->out_buf);
+               if (zstd_is_error(ret2)) {
+                       pr_debug("BTRFS: zstd_end_stream returned %d\n",
+                                       zstd_get_error_code(ret2));
                        ret = -EIO;
                        goto out;
                }
@@ -548,17 +548,17 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
        struct workspace *workspace = list_entry(ws, struct workspace, list);
        struct page **pages_in = cb->compressed_pages;
        size_t srclen = cb->compressed_len;
-       ZSTD_DStream *stream;
+       zstd_dstream *stream;
        int ret = 0;
        unsigned long page_in_index = 0;
        unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
        unsigned long buf_start;
        unsigned long total_out = 0;
 
-       stream = ZSTD_initDStream(
+       stream = zstd_init_dstream(
                        ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
        if (!stream) {
-               pr_debug("BTRFS: ZSTD_initDStream failed\n");
+               pr_debug("BTRFS: zstd_init_dstream failed\n");
                ret = -EIO;
                goto done;
        }
@@ -574,11 +574,11 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
        while (1) {
                size_t ret2;
 
-               ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+               ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
                                &workspace->in_buf);
-               if (ZSTD_isError(ret2)) {
-                       pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
-                                       ZSTD_getErrorCode(ret2));
+               if (zstd_is_error(ret2)) {
+                       pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+                                       zstd_get_error_code(ret2));
                        ret = -EIO;
                        goto done;
                }
@@ -624,16 +624,16 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
                size_t destlen)
 {
        struct workspace *workspace = list_entry(ws, struct workspace, list);
-       ZSTD_DStream *stream;
+       zstd_dstream *stream;
        int ret = 0;
        size_t ret2;
        unsigned long total_out = 0;
        unsigned long pg_offset = 0;
 
-       stream = ZSTD_initDStream(
+       stream = zstd_init_dstream(
                        ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
        if (!stream) {
-               pr_warn("BTRFS: ZSTD_initDStream failed\n");
+               pr_warn("BTRFS: zstd_init_dstream failed\n");
                ret = -EIO;
                goto finish;
        }
@@ -657,15 +657,15 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
 
                /* Check if the frame is over and we still need more input */
                if (ret2 == 0) {
-                       pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
+                       pr_debug("BTRFS: zstd_decompress_stream ended early\n");
                        ret = -EIO;
                        goto finish;
                }
-               ret2 = ZSTD_decompressStream(stream, &workspace->out_buf,
+               ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
                                &workspace->in_buf);
-               if (ZSTD_isError(ret2)) {
-                       pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
-                                       ZSTD_getErrorCode(ret2));
+               if (zstd_is_error(ret2)) {
+                       pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
+                                       zstd_get_error_code(ret2));
                        ret = -EIO;
                        goto finish;
                }
index 99b80b5..e53c854 100644 (file)
@@ -63,7 +63,7 @@
         (CONGESTION_ON_THRESH(congestion_kb) >> 2))
 
 static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
-                                       struct page *page, void **_fsdata);
+                                       struct folio *folio, void **_fsdata);
 
 static inline struct ceph_snap_context *page_snap_context(struct page *page)
 {
@@ -317,13 +317,14 @@ static const struct netfs_read_request_ops ceph_netfs_read_ops = {
 };
 
 /* read a single page, without unlocking it. */
-static int ceph_readpage(struct file *file, struct page *page)
+static int ceph_readpage(struct file *file, struct page *subpage)
 {
+       struct folio *folio = page_folio(subpage);
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_vino vino = ceph_vino(inode);
-       u64 off = page_offset(page);
-       u64 len = thp_size(page);
+       size_t len = folio_size(folio);
+       u64 off = folio_file_pos(folio);
 
        if (ci->i_inline_version != CEPH_INLINE_NONE) {
                /*
@@ -331,19 +332,19 @@ static int ceph_readpage(struct file *file, struct page *page)
                 * into page cache while getting Fcr caps.
                 */
                if (off == 0) {
-                       unlock_page(page);
+                       folio_unlock(folio);
                        return -EINVAL;
                }
-               zero_user_segment(page, 0, thp_size(page));
-               SetPageUptodate(page);
-               unlock_page(page);
+               zero_user_segment(&folio->page, 0, folio_size(folio));
+               folio_mark_uptodate(folio);
+               folio_unlock(folio);
                return 0;
        }
 
-       dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
-            vino.ino, vino.snap, file, off, len, page, page->index);
+       dout("readpage ino %llx.%llx file %p off %llu len %zu folio %p index %lu\n",
+            vino.ino, vino.snap, file, off, len, folio, folio_index(folio));
 
-       return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+       return netfs_readpage(file, folio, &ceph_netfs_read_ops, NULL);
 }
 
 static void ceph_readahead(struct readahead_control *ractl)
@@ -724,7 +725,7 @@ static int ceph_writepages_start(struct address_space *mapping,
             wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
             (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-       if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
+       if (ceph_inode_is_shutdown(inode)) {
                if (ci->i_wrbuffer_ref > 0) {
                        pr_warn_ratelimited(
                                "writepage_start %p %lld forced umount\n",
@@ -1145,12 +1146,12 @@ static struct ceph_snap_context *
 ceph_find_incompatible(struct page *page)
 {
        struct inode *inode = page->mapping->host;
-       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
 
-       if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-               dout(" page %p forced umount\n", page);
-               return ERR_PTR(-EIO);
+       if (ceph_inode_is_shutdown(inode)) {
+               dout(" page %p %llx:%llx is shutdown\n", page,
+                    ceph_vinop(inode));
+               return ERR_PTR(-ESTALE);
        }
 
        for (;;) {
@@ -1187,18 +1188,18 @@ ceph_find_incompatible(struct page *page)
 }
 
 static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
-                                       struct page *page, void **_fsdata)
+                                       struct folio *folio, void **_fsdata)
 {
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_snap_context *snapc;
 
-       snapc = ceph_find_incompatible(page);
+       snapc = ceph_find_incompatible(folio_page(folio, 0));
        if (snapc) {
                int r;
 
-               unlock_page(page);
-               put_page(page);
+               folio_unlock(folio);
+               folio_put(folio);
                if (IS_ERR(snapc))
                        return PTR_ERR(snapc);
 
@@ -1216,12 +1217,12 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
  * clean, or already dirty within the same snap context.
  */
 static int ceph_write_begin(struct file *file, struct address_space *mapping,
-                           loff_t pos, unsigned len, unsigned flags,
+                           loff_t pos, unsigned len, unsigned aop_flags,
                            struct page **pagep, void **fsdata)
 {
        struct inode *inode = file_inode(file);
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct page *page = NULL;
+       struct folio *folio = NULL;
        pgoff_t index = pos >> PAGE_SHIFT;
        int r;
 
@@ -1230,39 +1231,43 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
         * for inline_version sent to the MDS.
         */
        if (ci->i_inline_version != CEPH_INLINE_NONE) {
-               page = grab_cache_page_write_begin(mapping, index, flags);
-               if (!page)
+               unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+               if (aop_flags & AOP_FLAG_NOFS)
+                       fgp_flags |= FGP_NOFS;
+               folio = __filemap_get_folio(mapping, index, fgp_flags,
+                                           mapping_gfp_mask(mapping));
+               if (!folio)
                        return -ENOMEM;
 
                /*
                 * The inline_version on a new inode is set to 1. If that's the
-                * case, then the page is brand new and isn't yet Uptodate.
+                * case, then the folio is brand new and isn't yet Uptodate.
                 */
                r = 0;
                if (index == 0 && ci->i_inline_version != 1) {
-                       if (!PageUptodate(page)) {
+                       if (!folio_test_uptodate(folio)) {
                                WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
                                          ci->i_inline_version);
                                r = -EINVAL;
                        }
                        goto out;
                }
-               zero_user_segment(page, 0, thp_size(page));
-               SetPageUptodate(page);
+               zero_user_segment(&folio->page, 0, folio_size(folio));
+               folio_mark_uptodate(folio);
                goto out;
        }
 
-       r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+       r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
                              &ceph_netfs_read_ops, NULL);
 out:
        if (r == 0)
-               wait_on_page_fscache(page);
+               folio_wait_fscache(folio);
        if (r < 0) {
-               if (page)
-                       put_page(page);
+               if (folio)
+                       folio_put(folio);
        } else {
-               WARN_ON_ONCE(!PageLocked(page));
-               *pagep = page;
+               WARN_ON_ONCE(!folio_test_locked(folio));
+               *pagep = &folio->page;
        }
        return r;
 }
@@ -1273,32 +1278,33 @@ out:
  */
 static int ceph_write_end(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned copied,
-                         struct page *page, void *fsdata)
+                         struct page *subpage, void *fsdata)
 {
+       struct folio *folio = page_folio(subpage);
        struct inode *inode = file_inode(file);
        bool check_cap = false;
 
-       dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
-            inode, page, (int)pos, (int)copied, (int)len);
+       dout("write_end file %p inode %p folio %p %d~%d (%d)\n", file,
+            inode, folio, (int)pos, (int)copied, (int)len);
 
-       if (!PageUptodate(page)) {
+       if (!folio_test_uptodate(folio)) {
                /* just return that nothing was copied on a short copy */
                if (copied < len) {
                        copied = 0;
                        goto out;
                }
-               SetPageUptodate(page);
+               folio_mark_uptodate(folio);
        }
 
        /* did file size increase? */
        if (pos+copied > i_size_read(inode))
                check_cap = ceph_inode_set_size(inode, pos+copied);
 
-       set_page_dirty(page);
+       folio_mark_dirty(folio);
 
 out:
-       unlock_page(page);
-       put_page(page);
+       folio_unlock(folio);
+       folio_put(folio);
 
        if (check_cap)
                ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
@@ -1306,17 +1312,6 @@ out:
        return copied;
 }
 
-/*
- * we set .direct_IO to indicate direct io is supported, but since we
- * intercept O_DIRECT reads and writes early, this function should
- * never get called.
- */
-static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
-{
-       WARN_ON(1);
-       return -EINVAL;
-}
-
 const struct address_space_operations ceph_aops = {
        .readpage = ceph_readpage,
        .readahead = ceph_readahead,
@@ -1327,7 +1322,7 @@ const struct address_space_operations ceph_aops = {
        .set_page_dirty = ceph_set_page_dirty,
        .invalidatepage = ceph_invalidatepage,
        .releasepage = ceph_releasepage,
-       .direct_IO = ceph_direct_io,
+       .direct_IO = noop_direct_IO,
 };
 
 static void ceph_block_sigs(sigset_t *oldset)
@@ -1356,6 +1351,9 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
        sigset_t oldset;
        vm_fault_t ret = VM_FAULT_SIGBUS;
 
+       if (ceph_inode_is_shutdown(inode))
+               return ret;
+
        ceph_block_sigs(&oldset);
 
        dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
@@ -1447,6 +1445,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
        sigset_t oldset;
        vm_fault_t ret = VM_FAULT_SIGBUS;
 
+       if (ceph_inode_is_shutdown(inode))
+               return ret;
+
        prealloc_cf = ceph_alloc_cap_flush();
        if (!prealloc_cf)
                return VM_FAULT_OOM;
index 9cfadbb..457afda 100644 (file)
 #include "super.h"
 #include "cache.h"
 
-struct ceph_aux_inode {
-       u64     version;
-       u64     mtime_sec;
-       u64     mtime_nsec;
-};
-
 struct fscache_netfs ceph_cache_netfs = {
        .name           = "ceph",
        .version        = 0,
@@ -109,20 +103,14 @@ static enum fscache_checkaux ceph_fscache_inode_check_aux(
        void *cookie_netfs_data, const void *data, uint16_t dlen,
        loff_t object_size)
 {
-       struct ceph_aux_inode aux;
        struct ceph_inode_info* ci = cookie_netfs_data;
        struct inode* inode = &ci->vfs_inode;
 
-       if (dlen != sizeof(aux) ||
+       if (dlen != sizeof(ci->i_version) ||
            i_size_read(inode) != object_size)
                return FSCACHE_CHECKAUX_OBSOLETE;
 
-       memset(&aux, 0, sizeof(aux));
-       aux.version = ci->i_version;
-       aux.mtime_sec = inode->i_mtime.tv_sec;
-       aux.mtime_nsec = inode->i_mtime.tv_nsec;
-
-       if (memcmp(data, &aux, sizeof(aux)) != 0)
+       if (*(u64 *)data != ci->i_version)
                return FSCACHE_CHECKAUX_OBSOLETE;
 
        dout("ceph inode 0x%p cached okay\n", ci);
@@ -139,7 +127,6 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
-       struct ceph_aux_inode aux;
 
        /* No caching for filesystem */
        if (!fsc->fscache)
@@ -151,14 +138,10 @@ void ceph_fscache_register_inode_cookie(struct inode *inode)
 
        inode_lock_nested(inode, I_MUTEX_CHILD);
        if (!ci->fscache) {
-               memset(&aux, 0, sizeof(aux));
-               aux.version = ci->i_version;
-               aux.mtime_sec = inode->i_mtime.tv_sec;
-               aux.mtime_nsec = inode->i_mtime.tv_nsec;
                ci->fscache = fscache_acquire_cookie(fsc->fscache,
                                                     &ceph_fscache_inode_object_def,
                                                     &ci->i_vino, sizeof(ci->i_vino),
-                                                    &aux, sizeof(aux),
+                                                    &ci->i_version, sizeof(ci->i_version),
                                                     ci, i_size_read(inode), false);
        }
        inode_unlock(inode);
index 8f537f1..b9460b6 100644 (file)
@@ -1188,11 +1188,11 @@ void ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 
        lockdep_assert_held(&ci->i_ceph_lock);
 
-       fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+       fsc = ceph_inode_to_client(&ci->vfs_inode);
        WARN_ON_ONCE(ci->i_auth_cap == cap &&
                     !list_empty(&ci->i_dirty_item) &&
                     !fsc->blocklisted &&
-                    READ_ONCE(fsc->mount_state) != CEPH_MOUNT_SHUTDOWN);
+                    !ceph_inode_is_shutdown(&ci->vfs_inode));
 
        __ceph_remove_cap(cap, queue_release);
 }
@@ -1968,8 +1968,8 @@ retry:
                }
        }
 
-       dout("check_caps %p file_want %s used %s dirty %s flushing %s"
-            " issued %s revoking %s retain %s %s%s\n", inode,
+       dout("check_caps %llx.%llx file_want %s used %s dirty %s flushing %s"
+            " issued %s revoking %s retain %s %s%s\n", ceph_vinop(inode),
             ceph_cap_string(file_wanted),
             ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
             ceph_cap_string(ci->i_flushing_caps),
@@ -1990,7 +1990,8 @@ retry:
            (revoking & (CEPH_CAP_FILE_CACHE|
                         CEPH_CAP_FILE_LAZYIO)) && /*  or revoking cache */
            !tried_invalidate) {
-               dout("check_caps trying to invalidate on %p\n", inode);
+               dout("check_caps trying to invalidate on %llx.%llx\n",
+                    ceph_vinop(inode));
                if (try_nonblocking_invalidate(inode) < 0) {
                        dout("check_caps queuing invalidate\n");
                        queue_invalidate = true;
@@ -2629,9 +2630,9 @@ void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
  *
  * Returns 0 if caps were not able to be acquired (yet), 1 if succeed,
  * or a negative error code. There are 3 speical error codes:
- *  -EAGAIN: need to sleep but non-blocking is specified
- *  -EFBIG:  ask caller to call check_max_size() and try again.
- *  -ESTALE: ask caller to call ceph_renew_caps() and try again.
+ *  -EAGAIN:  need to sleep but non-blocking is specified
+ *  -EFBIG:   ask caller to call check_max_size() and try again.
+ *  -EUCLEAN: ask caller to call ceph_renew_caps() and try again.
  */
 enum {
        /* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
@@ -2679,7 +2680,7 @@ again:
                        dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
                             inode, endoff, ci->i_max_size);
                        if (endoff > ci->i_requested_max_size)
-                               ret = ci->i_auth_cap ? -EFBIG : -ESTALE;
+                               ret = ci->i_auth_cap ? -EFBIG : -EUCLEAN;
                        goto out_unlock;
                }
                /*
@@ -2749,9 +2750,9 @@ again:
                        goto out_unlock;
                }
 
-               if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-                       dout("get_cap_refs %p forced umount\n", inode);
-                       ret = -EIO;
+               if (ceph_inode_is_shutdown(inode)) {
+                       dout("get_cap_refs %p inode is shutdown\n", inode);
+                       ret = -ESTALE;
                        goto out_unlock;
                }
                mds_wanted = __ceph_caps_mds_wanted(ci, false);
@@ -2759,7 +2760,7 @@ again:
                        dout("get_cap_refs %p need %s > mds_wanted %s\n",
                             inode, ceph_cap_string(need),
                             ceph_cap_string(mds_wanted));
-                       ret = -ESTALE;
+                       ret = -EUCLEAN;
                        goto out_unlock;
                }
 
@@ -2843,7 +2844,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
 
        ret = try_get_cap_refs(inode, need, want, 0, flags, got);
        /* three special error codes */
-       if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
+       if (ret == -EAGAIN || ret == -EFBIG || ret == -EUCLEAN)
                ret = 0;
        return ret;
 }
@@ -2926,7 +2927,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
                }
 
                if (ret < 0) {
-                       if (ret == -EFBIG || ret == -ESTALE) {
+                       if (ret == -EFBIG || ret == -EUCLEAN) {
                                int ret2 = ceph_wait_on_async_create(inode);
                                if (ret2 < 0)
                                        return ret2;
@@ -2935,7 +2936,7 @@ int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got
                                check_max_size(inode, endoff);
                                continue;
                        }
-                       if (ret == -ESTALE) {
+                       if (ret == -EUCLEAN) {
                                /* session was killed, try renew caps */
                                ret = ceph_renew_caps(inode, flags);
                                if (ret == 0)
@@ -4315,7 +4316,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
                                      i_dirty_item);
                inode = &ci->vfs_inode;
                ihold(inode);
-               dout("flush_dirty_caps %p\n", inode);
+               dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
                spin_unlock(&mdsc->cap_dirty_lock);
                ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
                iput(inode);
@@ -4560,3 +4561,119 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
        spin_unlock(&dentry->d_lock);
        return ret;
 }
+
+static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct ceph_cap_snap *capsnap;
+       int capsnap_release = 0;
+
+       lockdep_assert_held(&ci->i_ceph_lock);
+
+       dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
+
+       while (!list_empty(&ci->i_cap_snaps)) {
+               capsnap = list_first_entry(&ci->i_cap_snaps,
+                                          struct ceph_cap_snap, ci_item);
+               __ceph_remove_capsnap(inode, capsnap, NULL, NULL);
+               ceph_put_snap_context(capsnap->context);
+               ceph_put_cap_snap(capsnap);
+               capsnap_release++;
+       }
+       wake_up_all(&ci->i_cap_wq);
+       wake_up_all(&mdsc->cap_flushing_wq);
+       return capsnap_release;
+}
+
+int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate)
+{
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       struct ceph_mds_client *mdsc = fsc->mdsc;
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       bool is_auth;
+       bool dirty_dropped = false;
+       int iputs = 0;
+
+       lockdep_assert_held(&ci->i_ceph_lock);
+
+       dout("removing cap %p, ci is %p, inode is %p\n",
+            cap, ci, &ci->vfs_inode);
+
+       is_auth = (cap == ci->i_auth_cap);
+       __ceph_remove_cap(cap, false);
+       if (is_auth) {
+               struct ceph_cap_flush *cf;
+
+               if (ceph_inode_is_shutdown(inode)) {
+                       if (inode->i_data.nrpages > 0)
+                               *invalidate = true;
+                       if (ci->i_wrbuffer_ref > 0)
+                               mapping_set_error(&inode->i_data, -EIO);
+               }
+
+               spin_lock(&mdsc->cap_dirty_lock);
+
+               /* trash all of the cap flushes for this inode */
+               while (!list_empty(&ci->i_cap_flush_list)) {
+                       cf = list_first_entry(&ci->i_cap_flush_list,
+                                             struct ceph_cap_flush, i_list);
+                       list_del_init(&cf->g_list);
+                       list_del_init(&cf->i_list);
+                       if (!cf->is_capsnap)
+                               ceph_free_cap_flush(cf);
+               }
+
+               if (!list_empty(&ci->i_dirty_item)) {
+                       pr_warn_ratelimited(
+                               " dropping dirty %s state for %p %lld\n",
+                               ceph_cap_string(ci->i_dirty_caps),
+                               inode, ceph_ino(inode));
+                       ci->i_dirty_caps = 0;
+                       list_del_init(&ci->i_dirty_item);
+                       dirty_dropped = true;
+               }
+               if (!list_empty(&ci->i_flushing_item)) {
+                       pr_warn_ratelimited(
+                               " dropping dirty+flushing %s state for %p %lld\n",
+                               ceph_cap_string(ci->i_flushing_caps),
+                               inode, ceph_ino(inode));
+                       ci->i_flushing_caps = 0;
+                       list_del_init(&ci->i_flushing_item);
+                       mdsc->num_cap_flushing--;
+                       dirty_dropped = true;
+               }
+               spin_unlock(&mdsc->cap_dirty_lock);
+
+               if (dirty_dropped) {
+                       mapping_set_error(inode->i_mapping, -EIO);
+
+                       if (ci->i_wrbuffer_ref_head == 0 &&
+                           ci->i_wr_ref == 0 &&
+                           ci->i_dirty_caps == 0 &&
+                           ci->i_flushing_caps == 0) {
+                               ceph_put_snap_context(ci->i_head_snapc);
+                               ci->i_head_snapc = NULL;
+                       }
+               }
+
+               if (atomic_read(&ci->i_filelock_ref) > 0) {
+                       /* make further file lock syscall return -EIO */
+                       ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
+                       pr_warn_ratelimited(" dropping file locks for %p %lld\n",
+                                           inode, ceph_ino(inode));
+               }
+
+               if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
+                       cf = ci->i_prealloc_cap_flush;
+                       ci->i_prealloc_cap_flush = NULL;
+                       if (!cf->is_capsnap)
+                               ceph_free_cap_flush(cf);
+               }
+
+               if (!list_empty(&ci->i_cap_snaps))
+                       iputs = remove_capsnaps(mdsc, inode);
+       }
+       if (dirty_dropped)
+               ++iputs;
+       return iputs;
+}
index 38b78b4..3cf7c9c 100644 (file)
@@ -146,82 +146,93 @@ static int mdsc_show(struct seq_file *s, void *p)
                   name, total, avg, _min, max, sum);                   \
 }
 
-static int metric_show(struct seq_file *s, void *p)
+static int metrics_file_show(struct seq_file *s, void *p)
 {
        struct ceph_fs_client *fsc = s->private;
-       struct ceph_mds_client *mdsc = fsc->mdsc;
-       struct ceph_client_metric *m = &mdsc->metric;
-       int nr_caps = 0;
-       s64 total, sum, avg, min, max, sq;
-       u64 sum_sz, avg_sz, min_sz, max_sz;
+       struct ceph_client_metric *m = &fsc->mdsc->metric;
 
-       sum = percpu_counter_sum(&m->total_inodes);
        seq_printf(s, "item                               total\n");
        seq_printf(s, "------------------------------------------\n");
-       seq_printf(s, "%-35s%lld / %lld\n", "opened files  / total inodes",
-                  atomic64_read(&m->opened_files), sum);
-       seq_printf(s, "%-35s%lld / %lld\n", "pinned i_caps / total inodes",
-                  atomic64_read(&m->total_caps), sum);
-       seq_printf(s, "%-35s%lld / %lld\n", "opened inodes / total inodes",
-                  percpu_counter_sum(&m->opened_inodes), sum);
-
-       seq_printf(s, "\n");
+       seq_printf(s, "%-35s%lld\n", "total inodes",
+                  percpu_counter_sum(&m->total_inodes));
+       seq_printf(s, "%-35s%lld\n", "opened files",
+                  atomic64_read(&m->opened_files));
+       seq_printf(s, "%-35s%lld\n", "pinned i_caps",
+                  atomic64_read(&m->total_caps));
+       seq_printf(s, "%-35s%lld\n", "opened inodes",
+                  percpu_counter_sum(&m->opened_inodes));
+       return 0;
+}
+
+static const char * const metric_str[] = {
+       "read",
+       "write",
+       "metadata",
+       "copyfrom"
+};
+static int metrics_latency_show(struct seq_file *s, void *p)
+{
+       struct ceph_fs_client *fsc = s->private;
+       struct ceph_client_metric *cm = &fsc->mdsc->metric;
+       struct ceph_metric *m;
+       s64 total, sum, avg, min, max, sq;
+       int i;
+
        seq_printf(s, "item          total       avg_lat(us)     min_lat(us)     max_lat(us)     stdev(us)\n");
        seq_printf(s, "-----------------------------------------------------------------------------------\n");
 
-       spin_lock(&m->read_metric_lock);
-       total = m->total_reads;
-       sum = m->read_latency_sum;
-       avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
-       min = m->read_latency_min;
-       max = m->read_latency_max;
-       sq = m->read_latency_sq_sum;
-       spin_unlock(&m->read_metric_lock);
-       CEPH_LAT_METRIC_SHOW("read", total, avg, min, max, sq);
-
-       spin_lock(&m->write_metric_lock);
-       total = m->total_writes;
-       sum = m->write_latency_sum;
-       avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
-       min = m->write_latency_min;
-       max = m->write_latency_max;
-       sq = m->write_latency_sq_sum;
-       spin_unlock(&m->write_metric_lock);
-       CEPH_LAT_METRIC_SHOW("write", total, avg, min, max, sq);
-
-       spin_lock(&m->metadata_metric_lock);
-       total = m->total_metadatas;
-       sum = m->metadata_latency_sum;
-       avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
-       min = m->metadata_latency_min;
-       max = m->metadata_latency_max;
-       sq = m->metadata_latency_sq_sum;
-       spin_unlock(&m->metadata_metric_lock);
-       CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq);
-
-       seq_printf(s, "\n");
+       for (i = 0; i < METRIC_MAX; i++) {
+               m = &cm->metric[i];
+               spin_lock(&m->lock);
+               total = m->total;
+               sum = m->latency_sum;
+               avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
+               min = m->latency_min;
+               max = m->latency_max;
+               sq = m->latency_sq_sum;
+               spin_unlock(&m->lock);
+               CEPH_LAT_METRIC_SHOW(metric_str[i], total, avg, min, max, sq);
+       }
+
+       return 0;
+}
+
+static int metrics_size_show(struct seq_file *s, void *p)
+{
+       struct ceph_fs_client *fsc = s->private;
+       struct ceph_client_metric *cm = &fsc->mdsc->metric;
+       struct ceph_metric *m;
+       s64 total;
+       u64 sum, avg, min, max;
+       int i;
+
        seq_printf(s, "item          total       avg_sz(bytes)   min_sz(bytes)   max_sz(bytes)  total_sz(bytes)\n");
        seq_printf(s, "----------------------------------------------------------------------------------------\n");
 
-       spin_lock(&m->read_metric_lock);
-       total = m->total_reads;
-       sum_sz = m->read_size_sum;
-       avg_sz = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum_sz, total) : 0;
-       min_sz = m->read_size_min;
-       max_sz = m->read_size_max;
-       spin_unlock(&m->read_metric_lock);
-       CEPH_SZ_METRIC_SHOW("read", total, avg_sz, min_sz, max_sz, sum_sz);
-
-       spin_lock(&m->write_metric_lock);
-       total = m->total_writes;
-       sum_sz = m->write_size_sum;
-       avg_sz = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum_sz, total) : 0;
-       min_sz = m->write_size_min;
-       max_sz = m->write_size_max;
-       spin_unlock(&m->write_metric_lock);
-       CEPH_SZ_METRIC_SHOW("write", total, avg_sz, min_sz, max_sz, sum_sz);
-
-       seq_printf(s, "\n");
+       for (i = 0; i < METRIC_MAX; i++) {
+               /* skip 'metadata' as it doesn't use the size metric */
+               if (i == METRIC_METADATA)
+                       continue;
+               m = &cm->metric[i];
+               spin_lock(&m->lock);
+               total = m->total;
+               sum = m->size_sum;
+               avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
+               min = m->size_min;
+               max = m->size_max;
+               spin_unlock(&m->lock);
+               CEPH_SZ_METRIC_SHOW(metric_str[i], total, avg, min, max, sum);
+       }
+
+       return 0;
+}
+
+static int metrics_caps_show(struct seq_file *s, void *p)
+{
+       struct ceph_fs_client *fsc = s->private;
+       struct ceph_client_metric *m = &fsc->mdsc->metric;
+       int nr_caps = 0;
+
        seq_printf(s, "item          total           miss            hit\n");
        seq_printf(s, "-------------------------------------------------\n");
 
@@ -350,8 +361,11 @@ DEFINE_SHOW_ATTRIBUTE(mdsmap);
 DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
-DEFINE_SHOW_ATTRIBUTE(metric);
 DEFINE_SHOW_ATTRIBUTE(status);
+DEFINE_SHOW_ATTRIBUTE(metrics_file);
+DEFINE_SHOW_ATTRIBUTE(metrics_latency);
+DEFINE_SHOW_ATTRIBUTE(metrics_size);
+DEFINE_SHOW_ATTRIBUTE(metrics_caps);
 
 
 /*
@@ -385,8 +399,9 @@ void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc)
        debugfs_remove(fsc->debugfs_mdsmap);
        debugfs_remove(fsc->debugfs_mds_sessions);
        debugfs_remove(fsc->debugfs_caps);
-       debugfs_remove(fsc->debugfs_metric);
+       debugfs_remove(fsc->debugfs_status);
        debugfs_remove(fsc->debugfs_mdsc);
+       debugfs_remove_recursive(fsc->debugfs_metrics_dir);
 }
 
 void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
@@ -426,12 +441,6 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                                                fsc,
                                                &mdsc_fops);
 
-       fsc->debugfs_metric = debugfs_create_file("metrics",
-                                                 0400,
-                                                 fsc->client->debugfs_dir,
-                                                 fsc,
-                                                 &metric_fops);
-
        fsc->debugfs_caps = debugfs_create_file("caps",
                                                0400,
                                                fsc->client->debugfs_dir,
@@ -443,6 +452,18 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
                                                  fsc->client->debugfs_dir,
                                                  fsc,
                                                  &status_fops);
+
+       fsc->debugfs_metrics_dir = debugfs_create_dir("metrics",
+                                                     fsc->client->debugfs_dir);
+
+       debugfs_create_file("file", 0400, fsc->debugfs_metrics_dir, fsc,
+                           &metrics_file_fops);
+       debugfs_create_file("latency", 0400, fsc->debugfs_metrics_dir, fsc,
+                           &metrics_latency_fops);
+       debugfs_create_file("size", 0400, fsc->debugfs_metrics_dir, fsc,
+                           &metrics_size_fops);
+       debugfs_create_file("caps", 0400, fsc->debugfs_metrics_dir, fsc,
+                           &metrics_caps_fops);
 }
 
 
index 1d65934..e0fa66a 100644 (file)
@@ -157,6 +157,11 @@ static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
                ceph_mdsc_put_request(req);
                if (!inode)
                        return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
+       } else {
+               if (ceph_inode_is_shutdown(inode)) {
+                       iput(inode);
+                       return ERR_PTR(-ESTALE);
+               }
        }
        return inode;
 }
@@ -223,8 +228,13 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
                return ERR_PTR(-ESTALE);
 
        inode = ceph_find_inode(sb, vino);
-       if (inode)
+       if (inode) {
+               if (ceph_inode_is_shutdown(inode)) {
+                       iput(inode);
+                       return ERR_PTR(-ESTALE);
+               }
                return d_obtain_alias(inode);
+       }
 
        req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
                                       USE_ANY_MDS);
index b129ea5..02a0a0f 100644 (file)
@@ -525,6 +525,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
 
        if (result) {
                struct dentry *dentry = req->r_dentry;
+               struct inode *inode = d_inode(dentry);
                int pathlen = 0;
                u64 base = 0;
                char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
@@ -534,7 +535,8 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
                if (!d_unhashed(dentry))
                        d_drop(dentry);
 
-               /* FIXME: start returning I/O errors on all accesses? */
+               ceph_inode_shutdown(inode);
+
                pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n",
                        base, IS_ERR(path) ? "<<bad>>" : path, result);
                ceph_mdsc_free_path(path, pathlen);
@@ -556,7 +558,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
                }
                ceph_kick_flushing_inode_caps(req->r_session, ci);
                spin_unlock(&ci->i_ceph_lock);
-       } else {
+       } else if (!result) {
                pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__,
                        req->r_deleg_ino);
        }
@@ -845,6 +847,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
        ssize_t ret;
        u64 off = iocb->ki_pos;
        u64 len = iov_iter_count(to);
+       u64 i_size;
 
        dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
             (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
@@ -868,7 +871,6 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
                struct page **pages;
                int num_pages;
                size_t page_off;
-               u64 i_size;
                bool more;
                int idx;
                size_t left;
@@ -951,11 +953,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
        }
 
        if (off > iocb->ki_pos) {
-               if (ret >= 0 &&
-                   iov_iter_count(to) > 0 && off >= i_size_read(inode))
+               if (off >= i_size) {
                        *retry_op = CHECK_EOF;
-               ret = off - iocb->ki_pos;
-               iocb->ki_pos = off;
+                       ret = i_size - iocb->ki_pos;
+                       iocb->ki_pos = i_size;
+               } else {
+                       ret = off - iocb->ki_pos;
+                       iocb->ki_pos = off;
+               }
        }
 
        dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
@@ -1526,6 +1531,9 @@ again:
        dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
             inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
 
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+
        if (direct_lock)
                ceph_start_io_direct(inode);
        else
@@ -1678,6 +1686,9 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
        loff_t pos;
        loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
@@ -2200,6 +2211,54 @@ static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
        return 0;
 }
 
+static struct ceph_osd_request *
+ceph_alloc_copyfrom_request(struct ceph_osd_client *osdc,
+                           u64 src_snapid,
+                           struct ceph_object_id *src_oid,
+                           struct ceph_object_locator *src_oloc,
+                           struct ceph_object_id *dst_oid,
+                           struct ceph_object_locator *dst_oloc,
+                           u32 truncate_seq, u64 truncate_size)
+{
+       struct ceph_osd_request *req;
+       int ret;
+       u32 src_fadvise_flags =
+               CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+               CEPH_OSD_OP_FLAG_FADVISE_NOCACHE;
+       u32 dst_fadvise_flags =
+               CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
+               CEPH_OSD_OP_FLAG_FADVISE_DONTNEED;
+
+       req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+       if (!req)
+               return ERR_PTR(-ENOMEM);
+
+       req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+       ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+       ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+       ret = osd_req_op_copy_from_init(req, src_snapid, 0,
+                                       src_oid, src_oloc,
+                                       src_fadvise_flags,
+                                       dst_fadvise_flags,
+                                       truncate_seq,
+                                       truncate_size,
+                                       CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+       if (ret)
+               goto out;
+
+       ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+       if (ret)
+               goto out;
+
+       return req;
+
+out:
+       ceph_osdc_put_request(req);
+       return ERR_PTR(ret);
+}
+
 static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
                                    struct ceph_inode_info *dst_ci, u64 *dst_off,
                                    struct ceph_fs_client *fsc,
@@ -2207,6 +2266,8 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
 {
        struct ceph_object_locator src_oloc, dst_oloc;
        struct ceph_object_id src_oid, dst_oid;
+       struct ceph_osd_client *osdc;
+       struct ceph_osd_request *req;
        size_t bytes = 0;
        u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
        u32 src_objlen, dst_objlen;
@@ -2217,6 +2278,7 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
        src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
        dst_oloc.pool = dst_ci->i_layout.pool_id;
        dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
+       osdc = &fsc->client->osdc;
 
        while (len >= object_size) {
                ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
@@ -2232,17 +2294,22 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
                ceph_oid_printf(&dst_oid, "%llx.%08llx",
                                dst_ci->i_vino.ino, dst_objnum);
                /* Do an object remote copy */
-               ret = ceph_osdc_copy_from(&fsc->client->osdc,
-                                         src_ci->i_vino.snap, 0,
-                                         &src_oid, &src_oloc,
-                                         CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-                                         CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
-                                         &dst_oid, &dst_oloc,
-                                         CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
-                                         CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
-                                         dst_ci->i_truncate_seq,
-                                         dst_ci->i_truncate_size,
-                                         CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
+               req = ceph_alloc_copyfrom_request(osdc, src_ci->i_vino.snap,
+                                                 &src_oid, &src_oloc,
+                                                 &dst_oid, &dst_oloc,
+                                                 dst_ci->i_truncate_seq,
+                                                 dst_ci->i_truncate_size);
+               if (IS_ERR(req))
+                       ret = PTR_ERR(req);
+               else {
+                       ceph_osdc_start_request(osdc, req, false);
+                       ret = ceph_osdc_wait_request(osdc, req);
+                       ceph_update_copyfrom_metrics(&fsc->mdsc->metric,
+                                                    req->r_start_latency,
+                                                    req->r_end_latency,
+                                                    object_size, ret);
+                       ceph_osdc_put_request(req);
+               }
                if (ret) {
                        if (ret == -EOPNOTSUPP) {
                                fsc->have_copy_from2 = false;
index 1c75741..e3322fc 100644 (file)
@@ -1841,15 +1841,14 @@ void ceph_queue_inode_work(struct inode *inode, int work_bit)
 static void ceph_do_invalidate_pages(struct inode *inode)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        u32 orig_gen;
        int check = 0;
 
        mutex_lock(&ci->i_truncate_mutex);
 
-       if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-               pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
-                                   inode, ceph_ino(inode));
+       if (ceph_inode_is_shutdown(inode)) {
+               pr_warn_ratelimited("%s: inode %llx.%llx is shut down\n",
+                                   __func__, ceph_vinop(inode));
                mapping_set_error(inode->i_mapping, -EIO);
                truncate_pagecache(inode, 0);
                mutex_unlock(&ci->i_truncate_mutex);
@@ -1871,7 +1870,8 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 
        ceph_fscache_invalidate(inode);
        if (invalidate_inode_pages2(inode->i_mapping) < 0) {
-               pr_err("invalidate_pages %p fails\n", inode);
+               pr_err("invalidate_inode_pages2 %llx.%llx failed\n",
+                      ceph_vinop(inode));
        }
 
        spin_lock(&ci->i_ceph_lock);
@@ -2103,12 +2103,14 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                loff_t isize = i_size_read(inode);
 
                dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
-               if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size > isize) {
-                       i_size_write(inode, attr->ia_size);
-                       inode->i_blocks = calc_inode_blocks(attr->ia_size);
-                       ci->i_reported_size = attr->ia_size;
-                       dirtied |= CEPH_CAP_FILE_EXCL;
-                       ia_valid |= ATTR_MTIME;
+               if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
+                       if (attr->ia_size > isize) {
+                               i_size_write(inode, attr->ia_size);
+                               inode->i_blocks = calc_inode_blocks(attr->ia_size);
+                               ci->i_reported_size = attr->ia_size;
+                               dirtied |= CEPH_CAP_FILE_EXCL;
+                               ia_valid |= ATTR_MTIME;
+                       }
                } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
                           attr->ia_size != isize) {
                        req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
@@ -2217,6 +2219,9 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+
        err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err != 0)
                return err;
@@ -2347,6 +2352,9 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
        u32 valid_mask = STATX_BASIC_STATS;
        int err = 0;
 
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+
        /* Skip the getattr altogether if we're asked not to sync */
        if (!(flags & AT_STATX_DONT_SYNC)) {
                err = ceph_do_getattr(inode,
@@ -2394,3 +2402,27 @@ int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
        stat->result_mask = request_mask & valid_mask;
        return err;
 }
+
+void ceph_inode_shutdown(struct inode *inode)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       struct rb_node *p;
+       int iputs = 0;
+       bool invalidate = false;
+
+       spin_lock(&ci->i_ceph_lock);
+       ci->i_ceph_flags |= CEPH_I_SHUTDOWN;
+       p = rb_first(&ci->i_caps);
+       while (p) {
+               struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
+
+               p = rb_next(p);
+               iputs += ceph_purge_inode_cap(inode, cap, &invalidate);
+       }
+       spin_unlock(&ci->i_ceph_lock);
+
+       if (invalidate)
+               ceph_queue_invalidate(inode);
+       while (iputs--)
+               iput(inode);
+}
index d8c3106..d1f154a 100644 (file)
@@ -241,6 +241,9 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
        if (!(fl->fl_flags & FL_POSIX))
                return -ENOLCK;
 
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+
        dout("ceph_lock, fl_owner: %p\n", fl->fl_owner);
 
        /* set wait bit as appropriate, then make command as Ceph expects it*/
@@ -303,6 +306,9 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
        if (!(fl->fl_flags & FL_FLOCK))
                return -ENOLCK;
 
+       if (ceph_inode_is_shutdown(inode))
+               return -ESTALE;
+
        dout("ceph_flock, fl_file: %p\n", fl->fl_file);
 
        spin_lock(&ci->i_ceph_lock);
index d64413a..250aad3 100644 (file)
@@ -1590,129 +1590,23 @@ out:
        return ret;
 }
 
-static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
-{
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       struct ceph_cap_snap *capsnap;
-       int capsnap_release = 0;
-
-       lockdep_assert_held(&ci->i_ceph_lock);
-
-       dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
-
-       while (!list_empty(&ci->i_cap_snaps)) {
-               capsnap = list_first_entry(&ci->i_cap_snaps,
-                                          struct ceph_cap_snap, ci_item);
-               __ceph_remove_capsnap(inode, capsnap, NULL, NULL);
-               ceph_put_snap_context(capsnap->context);
-               ceph_put_cap_snap(capsnap);
-               capsnap_release++;
-       }
-       wake_up_all(&ci->i_cap_wq);
-       wake_up_all(&mdsc->cap_flushing_wq);
-       return capsnap_release;
-}
-
 static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
                                  void *arg)
 {
-       struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
-       struct ceph_mds_client *mdsc = fsc->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       LIST_HEAD(to_remove);
-       bool dirty_dropped = false;
        bool invalidate = false;
-       int capsnap_release = 0;
+       int iputs;
 
        dout("removing cap %p, ci is %p, inode is %p\n",
             cap, ci, &ci->vfs_inode);
        spin_lock(&ci->i_ceph_lock);
-       __ceph_remove_cap(cap, false);
-       if (!ci->i_auth_cap) {
-               struct ceph_cap_flush *cf;
-
-               if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
-                       if (inode->i_data.nrpages > 0)
-                               invalidate = true;
-                       if (ci->i_wrbuffer_ref > 0)
-                               mapping_set_error(&inode->i_data, -EIO);
-               }
-
-               while (!list_empty(&ci->i_cap_flush_list)) {
-                       cf = list_first_entry(&ci->i_cap_flush_list,
-                                             struct ceph_cap_flush, i_list);
-                       list_move(&cf->i_list, &to_remove);
-               }
-
-               spin_lock(&mdsc->cap_dirty_lock);
-
-               list_for_each_entry(cf, &to_remove, i_list)
-                       list_del_init(&cf->g_list);
-
-               if (!list_empty(&ci->i_dirty_item)) {
-                       pr_warn_ratelimited(
-                               " dropping dirty %s state for %p %lld\n",
-                               ceph_cap_string(ci->i_dirty_caps),
-                               inode, ceph_ino(inode));
-                       ci->i_dirty_caps = 0;
-                       list_del_init(&ci->i_dirty_item);
-                       dirty_dropped = true;
-               }
-               if (!list_empty(&ci->i_flushing_item)) {
-                       pr_warn_ratelimited(
-                               " dropping dirty+flushing %s state for %p %lld\n",
-                               ceph_cap_string(ci->i_flushing_caps),
-                               inode, ceph_ino(inode));
-                       ci->i_flushing_caps = 0;
-                       list_del_init(&ci->i_flushing_item);
-                       mdsc->num_cap_flushing--;
-                       dirty_dropped = true;
-               }
-               spin_unlock(&mdsc->cap_dirty_lock);
-
-               if (dirty_dropped) {
-                       mapping_set_error(inode->i_mapping, -EIO);
-
-                       if (ci->i_wrbuffer_ref_head == 0 &&
-                           ci->i_wr_ref == 0 &&
-                           ci->i_dirty_caps == 0 &&
-                           ci->i_flushing_caps == 0) {
-                               ceph_put_snap_context(ci->i_head_snapc);
-                               ci->i_head_snapc = NULL;
-                       }
-               }
-
-               if (atomic_read(&ci->i_filelock_ref) > 0) {
-                       /* make further file lock syscall return -EIO */
-                       ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
-                       pr_warn_ratelimited(" dropping file locks for %p %lld\n",
-                                           inode, ceph_ino(inode));
-               }
-
-               if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
-                       list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
-                       ci->i_prealloc_cap_flush = NULL;
-               }
-
-               if (!list_empty(&ci->i_cap_snaps))
-                       capsnap_release = remove_capsnaps(mdsc, inode);
-       }
+       iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
        spin_unlock(&ci->i_ceph_lock);
-       while (!list_empty(&to_remove)) {
-               struct ceph_cap_flush *cf;
-               cf = list_first_entry(&to_remove,
-                                     struct ceph_cap_flush, i_list);
-               list_del_init(&cf->i_list);
-               if (!cf->is_capsnap)
-                       ceph_free_cap_flush(cf);
-       }
 
        wake_up_all(&ci->i_cap_wq);
        if (invalidate)
                ceph_queue_invalidate(inode);
-       if (dirty_dropped)
-               iput(inode);
-       while (capsnap_release--)
+       while (iputs--)
                iput(inode);
        return 0;
 }
@@ -3467,9 +3361,14 @@ static void handle_session(struct ceph_mds_session *session,
 
        if (msg_version >= 3) {
                u32 len;
-               /* version >= 2, metadata */
-               if (__decode_session_metadata(&p, end, &blocklisted) < 0)
+               /* version >= 2 and < 5, decode metadata, skip otherwise
+                * as it's handled via flags.
+                */
+               if (msg_version >= 5)
+                       ceph_decode_skip_map(&p, end, string, string, bad);
+               else if (__decode_session_metadata(&p, end, &blocklisted) < 0)
                        goto bad;
+
                /* version >= 3, feature bits */
                ceph_decode_32_safe(&p, end, len, bad);
                if (len) {
@@ -3478,6 +3377,18 @@ static void handle_session(struct ceph_mds_session *session,
                }
        }
 
+       if (msg_version >= 5) {
+               u32 flags;
+               /* version >= 4, struct_v, struct_cv, len, metric_spec */
+               ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 2, bad);
+               /* version >= 5, flags   */
+                ceph_decode_32_safe(&p, end, flags, bad);
+               if (flags & CEPH_SESSION_BLOCKLISTED) {
+                       pr_warn("mds%d session blocklisted\n", session->s_mds);
+                       blocklisted = true;
+               }
+       }
+
        mutex_lock(&mdsc->mutex);
        if (op == CEPH_SESSION_CLOSE) {
                ceph_get_mds_session(session);
@@ -5072,7 +4983,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
        return;
 
 bad:
-       pr_err("error decoding fsmap\n");
+       pr_err("error decoding fsmap %d. Shutting down mount.\n", err);
+       ceph_umount_begin(mdsc->fsc->sb);
 err_out:
        mutex_lock(&mdsc->mutex);
        mdsc->mdsmap_err = err;
@@ -5139,7 +5051,8 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 bad_unlock:
        mutex_unlock(&mdsc->mutex);
 bad:
-       pr_err("error decoding mdsmap %d\n", err);
+       pr_err("error decoding mdsmap %d. Shutting down mount.\n", err);
+       ceph_umount_begin(mdsc->fsc->sb);
        return;
 }
 
index 61d67cb..3038773 100644 (file)
@@ -263,10 +263,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
                                goto nomem;
                        for (j = 0; j < num_export_targets; j++) {
                                target = ceph_decode_32(&pexport_targets);
-                               if (target >= m->possible_max_rank) {
-                                       err = -EIO;
-                                       goto corrupt;
-                               }
                                info->export_targets[j] = target;
                        }
                } else {
index 04d5df2..c57699d 100644 (file)
@@ -62,7 +62,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
        read->header.ver = 1;
        read->header.compat = 1;
        read->header.data_len = cpu_to_le32(sizeof(*read) - header_len);
-       sum = m->read_latency_sum;
+       sum = m->metric[METRIC_READ].latency_sum;
        jiffies_to_timespec64(sum, &ts);
        read->sec = cpu_to_le32(ts.tv_sec);
        read->nsec = cpu_to_le32(ts.tv_nsec);
@@ -74,7 +74,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
        write->header.ver = 1;
        write->header.compat = 1;
        write->header.data_len = cpu_to_le32(sizeof(*write) - header_len);
-       sum = m->write_latency_sum;
+       sum = m->metric[METRIC_WRITE].latency_sum;
        jiffies_to_timespec64(sum, &ts);
        write->sec = cpu_to_le32(ts.tv_sec);
        write->nsec = cpu_to_le32(ts.tv_nsec);
@@ -86,7 +86,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
        meta->header.ver = 1;
        meta->header.compat = 1;
        meta->header.data_len = cpu_to_le32(sizeof(*meta) - header_len);
-       sum = m->metadata_latency_sum;
+       sum = m->metric[METRIC_METADATA].latency_sum;
        jiffies_to_timespec64(sum, &ts);
        meta->sec = cpu_to_le32(ts.tv_sec);
        meta->nsec = cpu_to_le32(ts.tv_nsec);
@@ -141,8 +141,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
        rsize->header.ver = 1;
        rsize->header.compat = 1;
        rsize->header.data_len = cpu_to_le32(sizeof(*rsize) - header_len);
-       rsize->total_ops = cpu_to_le64(m->total_reads);
-       rsize->total_size = cpu_to_le64(m->read_size_sum);
+       rsize->total_ops = cpu_to_le64(m->metric[METRIC_READ].total);
+       rsize->total_size = cpu_to_le64(m->metric[METRIC_READ].size_sum);
        items++;
 
        /* encode the write io size metric */
@@ -151,8 +151,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
        wsize->header.ver = 1;
        wsize->header.compat = 1;
        wsize->header.data_len = cpu_to_le32(sizeof(*wsize) - header_len);
-       wsize->total_ops = cpu_to_le64(m->total_writes);
-       wsize->total_size = cpu_to_le64(m->write_size_sum);
+       wsize->total_ops = cpu_to_le64(m->metric[METRIC_WRITE].total);
+       wsize->total_size = cpu_to_le64(m->metric[METRIC_WRITE].size_sum);
        items++;
 
        put_unaligned_le32(items, &head->num);
@@ -220,7 +220,8 @@ static void metric_delayed_work(struct work_struct *work)
 
 int ceph_metric_init(struct ceph_client_metric *m)
 {
-       int ret;
+       struct ceph_metric *metric;
+       int ret, i;
 
        if (!m)
                return -EINVAL;
@@ -243,32 +244,18 @@ int ceph_metric_init(struct ceph_client_metric *m)
        if (ret)
                goto err_i_caps_mis;
 
-       spin_lock_init(&m->read_metric_lock);
-       m->read_latency_sq_sum = 0;
-       m->read_latency_min = KTIME_MAX;
-       m->read_latency_max = 0;
-       m->total_reads = 0;
-       m->read_latency_sum = 0;
-       m->read_size_min = U64_MAX;
-       m->read_size_max = 0;
-       m->read_size_sum = 0;
-
-       spin_lock_init(&m->write_metric_lock);
-       m->write_latency_sq_sum = 0;
-       m->write_latency_min = KTIME_MAX;
-       m->write_latency_max = 0;
-       m->total_writes = 0;
-       m->write_latency_sum = 0;
-       m->write_size_min = U64_MAX;
-       m->write_size_max = 0;
-       m->write_size_sum = 0;
-
-       spin_lock_init(&m->metadata_metric_lock);
-       m->metadata_latency_sq_sum = 0;
-       m->metadata_latency_min = KTIME_MAX;
-       m->metadata_latency_max = 0;
-       m->total_metadatas = 0;
-       m->metadata_latency_sum = 0;
+       for (i = 0; i < METRIC_MAX; i++) {
+               metric = &m->metric[i];
+               spin_lock_init(&metric->lock);
+               metric->size_sum = 0;
+               metric->size_min = U64_MAX;
+               metric->size_max = 0;
+               metric->total = 0;
+               metric->latency_sum = 0;
+               metric->latency_sq_sum = 0;
+               metric->latency_min = KTIME_MAX;
+               metric->latency_max = 0;
+       }
 
        atomic64_set(&m->opened_files, 0);
        ret = percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL);
@@ -338,9 +325,9 @@ static inline void __update_stdev(ktime_t total, ktime_t lsum,
        *sq_sump += sq;
 }
 
-void ceph_update_read_metrics(struct ceph_client_metric *m,
-                             ktime_t r_start, ktime_t r_end,
-                             unsigned int size, int rc)
+void ceph_update_metrics(struct ceph_metric *m,
+                        ktime_t r_start, ktime_t r_end,
+                        unsigned int size, int rc)
 {
        ktime_t lat = ktime_sub(r_end, r_start);
        ktime_t total;
@@ -348,63 +335,12 @@ void ceph_update_read_metrics(struct ceph_client_metric *m,
        if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
                return;
 
-       spin_lock(&m->read_metric_lock);
-       total = ++m->total_reads;
-       m->read_size_sum += size;
-       m->read_latency_sum += lat;
-       METRIC_UPDATE_MIN_MAX(m->read_size_min,
-                             m->read_size_max,
-                             size);
-       METRIC_UPDATE_MIN_MAX(m->read_latency_min,
-                             m->read_latency_max,
-                             lat);
-       __update_stdev(total, m->read_latency_sum,
-                      &m->read_latency_sq_sum, lat);
-       spin_unlock(&m->read_metric_lock);
-}
-
-void ceph_update_write_metrics(struct ceph_client_metric *m,
-                              ktime_t r_start, ktime_t r_end,
-                              unsigned int size, int rc)
-{
-       ktime_t lat = ktime_sub(r_end, r_start);
-       ktime_t total;
-
-       if (unlikely(rc && rc != -ETIMEDOUT))
-               return;
-
-       spin_lock(&m->write_metric_lock);
-       total = ++m->total_writes;
-       m->write_size_sum += size;
-       m->write_latency_sum += lat;
-       METRIC_UPDATE_MIN_MAX(m->write_size_min,
-                             m->write_size_max,
-                             size);
-       METRIC_UPDATE_MIN_MAX(m->write_latency_min,
-                             m->write_latency_max,
-                             lat);
-       __update_stdev(total, m->write_latency_sum,
-                      &m->write_latency_sq_sum, lat);
-       spin_unlock(&m->write_metric_lock);
-}
-
-void ceph_update_metadata_metrics(struct ceph_client_metric *m,
-                                 ktime_t r_start, ktime_t r_end,
-                                 int rc)
-{
-       ktime_t lat = ktime_sub(r_end, r_start);
-       ktime_t total;
-
-       if (unlikely(rc && rc != -ENOENT))
-               return;
-
-       spin_lock(&m->metadata_metric_lock);
-       total = ++m->total_metadatas;
-       m->metadata_latency_sum += lat;
-       METRIC_UPDATE_MIN_MAX(m->metadata_latency_min,
-                             m->metadata_latency_max,
-                             lat);
-       __update_stdev(total, m->metadata_latency_sum,
-                      &m->metadata_latency_sq_sum, lat);
-       spin_unlock(&m->metadata_metric_lock);
+       spin_lock(&m->lock);
+       total = ++m->total;
+       m->size_sum += size;
+       METRIC_UPDATE_MIN_MAX(m->size_min, m->size_max, size);
+       m->latency_sum += lat;
+       METRIC_UPDATE_MIN_MAX(m->latency_min, m->latency_max, lat);
+       __update_stdev(total, m->latency_sum, &m->latency_sq_sum, lat);
+       spin_unlock(&m->lock);
 }
index 0133955..bb45608 100644 (file)
@@ -125,6 +125,26 @@ struct ceph_metric_head {
        __le32 num;     /* the number of metrics that will be sent */
 } __packed;
 
+enum metric_type {
+       METRIC_READ,
+       METRIC_WRITE,
+       METRIC_METADATA,
+       METRIC_COPYFROM,
+       METRIC_MAX
+};
+
+struct ceph_metric {
+       spinlock_t lock;
+       u64 total;
+       u64 size_sum;
+       u64 size_min;
+       u64 size_max;
+       ktime_t latency_sum;
+       ktime_t latency_sq_sum;
+       ktime_t latency_min;
+       ktime_t latency_max;
+};
+
 /* This is the global metrics */
 struct ceph_client_metric {
        atomic64_t            total_dentries;
@@ -135,32 +155,7 @@ struct ceph_client_metric {
        struct percpu_counter i_caps_hit;
        struct percpu_counter i_caps_mis;
 
-       spinlock_t read_metric_lock;
-       u64 total_reads;
-       u64 read_size_sum;
-       u64 read_size_min;
-       u64 read_size_max;
-       ktime_t read_latency_sum;
-       ktime_t read_latency_sq_sum;
-       ktime_t read_latency_min;
-       ktime_t read_latency_max;
-
-       spinlock_t write_metric_lock;
-       u64 total_writes;
-       u64 write_size_sum;
-       u64 write_size_min;
-       u64 write_size_max;
-       ktime_t write_latency_sum;
-       ktime_t write_latency_sq_sum;
-       ktime_t write_latency_min;
-       ktime_t write_latency_max;
-
-       spinlock_t metadata_metric_lock;
-       u64 total_metadatas;
-       ktime_t metadata_latency_sum;
-       ktime_t metadata_latency_sq_sum;
-       ktime_t metadata_latency_min;
-       ktime_t metadata_latency_max;
+       struct ceph_metric metric[METRIC_MAX];
 
        /* The total number of directories and files that are opened */
        atomic64_t opened_files;
@@ -195,13 +190,36 @@ static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
        percpu_counter_inc(&m->i_caps_mis);
 }
 
-extern void ceph_update_read_metrics(struct ceph_client_metric *m,
-                                    ktime_t r_start, ktime_t r_end,
-                                    unsigned int size, int rc);
-extern void ceph_update_write_metrics(struct ceph_client_metric *m,
-                                     ktime_t r_start, ktime_t r_end,
-                                     unsigned int size, int rc);
-extern void ceph_update_metadata_metrics(struct ceph_client_metric *m,
-                                        ktime_t r_start, ktime_t r_end,
-                                        int rc);
+extern void ceph_update_metrics(struct ceph_metric *m,
+                               ktime_t r_start, ktime_t r_end,
+                               unsigned int size, int rc);
+
+static inline void ceph_update_read_metrics(struct ceph_client_metric *m,
+                                           ktime_t r_start, ktime_t r_end,
+                                           unsigned int size, int rc)
+{
+       ceph_update_metrics(&m->metric[METRIC_READ],
+                           r_start, r_end, size, rc);
+}
+static inline void ceph_update_write_metrics(struct ceph_client_metric *m,
+                                            ktime_t r_start, ktime_t r_end,
+                                            unsigned int size, int rc)
+{
+       ceph_update_metrics(&m->metric[METRIC_WRITE],
+                           r_start, r_end, size, rc);
+}
+static inline void ceph_update_metadata_metrics(struct ceph_client_metric *m,
+                                               ktime_t r_start, ktime_t r_end,
+                                               int rc)
+{
+       ceph_update_metrics(&m->metric[METRIC_METADATA],
+                           r_start, r_end, 0, rc);
+}
+static inline void ceph_update_copyfrom_metrics(struct ceph_client_metric *m,
+                                               ktime_t r_start, ktime_t r_end,
+                                               unsigned int size, int rc)
+{
+       ceph_update_metrics(&m->metric[METRIC_COPYFROM],
+                           r_start, r_end, size, rc);
+}
 #endif /* _FS_CEPH_MDS_METRIC_H */
index fd8742b..bab6123 100644 (file)
@@ -52,8 +52,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
        struct ceph_mon_client *monc = &fsc->client->monc;
        struct ceph_statfs st;
-       u64 fsid;
-       int err;
+       int i, err;
        u64 data_pool;
 
        if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) {
@@ -99,12 +98,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_namelen = NAME_MAX;
 
        /* Must convert the fsid, for consistent values across arches */
+       buf->f_fsid.val[0] = 0;
        mutex_lock(&monc->mutex);
-       fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^
-              le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1));
+       for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i)
+               buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]);
        mutex_unlock(&monc->mutex);
 
-       buf->f_fsid = u64_to_fsid(fsid);
+       /* fold the fs_cluster_id into the upper bits */
+       buf->f_fsid.val[1] = monc->fs_cluster_id;
 
        return 0;
 }
@@ -577,8 +578,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
        if (fsopt->flags & CEPH_MOUNT_OPT_CLEANRECOVER)
                seq_show_option(m, "recover_session", "clean");
 
-       if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
-               seq_puts(m, ",nowsync");
+       if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
+               seq_puts(m, ",wsync");
 
        if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
                seq_printf(m, ",wsize=%u", fsopt->wsize);
@@ -842,7 +843,7 @@ static void __ceph_umount_begin(struct ceph_fs_client *fsc)
  * ceph_umount_begin - initiate forced umount.  Tear down the
  * mount, skipping steps that may hang while waiting for server(s).
  */
-static void ceph_umount_begin(struct super_block *sb)
+void ceph_umount_begin(struct super_block *sb)
 {
        struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 
index 14f951c..ac331aa 100644 (file)
@@ -48,7 +48,8 @@
 
 #define CEPH_MOUNT_OPT_DEFAULT                 \
        (CEPH_MOUNT_OPT_DCACHE |                \
-        CEPH_MOUNT_OPT_NOCOPYFROM)
+        CEPH_MOUNT_OPT_NOCOPYFROM |            \
+        CEPH_MOUNT_OPT_ASYNC_DIROPS)
 
 #define ceph_set_mount_opt(fsc, opt) \
        (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt
@@ -128,9 +129,9 @@ struct ceph_fs_client {
        struct dentry *debugfs_congestion_kb;
        struct dentry *debugfs_bdi;
        struct dentry *debugfs_mdsc, *debugfs_mdsmap;
-       struct dentry *debugfs_metric;
        struct dentry *debugfs_status;
        struct dentry *debugfs_mds_sessions;
+       struct dentry *debugfs_metrics_dir;
 #endif
 
 #ifdef CONFIG_CEPH_FSCACHE
@@ -580,6 +581,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_ODIRECT         (1 << 11) /* inode in direct I/O mode */
 #define CEPH_ASYNC_CREATE_BIT  (12)      /* async create in flight for this */
 #define CEPH_I_ASYNC_CREATE    (1 << CEPH_ASYNC_CREATE_BIT)
+#define CEPH_I_SHUTDOWN                (1 << 13) /* inode is no longer usable */
 
 /*
  * Masks of ceph inode work.
@@ -939,6 +941,7 @@ extern void ceph_put_snapid_map(struct ceph_mds_client* mdsc,
                                struct ceph_snapid_map *sm);
 extern void ceph_trim_snapid_map(struct ceph_mds_client *mdsc);
 extern void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc);
+void ceph_umount_begin(struct super_block *sb);
 
 
 /*
@@ -1027,6 +1030,16 @@ extern int ceph_setattr(struct user_namespace *mnt_userns,
 extern int ceph_getattr(struct user_namespace *mnt_userns,
                        const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags);
+void ceph_inode_shutdown(struct inode *inode);
+
+static inline bool ceph_inode_is_shutdown(struct inode *inode)
+{
+       unsigned long flags = READ_ONCE(ceph_inode(inode)->i_ceph_flags);
+       struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+       int state = READ_ONCE(fsc->mount_state);
+
+       return (flags & CEPH_I_SHUTDOWN) || state >= CEPH_MOUNT_SHUTDOWN;
+}
 
 /* xattr.c */
 int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int);
@@ -1198,6 +1211,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
 extern int ceph_uninline_data(struct file *filp, struct page *locked_page);
 extern int ceph_pool_perm_check(struct inode *inode, int need);
 extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
+int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);
 
 /* file.c */
 extern const struct file_operations ceph_file_fops;
index de2c12b..d282caf 100644 (file)
@@ -271,7 +271,8 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        c = 0;
        spin_lock(&cifs_tcp_ses_lock);
        list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
-               if (server->is_channel)
+               /* channel info will be printed as a part of sessions below */
+               if (CIFS_SERVER_IS_CHAN(server))
                        continue;
 
                c++;
@@ -358,6 +359,8 @@ skip_rdma:
                        seq_printf(m, " signed");
                if (server->posix_ext_supported)
                        seq_printf(m, " posix");
+               if (server->nosharesock)
+                       seq_printf(m, " nosharesock");
 
                if (server->rdma)
                        seq_printf(m, "\nRDMA ");
@@ -412,12 +415,14 @@ skip_rdma:
                                   from_kuid(&init_user_ns, ses->linux_uid),
                                   from_kuid(&init_user_ns, ses->cred_uid));
 
+                       spin_lock(&ses->chan_lock);
                        if (ses->chan_count > 1) {
                                seq_printf(m, "\n\n\tExtra Channels: %zu ",
                                           ses->chan_count-1);
                                for (j = 1; j < ses->chan_count; j++)
                                        cifs_dump_channel(m, j, &ses->chans[j]);
                        }
+                       spin_unlock(&ses->chan_lock);
 
                        seq_puts(m, "\n\n\tShares: ");
                        j = 0;
index 007427b..b0864da 100644 (file)
@@ -307,12 +307,8 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt,
 static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
 {
        struct cifs_sb_info *cifs_sb;
-       struct cifs_ses *ses;
-       struct cifs_tcon *tcon;
        void *page;
-       char *full_path, *root_path;
-       unsigned int xid;
-       int rc;
+       char *full_path;
        struct vfsmount *mnt;
 
        cifs_dbg(FYI, "in %s\n", __func__);
@@ -324,8 +320,6 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
         * the double backslashes usually used in the UNC. This function
         * gives us the latter, so we must adjust the result.
         */
-       mnt = ERR_PTR(-ENOMEM);
-
        cifs_sb = CIFS_SB(mntpt->d_sb);
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) {
                mnt = ERR_PTR(-EREMOTE);
@@ -341,60 +335,11 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt)
        }
 
        convert_delimiter(full_path, '\\');
-
        cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path);
 
-       if (!cifs_sb_master_tlink(cifs_sb)) {
-               cifs_dbg(FYI, "%s: master tlink is NULL\n", __func__);
-               goto free_full_path;
-       }
-
-       tcon = cifs_sb_master_tcon(cifs_sb);
-       if (!tcon) {
-               cifs_dbg(FYI, "%s: master tcon is NULL\n", __func__);
-               goto free_full_path;
-       }
-
-       root_path = kstrdup(tcon->treeName, GFP_KERNEL);
-       if (!root_path) {
-               mnt = ERR_PTR(-ENOMEM);
-               goto free_full_path;
-       }
-       cifs_dbg(FYI, "%s: root path: %s\n", __func__, root_path);
-
-       ses = tcon->ses;
-       xid = get_xid();
-
-       /*
-        * If DFS root has been expired, then unconditionally fetch it again to
-        * refresh DFS referral cache.
-        */
-       rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
-                           root_path + 1, NULL, NULL);
-       if (!rc) {
-               rc = dfs_cache_find(xid, ses, cifs_sb->local_nls,
-                                   cifs_remap(cifs_sb), full_path + 1,
-                                   NULL, NULL);
-       }
-
-       free_xid(xid);
-
-       if (rc) {
-               mnt = ERR_PTR(rc);
-               goto free_root_path;
-       }
-       /*
-        * OK - we were able to get and cache a referral for @full_path.
-        *
-        * Now, pass it down to cifs_mount() and it will retry every available
-        * node server in case of failures - no need to do it here.
-        */
        mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path);
-       cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__,
-                full_path + 1, mnt);
+       cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, full_path + 1, mnt);
 
-free_root_path:
-       kfree(root_path);
 free_full_path:
        free_dentry_path(page);
 cdda_exit:
index f974075..013a4bd 100644 (file)
@@ -61,11 +61,6 @@ struct cifs_sb_info {
        /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */
        char *prepath;
 
-       /*
-        * Canonical DFS path initially provided by the mount call. We might connect to something
-        * different via DFS but we want to keep it to do failover properly.
-        */
-       char *origin_fullpath; /* \\HOST\SHARE\[OPTIONAL PATH] */
        /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
        uuid_t dfs_mount_id;
        /*
index abff31d..be74606 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
+#include <linux/utsname.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
 #include <crypto/internal/hash.h>
@@ -75,7 +76,8 @@
 #define SMB_ECHO_INTERVAL_MAX 600
 #define SMB_ECHO_INTERVAL_DEFAULT 60
 
-/* dns resolution interval in seconds */
+/* dns resolution intervals in seconds */
+#define SMB_DNS_RESOLVE_INTERVAL_MIN     120
 #define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600
 
 /* maximum number of PDUs in one compound */
 #define XATTR_DOS_ATTRIB "user.DOSATTRIB"
 #endif
 
+#define CIFS_MAX_WORKSTATION_LEN  (__NEW_UTS_LEN + 1)  /* reasonable max for client */
+
 /*
  * CIFS vfs client Status information (based on what we know.)
  */
@@ -592,6 +596,7 @@ struct TCP_Server_Info {
        struct list_head pending_mid_q;
        bool noblocksnd;                /* use blocking sendmsg */
        bool noautotune;                /* do not autotune send buf sizes */
+       bool nosharesock;
        bool tcp_nodelay;
        unsigned int credits;  /* send no more requests at once */
        unsigned int max_credits; /* can override large 32000 default at mnt */
@@ -685,13 +690,34 @@ struct TCP_Server_Info {
         */
        int nr_targets;
        bool noblockcnt; /* use non-blocking connect() */
-       bool is_channel; /* if a session channel */
+
+       /*
+        * If this is a session channel,
+        * primary_server holds the ref-counted
+        * pointer to primary channel connection for the session.
+        */
+#define CIFS_SERVER_IS_CHAN(server)    (!!(server)->primary_server)
+       struct TCP_Server_Info *primary_server;
+
 #ifdef CONFIG_CIFS_SWN_UPCALL
        bool use_swn_dstaddr;
        struct sockaddr_storage swn_dstaddr;
 #endif
 #ifdef CONFIG_CIFS_DFS_UPCALL
        bool is_dfs_conn; /* if a dfs connection */
+       struct mutex refpath_lock; /* protects leaf_fullpath */
+       /*
+        * Canonical DFS full paths that were used to chase referrals in mount and reconnect.
+        *
+        * origin_fullpath: first or original referral path
+        * leaf_fullpath: last referral path (might be changed due to nested links in reconnect)
+        *
+        * current_fullpath: pointer to either origin_fullpath or leaf_fullpath
+        * NOTE: cannot be accessed outside cifs_reconnect() and smb2_reconnect()
+        *
+        * format: \\HOST\SHARE\[OPTIONAL PATH]
+        */
+       char *origin_fullpath, *leaf_fullpath, *current_fullpath;
 #endif
 };
 
@@ -908,6 +934,7 @@ struct cifs_ses {
                                   and after mount option parsing we fill it */
        char *domainName;
        char *password;
+       char *workstation_name;
        struct session_key auth_key;
        struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
        enum securityEnum sectype; /* what security flavor was specified? */
@@ -933,16 +960,21 @@ struct cifs_ses {
         * iface_lock should be taken when accessing any of these fields
         */
        spinlock_t iface_lock;
+       /* ========= begin: protected by iface_lock ======== */
        struct cifs_server_iface *iface_list;
        size_t iface_count;
        unsigned long iface_last_update; /* jiffies */
+       /* ========= end: protected by iface_lock ======== */
 
+       spinlock_t chan_lock;
+       /* ========= begin: protected by chan_lock ======== */
 #define CIFS_MAX_CHANNELS 16
        struct cifs_chan chans[CIFS_MAX_CHANNELS];
        struct cifs_chan *binding_chan;
        size_t chan_count;
        size_t chan_max;
        atomic_t chan_seq; /* round robin state */
+       /* ========= end: protected by chan_lock ======== */
 };
 
 /*
@@ -1091,7 +1123,6 @@ struct cifs_tcon {
        struct cached_fid crfid; /* Cached root fid */
        /* BB add field for back pointer to sb struct(s)? */
 #ifdef CONFIG_CIFS_DFS_UPCALL
-       char *dfs_path; /* canonical DFS path */
        struct list_head ulist; /* cache update list */
 #endif
 };
@@ -1942,4 +1973,14 @@ static inline bool is_tcon_dfs(struct cifs_tcon *tcon)
                tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT);
 }
 
+static inline bool cifs_is_referral_server(struct cifs_tcon *tcon,
+                                          const struct dfs_info3_param *ref)
+{
+       /*
+        * Check if all targets are capable of handling DFS referrals as per
+        * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+        */
+       return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER));
+}
+
 #endif /* _CIFS_GLOB_H */
index d0f85b6..f3073a6 100644 (file)
@@ -269,8 +269,9 @@ extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
 
 extern void cifs_close_deferred_file_under_dentry(struct cifs_tcon *cifs_tcon,
                                const char *path);
-
-extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
+extern struct TCP_Server_Info *
+cifs_get_tcp_session(struct smb3_fs_context *ctx,
+                    struct TCP_Server_Info *primary_server);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
                                 int from_reconnect);
 extern void cifs_put_tcon(struct cifs_tcon *tcon);
@@ -607,7 +608,7 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov,
 
 struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server);
 void cifs_put_tcp_super(struct super_block *sb);
-int update_super_prepath(struct cifs_tcon *tcon, char *prefix);
+int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix);
 char *extract_hostname(const char *unc);
 char *extract_sharename(const char *unc);
 
@@ -634,4 +635,7 @@ static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options)
                return options;
 }
 
+struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon);
+void cifs_put_tcon_super(struct super_block *sb);
+
 #endif                 /* _CIFSPROTO_H */
index 0abbff4..82577a7 100644 (file)
@@ -61,6 +61,20 @@ extern bool disable_legacy_dialects;
 /* Drop the connection to not overload the server */
 #define NUM_STATUS_IO_TIMEOUT   5
 
+struct mount_ctx {
+       struct cifs_sb_info *cifs_sb;
+       struct smb3_fs_context *fs_ctx;
+       unsigned int xid;
+       struct TCP_Server_Info *server;
+       struct cifs_ses *ses;
+       struct cifs_tcon *tcon;
+#ifdef CONFIG_CIFS_DFS_UPCALL
+       struct cifs_ses *root_ses;
+       uuid_t mount_id;
+       char *origin_fullpath, *leaf_fullpath;
+#endif
+};
+
 static int ip_connect(struct TCP_Server_Info *server);
 static int generic_ip_connect(struct TCP_Server_Info *server);
 static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
@@ -115,7 +129,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
                         * To make sure we don't use the cached entry, retry 1s
                         * after expiry.
                         */
-                       ttl = (expiry - now + 1);
+                       ttl = max_t(unsigned long, expiry - now, SMB_DNS_RESOLVE_INTERVAL_MIN) + 1;
        }
        rc = !rc ? -1 : 0;
 
@@ -148,139 +162,38 @@ static void cifs_resolve_server(struct work_struct *work)
        mutex_unlock(&server->srv_mutex);
 }
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
-/* These functions must be called with server->srv_mutex held */
-static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
-                                      struct cifs_sb_info *cifs_sb,
-                                      struct dfs_cache_tgt_list *tgt_list,
-                                      struct dfs_cache_tgt_iterator **tgt_it)
-{
-       const char *name;
-       int rc;
-
-       if (!cifs_sb || !cifs_sb->origin_fullpath)
-               return;
-
-       if (!*tgt_it) {
-               *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
-       } else {
-               *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
-               if (!*tgt_it)
-                       *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
-       }
-
-       cifs_dbg(FYI, "%s: UNC: %s\n", __func__, cifs_sb->origin_fullpath);
-
-       name = dfs_cache_get_tgt_name(*tgt_it);
-
-       kfree(server->hostname);
-
-       server->hostname = extract_hostname(name);
-       if (IS_ERR(server->hostname)) {
-               cifs_dbg(FYI,
-                        "%s: failed to extract hostname from target: %ld\n",
-                        __func__, PTR_ERR(server->hostname));
-               return;
-       }
-
-       rc = reconn_set_ipaddr_from_hostname(server);
-       if (rc) {
-               cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
-                        __func__, rc);
-       }
-}
-
-static inline int reconn_setup_dfs_targets(struct cifs_sb_info *cifs_sb,
-                                          struct dfs_cache_tgt_list *tl)
-{
-       if (!cifs_sb->origin_fullpath)
-               return -EOPNOTSUPP;
-       return dfs_cache_noreq_find(cifs_sb->origin_fullpath + 1, NULL, tl);
-}
-#endif
-
-/*
- * cifs tcp session reconnection
+/**
+ * Mark all sessions and tcons for reconnect.
  *
- * mark tcp session as reconnecting so temporarily locked
- * mark all smb sessions as reconnecting for tcp session
- * reconnect tcp session
- * wake up waiters on reconnection? - (not needed currently)
+ * @server needs to be previously set to CifsNeedReconnect.
  */
-int
-cifs_reconnect(struct TCP_Server_Info *server)
+static void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server)
 {
-       int rc = 0;
-       struct list_head *tmp, *tmp2;
        struct cifs_ses *ses;
        struct cifs_tcon *tcon;
-       struct mid_q_entry *mid_entry;
+       struct mid_q_entry *mid, *nmid;
        struct list_head retry_list;
-#ifdef CONFIG_CIFS_DFS_UPCALL
-       struct super_block *sb = NULL;
-       struct cifs_sb_info *cifs_sb = NULL;
-       struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
-       struct dfs_cache_tgt_iterator *tgt_it = NULL;
-#endif
+       struct TCP_Server_Info *pserver;
 
-       spin_lock(&GlobalMid_Lock);
-       server->nr_targets = 1;
-#ifdef CONFIG_CIFS_DFS_UPCALL
-       spin_unlock(&GlobalMid_Lock);
-       sb = cifs_get_tcp_super(server);
-       if (IS_ERR(sb)) {
-               rc = PTR_ERR(sb);
-               cifs_dbg(FYI, "%s: will not do DFS failover: rc = %d\n",
-                        __func__, rc);
-               sb = NULL;
-       } else {
-               cifs_sb = CIFS_SB(sb);
-               rc = reconn_setup_dfs_targets(cifs_sb, &tgt_list);
-               if (rc) {
-                       cifs_sb = NULL;
-                       if (rc != -EOPNOTSUPP) {
-                               cifs_server_dbg(VFS, "%s: no target servers for DFS failover\n",
-                                               __func__);
-                       }
-               } else {
-                       server->nr_targets = dfs_cache_get_nr_tgts(&tgt_list);
-               }
-       }
-       cifs_dbg(FYI, "%s: will retry %d target(s)\n", __func__,
-                server->nr_targets);
-       spin_lock(&GlobalMid_Lock);
-#endif
-       if (server->tcpStatus == CifsExiting) {
-               /* the demux thread will exit normally
-               next time through the loop */
-               spin_unlock(&GlobalMid_Lock);
-#ifdef CONFIG_CIFS_DFS_UPCALL
-               dfs_cache_free_tgts(&tgt_list);
-               cifs_put_tcp_super(sb);
-#endif
-               wake_up(&server->response_q);
-               return rc;
-       } else
-               server->tcpStatus = CifsNeedReconnect;
-       spin_unlock(&GlobalMid_Lock);
        server->maxBuf = 0;
        server->max_read = 0;
 
        cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
        trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
+       /*
+        * before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they
+        * are not used until reconnected.
+        */
+       cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__);
+
+       /* If server is a channel, select the primary channel */
+       pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
 
-       /* before reconnecting the tcp session, mark the smb session (uid)
-               and the tid bad so they are not used until reconnected */
-       cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n",
-                __func__);
        spin_lock(&cifs_tcp_ses_lock);
-       list_for_each(tmp, &server->smb_ses_list) {
-               ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+       list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
                ses->need_reconnect = true;
-               list_for_each(tmp2, &ses->tcon_list) {
-                       tcon = list_entry(tmp2, struct cifs_tcon, tcon_list);
+               list_for_each_entry(tcon, &ses->tcon_list, tcon_list)
                        tcon->need_reconnect = true;
-               }
                if (ses->tcon_ipc)
                        ses->tcon_ipc->need_reconnect = true;
        }
@@ -290,11 +203,11 @@ cifs_reconnect(struct TCP_Server_Info *server)
        cifs_dbg(FYI, "%s: tearing down socket\n", __func__);
        mutex_lock(&server->srv_mutex);
        if (server->ssocket) {
-               cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n",
-                        server->ssocket->state, server->ssocket->flags);
+               cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state,
+                        server->ssocket->flags);
                kernel_sock_shutdown(server->ssocket, SHUT_WR);
-               cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n",
-                        server->ssocket->state, server->ssocket->flags);
+               cifs_dbg(FYI, "Post shutdown state: 0x%x Flags: 0x%lx\n", server->ssocket->state,
+                        server->ssocket->flags);
                sock_release(server->ssocket);
                server->ssocket = NULL;
        }
@@ -309,23 +222,21 @@ cifs_reconnect(struct TCP_Server_Info *server)
        INIT_LIST_HEAD(&retry_list);
        cifs_dbg(FYI, "%s: moving mids to private list\n", __func__);
        spin_lock(&GlobalMid_Lock);
-       list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
-               mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-               kref_get(&mid_entry->refcount);
-               if (mid_entry->mid_state == MID_REQUEST_SUBMITTED)
-                       mid_entry->mid_state = MID_RETRY_NEEDED;
-               list_move(&mid_entry->qhead, &retry_list);
-               mid_entry->mid_flags |= MID_DELETED;
+       list_for_each_entry_safe(mid, nmid, &server->pending_mid_q, qhead) {
+               kref_get(&mid->refcount);
+               if (mid->mid_state == MID_REQUEST_SUBMITTED)
+                       mid->mid_state = MID_RETRY_NEEDED;
+               list_move(&mid->qhead, &retry_list);
+               mid->mid_flags |= MID_DELETED;
        }
        spin_unlock(&GlobalMid_Lock);
        mutex_unlock(&server->srv_mutex);
 
        cifs_dbg(FYI, "%s: issuing mid callbacks\n", __func__);
-       list_for_each_safe(tmp, tmp2, &retry_list) {
-               mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-               list_del_init(&mid_entry->qhead);
-               mid_entry->callback(mid_entry);
-               cifs_mid_q_entry_release(mid_entry);
+       list_for_each_entry_safe(mid, nmid, &retry_list, qhead) {
+               list_del_init(&mid->qhead);
+               mid->callback(mid);
+               cifs_mid_q_entry_release(mid);
        }
 
        if (cifs_rdma_enabled(server)) {
@@ -333,38 +244,48 @@ cifs_reconnect(struct TCP_Server_Info *server)
                smbd_destroy(server);
                mutex_unlock(&server->srv_mutex);
        }
+}
+
+static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets)
+{
+       spin_lock(&GlobalMid_Lock);
+       server->nr_targets = num_targets;
+       if (server->tcpStatus == CifsExiting) {
+               /* the demux thread will exit normally next time through the loop */
+               spin_unlock(&GlobalMid_Lock);
+               wake_up(&server->response_q);
+               return false;
+       }
+       server->tcpStatus = CifsNeedReconnect;
+       spin_unlock(&GlobalMid_Lock);
+       return true;
+}
+
+/*
+ * cifs tcp session reconnection
+ *
+ * mark tcp session as reconnecting so temporarily locked
+ * mark all smb sessions as reconnecting for tcp session
+ * reconnect tcp session
+ * wake up waiters on reconnection? - (not needed currently)
+ */
+static int __cifs_reconnect(struct TCP_Server_Info *server)
+{
+       int rc = 0;
+
+       if (!cifs_tcp_ses_needs_reconnect(server, 1))
+               return 0;
+
+       cifs_mark_tcp_ses_conns_for_reconnect(server);
 
        do {
                try_to_freeze();
-
                mutex_lock(&server->srv_mutex);
 
-
                if (!cifs_swn_set_server_dstaddr(server)) {
-#ifdef CONFIG_CIFS_DFS_UPCALL
-               if (cifs_sb && cifs_sb->origin_fullpath)
-                       /*
-                        * Set up next DFS target server (if any) for reconnect. If DFS
-                        * feature is disabled, then we will retry last server we
-                        * connected to before.
-                        */
-                       reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
-               else {
-#endif
-                       /*
-                        * Resolve the hostname again to make sure that IP address is up-to-date.
-                        */
+                       /* resolve the hostname again to make sure that IP address is up-to-date */
                        rc = reconn_set_ipaddr_from_hostname(server);
-                       if (rc) {
-                               cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
-                                               __func__, rc);
-                       }
-
-#ifdef CONFIG_CIFS_DFS_UPCALL
-               }
-#endif
-
-
+                       cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
                }
 
                if (cifs_rdma_enabled(server))
@@ -372,8 +293,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
                else
                        rc = generic_ip_connect(server);
                if (rc) {
-                       cifs_dbg(FYI, "reconnect error %d\n", rc);
                        mutex_unlock(&server->srv_mutex);
+                       cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
                        msleep(3000);
                } else {
                        atomic_inc(&tcpSesReconnectCount);
@@ -387,19 +308,128 @@ cifs_reconnect(struct TCP_Server_Info *server)
                }
        } while (server->tcpStatus == CifsNeedReconnect);
 
+       if (server->tcpStatus == CifsNeedNegotiate)
+               mod_delayed_work(cifsiod_wq, &server->echo, 0);
+
+       wake_up(&server->response_q);
+       return rc;
+}
+
 #ifdef CONFIG_CIFS_DFS_UPCALL
-       if (tgt_it) {
-               rc = dfs_cache_noreq_update_tgthint(cifs_sb->origin_fullpath + 1,
-                                                   tgt_it);
-               if (rc) {
-                       cifs_server_dbg(VFS, "%s: failed to update DFS target hint: rc = %d\n",
-                                __func__, rc);
+static int __reconnect_target_unlocked(struct TCP_Server_Info *server, const char *target)
+{
+       int rc;
+       char *hostname;
+
+       if (!cifs_swn_set_server_dstaddr(server)) {
+               if (server->hostname != target) {
+                       hostname = extract_hostname(target);
+                       if (!IS_ERR(hostname)) {
+                               kfree(server->hostname);
+                               server->hostname = hostname;
+                       } else {
+                               cifs_dbg(FYI, "%s: couldn't extract hostname or address from dfs target: %ld\n",
+                                        __func__, PTR_ERR(hostname));
+                               cifs_dbg(FYI, "%s: default to last target server: %s\n", __func__,
+                                        server->hostname);
+                       }
                }
-               dfs_cache_free_tgts(&tgt_list);
+               /* resolve the hostname again to make sure that IP address is up-to-date. */
+               rc = reconn_set_ipaddr_from_hostname(server);
+               cifs_dbg(FYI, "%s: reconn_set_ipaddr_from_hostname: rc=%d\n", __func__, rc);
        }
+       /* Reconnect the socket */
+       if (cifs_rdma_enabled(server))
+               rc = smbd_reconnect(server);
+       else
+               rc = generic_ip_connect(server);
 
-       cifs_put_tcp_super(sb);
-#endif
+       return rc;
+}
+
+static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_cache_tgt_list *tl,
+                                    struct dfs_cache_tgt_iterator **target_hint)
+{
+       int rc;
+       struct dfs_cache_tgt_iterator *tit;
+
+       *target_hint = NULL;
+
+       /* If dfs target list is empty, then reconnect to last server */
+       tit = dfs_cache_get_tgt_iterator(tl);
+       if (!tit)
+               return __reconnect_target_unlocked(server, server->hostname);
+
+       /* Otherwise, try every dfs target in @tl */
+       for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
+               rc = __reconnect_target_unlocked(server, dfs_cache_get_tgt_name(tit));
+               if (!rc) {
+                       *target_hint = tit;
+                       break;
+               }
+       }
+       return rc;
+}
+
+static int reconnect_dfs_server(struct TCP_Server_Info *server)
+{
+       int rc = 0;
+       const char *refpath = server->current_fullpath + 1;
+       struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+       struct dfs_cache_tgt_iterator *target_hint = NULL;
+       int num_targets = 0;
+
+       /*
+        * Determine the number of dfs targets the referral path in @cifs_sb resolves to.
+        *
+        * smb2_reconnect() needs to know how long it should wait based upon the number of dfs
+        * targets (server->nr_targets).  It's also possible that the cached referral was cleared
+        * through /proc/fs/cifs/dfscache or the target list is empty due to server settings after
+        * refreshing the referral, so, in this case, default it to 1.
+        */
+       if (!dfs_cache_noreq_find(refpath, NULL, &tl))
+               num_targets = dfs_cache_get_nr_tgts(&tl);
+       if (!num_targets)
+               num_targets = 1;
+
+       if (!cifs_tcp_ses_needs_reconnect(server, num_targets))
+               return 0;
+
+       cifs_mark_tcp_ses_conns_for_reconnect(server);
+
+       do {
+               try_to_freeze();
+               mutex_lock(&server->srv_mutex);
+
+               rc = reconnect_target_unlocked(server, &tl, &target_hint);
+               if (rc) {
+                       /* Failed to reconnect socket */
+                       mutex_unlock(&server->srv_mutex);
+                       cifs_dbg(FYI, "%s: reconnect error %d\n", __func__, rc);
+                       msleep(3000);
+                       continue;
+               }
+               /*
+                * Socket was created.  Update tcp session status to CifsNeedNegotiate so that a
+                * process waiting for reconnect will know it needs to re-establish session and tcon
+                * through the reconnected target server.
+                */
+               atomic_inc(&tcpSesReconnectCount);
+               set_credits(server, 1);
+               spin_lock(&GlobalMid_Lock);
+               if (server->tcpStatus != CifsExiting)
+                       server->tcpStatus = CifsNeedNegotiate;
+               spin_unlock(&GlobalMid_Lock);
+               cifs_swn_reset_server_dstaddr(server);
+               mutex_unlock(&server->srv_mutex);
+       } while (server->tcpStatus == CifsNeedReconnect);
+
+       if (target_hint)
+               dfs_cache_noreq_update_tgthint(refpath, target_hint);
+
+       dfs_cache_free_tgts(&tl);
+
+       /* Need to set up echo worker again once connection has been established */
        if (server->tcpStatus == CifsNeedNegotiate)
                mod_delayed_work(cifsiod_wq, &server->echo, 0);
 
@@ -407,6 +437,25 @@ cifs_reconnect(struct TCP_Server_Info *server)
        return rc;
 }
 
+int cifs_reconnect(struct TCP_Server_Info *server)
+{
+       /* If tcp session is not an dfs connection, then reconnect to last target server */
+       spin_lock(&cifs_tcp_ses_lock);
+       if (!server->is_dfs_conn || !server->origin_fullpath || !server->leaf_fullpath) {
+               spin_unlock(&cifs_tcp_ses_lock);
+               return __cifs_reconnect(server);
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
+
+       return reconnect_dfs_server(server);
+}
+#else
+int cifs_reconnect(struct TCP_Server_Info *server)
+{
+       return __cifs_reconnect(server);
+}
+#endif
+
 static void
 cifs_echo_request(struct work_struct *work)
 {
@@ -665,13 +714,14 @@ dequeue_mid(struct mid_q_entry *mid, bool malformed)
         * Trying to handle/dequeue a mid after the send_recv()
         * function has finished processing it is a bug.
         */
-       if (mid->mid_flags & MID_DELETED)
+       if (mid->mid_flags & MID_DELETED) {
+               spin_unlock(&GlobalMid_Lock);
                pr_warn_once("trying to dequeue a deleted mid\n");
-       else {
+       else {
                list_del_init(&mid->qhead);
                mid->mid_flags |= MID_DELETED;
+               spin_unlock(&GlobalMid_Lock);
        }
-       spin_unlock(&GlobalMid_Lock);
 }
 
 static unsigned int
@@ -794,6 +844,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
                 */
        }
 
+#ifdef CONFIG_CIFS_DFS_UPCALL
+       kfree(server->origin_fullpath);
+       kfree(server->leaf_fullpath);
+#endif
        kfree(server);
 
        length = atomic_dec_return(&tcpSesAllocCount);
@@ -1217,7 +1271,13 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *
 {
        struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr;
 
-       if (ctx->nosharesock)
+       if (ctx->nosharesock) {
+               server->nosharesock = true;
+               return 0;
+       }
+
+       /* this server does not share socket */
+       if (server->nosharesock)
                return 0;
 
        /* If multidialect negotiation see if existing sessions match one */
@@ -1283,7 +1343,7 @@ cifs_find_tcp_session(struct smb3_fs_context *ctx)
                 * Skip ses channels since they're only handled in lower layers
                 * (e.g. cifs_send_recv).
                 */
-               if (server->is_channel || !match_server(server, ctx))
+               if (CIFS_SERVER_IS_CHAN(server) || !match_server(server, ctx))
                        continue;
 
                ++server->srv_count;
@@ -1314,6 +1374,10 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
        list_del_init(&server->tcp_ses_list);
        spin_unlock(&cifs_tcp_ses_lock);
 
+       /* For secondary channels, we pick up ref-count on the primary server */
+       if (CIFS_SERVER_IS_CHAN(server))
+               cifs_put_tcp_session(server->primary_server, from_reconnect);
+
        cancel_delayed_work_sync(&server->echo);
        cancel_delayed_work_sync(&server->resolve);
 
@@ -1333,7 +1397,10 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
        spin_unlock(&GlobalMid_Lock);
 
        cifs_crypto_secmech_release(server);
-       cifs_fscache_release_client_cookie(server);
+
+       /* fscache server cookies are based on primary channel only */
+       if (!CIFS_SERVER_IS_CHAN(server))
+               cifs_fscache_release_client_cookie(server);
 
        kfree(server->session_key.response);
        server->session_key.response = NULL;
@@ -1346,7 +1413,8 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 }
 
 struct TCP_Server_Info *
-cifs_get_tcp_session(struct smb3_fs_context *ctx)
+cifs_get_tcp_session(struct smb3_fs_context *ctx,
+                    struct TCP_Server_Info *primary_server)
 {
        struct TCP_Server_Info *tcp_ses = NULL;
        int rc;
@@ -1383,6 +1451,10 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
        tcp_ses->in_flight = 0;
        tcp_ses->max_in_flight = 0;
        tcp_ses->credits = 1;
+       if (primary_server) {
+               ++primary_server->srv_count;
+               tcp_ses->primary_server = primary_server;
+       }
        init_waitqueue_head(&tcp_ses->response_q);
        init_waitqueue_head(&tcp_ses->request_q);
        INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
@@ -1403,6 +1475,9 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
        INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server);
        INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
        mutex_init(&tcp_ses->reconnect_mutex);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+       mutex_init(&tcp_ses->refpath_lock);
+#endif
        memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
               sizeof(tcp_ses->srcaddr));
        memcpy(&tcp_ses->dstaddr, &ctx->dstaddr,
@@ -1481,7 +1556,9 @@ smbd_connected:
        list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
        spin_unlock(&cifs_tcp_ses_lock);
 
-       cifs_fscache_get_client_cookie(tcp_ses);
+       /* fscache server cookies are based on primary channel only */
+       if (!CIFS_SERVER_IS_CHAN(tcp_ses))
+               cifs_fscache_get_client_cookie(tcp_ses);
 
        /* queue echo request delayed work */
        queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
@@ -1501,6 +1578,8 @@ out_err_crypto_release:
 
 out_err:
        if (tcp_ses) {
+               if (CIFS_SERVER_IS_CHAN(tcp_ses))
+                       cifs_put_tcp_session(tcp_ses->primary_server, false);
                kfree(tcp_ses->hostname);
                if (tcp_ses->ssocket)
                        sock_release(tcp_ses->ssocket);
@@ -1519,8 +1598,12 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
         * If an existing session is limited to less channels than
         * requested, it should not be reused
         */
-       if (ses->chan_max < ctx->max_channels)
+       spin_lock(&ses->chan_lock);
+       if (ses->chan_max < ctx->max_channels) {
+               spin_unlock(&ses->chan_lock);
                return 0;
+       }
+       spin_unlock(&ses->chan_lock);
 
        switch (ses->sectype) {
        case Kerberos:
@@ -1655,6 +1738,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 void cifs_put_smb_ses(struct cifs_ses *ses)
 {
        unsigned int rc, xid;
+       unsigned int chan_count;
        struct TCP_Server_Info *server = ses->server;
        cifs_dbg(FYI, "%s: ses_count=%d\n", __func__, ses->ses_count);
 
@@ -1696,12 +1780,24 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
        list_del_init(&ses->smb_ses_list);
        spin_unlock(&cifs_tcp_ses_lock);
 
+       spin_lock(&ses->chan_lock);
+       chan_count = ses->chan_count;
+       spin_unlock(&ses->chan_lock);
+
        /* close any extra channels */
-       if (ses->chan_count > 1) {
+       if (chan_count > 1) {
                int i;
 
-               for (i = 1; i < ses->chan_count; i++)
+               for (i = 1; i < chan_count; i++) {
+                       /*
+                        * note: for now, we're okay accessing ses->chans
+                        * without chan_lock. But when chans can go away, we'll
+                        * need to introduce ref counting to make sure that chan
+                        * is not freed from under us.
+                        */
                        cifs_put_tcp_session(ses->chans[i].server, 0);
+                       ses->chans[i].server = NULL;
+               }
        }
 
        sesInfoFree(ses);
@@ -1885,16 +1981,18 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
                         ses->status);
 
                mutex_lock(&ses->session_mutex);
-               rc = cifs_negotiate_protocol(xid, ses);
-               if (rc) {
-                       mutex_unlock(&ses->session_mutex);
-                       /* problem -- put our ses reference */
-                       cifs_put_smb_ses(ses);
-                       free_xid(xid);
-                       return ERR_PTR(rc);
-               }
                if (ses->need_reconnect) {
                        cifs_dbg(FYI, "Session needs reconnect\n");
+
+                       rc = cifs_negotiate_protocol(xid, ses);
+                       if (rc) {
+                               mutex_unlock(&ses->session_mutex);
+                               /* problem -- put our ses reference */
+                               cifs_put_smb_ses(ses);
+                               free_xid(xid);
+                               return ERR_PTR(rc);
+                       }
+
                        rc = cifs_setup_session(xid, ses,
                                                ctx->local_nls);
                        if (rc) {
@@ -1942,6 +2040,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
                if (!ses->domainName)
                        goto get_ses_fail;
        }
+       if (ctx->workstation_name) {
+               ses->workstation_name = kstrdup(ctx->workstation_name,
+                                               GFP_KERNEL);
+               if (!ses->workstation_name)
+                       goto get_ses_fail;
+       }
        if (ctx->domainauto)
                ses->domainAuto = ctx->domainauto;
        ses->cred_uid = ctx->cred_uid;
@@ -1952,9 +2056,11 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
        mutex_lock(&ses->session_mutex);
 
        /* add server as first channel */
+       spin_lock(&ses->chan_lock);
        ses->chans[0].server = server;
        ses->chan_count = 1;
        ses->chan_max = ctx->multichannel ? ctx->max_channels:1;
+       spin_unlock(&ses->chan_lock);
 
        rc = cifs_negotiate_protocol(xid, ses);
        if (!rc)
@@ -2286,8 +2392,6 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
        list_add(&tcon->tcon_list, &ses->tcon_list);
        spin_unlock(&cifs_tcp_ses_lock);
 
-       cifs_fscache_get_super_cookie(tcon);
-
        return tcon;
 
 out_fail:
@@ -2849,73 +2953,64 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
 }
 
 /* Release all succeed connections */
-static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
-                                  unsigned int xid,
-                                  struct TCP_Server_Info *server,
-                                  struct cifs_ses *ses, struct cifs_tcon *tcon)
+static inline void mount_put_conns(struct mount_ctx *mnt_ctx)
 {
        int rc = 0;
 
-       if (tcon)
-               cifs_put_tcon(tcon);
-       else if (ses)
-               cifs_put_smb_ses(ses);
-       else if (server)
-               cifs_put_tcp_session(server, 0);
-       cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
-       free_xid(xid);
+       if (mnt_ctx->tcon)
+               cifs_put_tcon(mnt_ctx->tcon);
+       else if (mnt_ctx->ses)
+               cifs_put_smb_ses(mnt_ctx->ses);
+       else if (mnt_ctx->server)
+               cifs_put_tcp_session(mnt_ctx->server, 0);
+       mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS;
+       free_xid(mnt_ctx->xid);
 }
 
 /* Get connections for tcp, ses and tcon */
-static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
-                          unsigned int *xid,
-                          struct TCP_Server_Info **nserver,
-                          struct cifs_ses **nses, struct cifs_tcon **ntcon)
+static int mount_get_conns(struct mount_ctx *mnt_ctx)
 {
        int rc = 0;
-       struct TCP_Server_Info *server;
-       struct cifs_ses *ses;
-       struct cifs_tcon *tcon;
-
-       *nserver = NULL;
-       *nses = NULL;
-       *ntcon = NULL;
+       struct TCP_Server_Info *server = NULL;
+       struct cifs_ses *ses = NULL;
+       struct cifs_tcon *tcon = NULL;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       unsigned int xid;
 
-       *xid = get_xid();
+       xid = get_xid();
 
        /* get a reference to a tcp session */
-       server = cifs_get_tcp_session(ctx);
+       server = cifs_get_tcp_session(ctx, NULL);
        if (IS_ERR(server)) {
                rc = PTR_ERR(server);
-               return rc;
+               server = NULL;
+               goto out;
        }
 
-       *nserver = server;
-
        /* get a reference to a SMB session */
        ses = cifs_get_smb_ses(server, ctx);
        if (IS_ERR(ses)) {
                rc = PTR_ERR(ses);
-               return rc;
+               ses = NULL;
+               goto out;
        }
 
-       *nses = ses;
-
        if ((ctx->persistent == true) && (!(ses->server->capabilities &
                                            SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) {
                cifs_server_dbg(VFS, "persistent handles not supported by server\n");
-               return -EOPNOTSUPP;
+               rc = -EOPNOTSUPP;
+               goto out;
        }
 
        /* search for existing tcon to this server share */
        tcon = cifs_get_tcon(ses, ctx);
        if (IS_ERR(tcon)) {
                rc = PTR_ERR(tcon);
-               return rc;
+               tcon = NULL;
+               goto out;
        }
 
-       *ntcon = tcon;
-
        /* if new SMB3.11 POSIX extensions are supported do not remap / and \ */
        if (tcon->posix_extensions)
                cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_POSIX_PATHS;
@@ -2926,17 +3021,19 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif
                 * reset of caps checks mount to see if unix extensions disabled
                 * for just this mount.
                 */
-               reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx);
+               reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx);
                if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
                    (le64_to_cpu(tcon->fsUnixInfo.Capability) &
-                    CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP))
-                       return -EACCES;
+                    CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP)) {
+                       rc = -EACCES;
+                       goto out;
+               }
        } else
                tcon->unix_ext = 0; /* server does not support them */
 
        /* do not care if a following call succeed - informational */
        if (!tcon->pipe && server->ops->qfs_tcon) {
-               server->ops->qfs_tcon(*xid, tcon, cifs_sb);
+               server->ops->qfs_tcon(xid, tcon, cifs_sb);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) {
                        if (tcon->fsDevInfo.DeviceCharacteristics &
                            cpu_to_le32(FILE_READ_ONLY_DEVICE))
@@ -2946,6 +3043,12 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif
                                cifs_dbg(VFS, "read only mount of RW share\n");
                        /* no need to log a RW mount of a typical RW share */
                }
+               /*
+                * The cookie is initialized from volume info returned above.
+                * Inside cifs_fscache_get_super_cookie it checks
+                * that we do not get super cookie twice.
+                */
+               cifs_fscache_get_super_cookie(tcon);
        }
 
        /*
@@ -2960,7 +3063,13 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif
            (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
                cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
 
-       return 0;
+out:
+       mnt_ctx->server = server;
+       mnt_ctx->ses = ses;
+       mnt_ctx->tcon = tcon;
+       mnt_ctx->xid = xid;
+
+       return rc;
 }
 
 static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
@@ -2990,18 +3099,17 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static int mount_get_dfs_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
-                              unsigned int *xid, struct TCP_Server_Info **nserver,
-                              struct cifs_ses **nses, struct cifs_tcon **ntcon)
+/* Get unique dfs connections */
+static int mount_get_dfs_conns(struct mount_ctx *mnt_ctx)
 {
        int rc;
 
-       ctx->nosharesock = true;
-       rc = mount_get_conns(ctx, cifs_sb, xid, nserver, nses, ntcon);
-       if (*nserver) {
+       mnt_ctx->fs_ctx->nosharesock = true;
+       rc = mount_get_conns(mnt_ctx);
+       if (mnt_ctx->server) {
                cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__);
                spin_lock(&cifs_tcp_ses_lock);
-               (*nserver)->is_dfs_conn = true;
+               mnt_ctx->server->is_dfs_conn = true;
                spin_unlock(&cifs_tcp_ses_lock);
        }
        return rc;
@@ -3043,190 +3151,38 @@ build_unc_path_to_root(const struct smb3_fs_context *ctx,
 }
 
 /*
- * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb
- *
- * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated
- * to a string containing updated options for the submount.  Otherwise it
- * will be left untouched.
+ * expand_dfs_referral - Update cifs_sb from dfs referral path
  *
- * Returns the rc from get_dfs_path to the caller, which can be used to
- * determine whether there were referrals.
+ * cifs_sb->ctx->mount_options will be (re-)allocated to a string containing updated options for the
+ * submount.  Otherwise it will be left untouched.
  */
-static int
-expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
-                   struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
-                   char *ref_path)
+static int expand_dfs_referral(struct mount_ctx *mnt_ctx, const char *full_path,
+                              struct dfs_info3_param *referral)
 {
        int rc;
-       struct dfs_info3_param referral = {0};
-       char *full_path = NULL, *mdata = NULL;
-
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
-               return -EREMOTE;
-
-       full_path = build_unc_path_to_root(ctx, cifs_sb, true);
-       if (IS_ERR(full_path))
-               return PTR_ERR(full_path);
-
-       rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
-                           ref_path, &referral, NULL);
-       if (!rc) {
-               char *fake_devname = NULL;
-
-               mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
-                                                  full_path + 1, &referral,
-                                                  &fake_devname);
-               free_dfs_info_param(&referral);
-
-               if (IS_ERR(mdata)) {
-                       rc = PTR_ERR(mdata);
-                       mdata = NULL;
-               } else {
-                       /*
-                        * We can not clear out the whole structure since we
-                        * no longer have an explicit function to parse
-                        * a mount-string. Instead we need to clear out the
-                        * individual fields that are no longer valid.
-                        */
-                       kfree(ctx->prepath);
-                       ctx->prepath = NULL;
-                       rc = cifs_setup_volume_info(ctx, mdata, fake_devname);
-               }
-               kfree(fake_devname);
-               kfree(cifs_sb->ctx->mount_options);
-               cifs_sb->ctx->mount_options = mdata;
-       }
-       kfree(full_path);
-       return rc;
-}
-
-static int get_next_dfs_tgt(struct dfs_cache_tgt_list *tgt_list,
-                           struct dfs_cache_tgt_iterator **tgt_it)
-{
-       if (!*tgt_it)
-               *tgt_it = dfs_cache_get_tgt_iterator(tgt_list);
-       else
-               *tgt_it = dfs_cache_get_next_tgt(tgt_list, *tgt_it);
-       return !*tgt_it ? -EHOSTDOWN : 0;
-}
-
-static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
-                          struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx)
-{
-       const char *tgt = dfs_cache_get_tgt_name(tgt_it);
-       int len = strlen(tgt) + 2;
-       char *new_unc;
-
-       new_unc = kmalloc(len, GFP_KERNEL);
-       if (!new_unc)
-               return -ENOMEM;
-       scnprintf(new_unc, len, "\\%s", tgt);
-
-       kfree(ctx->UNC);
-       ctx->UNC = new_unc;
-
-       if (fake_ctx->prepath) {
-               kfree(ctx->prepath);
-               ctx->prepath = fake_ctx->prepath;
-               fake_ctx->prepath = NULL;
-       }
-       memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr));
-
-       return 0;
-}
-
-static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
-                          struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
-                          unsigned int *xid, struct TCP_Server_Info **server,
-                          struct cifs_ses **ses, struct cifs_tcon **tcon)
-{
-       int rc;
-       char *npath = NULL;
-       struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list);
-       struct dfs_cache_tgt_iterator *tgt_it = NULL;
-       struct smb3_fs_context tmp_ctx = {NULL};
-
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
-               return -EOPNOTSUPP;
-
-       npath = dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb));
-       if (IS_ERR(npath))
-               return PTR_ERR(npath);
-
-       cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, npath, full_path);
-
-       rc = dfs_cache_noreq_find(npath, NULL, &tgt_list);
-       if (rc)
-               goto out;
-       /*
-        * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to
-        * test connection against new DFS targets.
-        */
-       rc = smb3_fs_context_dup(&tmp_ctx, ctx);
-       if (rc)
-               goto out;
-
-       for (;;) {
-               struct dfs_info3_param ref = {0};
-               char *fake_devname = NULL, *mdata = NULL;
-
-               /* Get next DFS target server - if any */
-               rc = get_next_dfs_tgt(&tgt_list, &tgt_it);
-               if (rc)
-                       break;
-
-               rc = dfs_cache_get_tgt_referral(npath, tgt_it, &ref);
-               if (rc)
-                       break;
-
-               cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
-                        tmp_ctx.prepath);
-
-               mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
-                                                  &fake_devname);
-               free_dfs_info_param(&ref);
-
-               if (IS_ERR(mdata)) {
-                       rc = PTR_ERR(mdata);
-                       mdata = NULL;
-               } else
-                       rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
-
-               kfree(mdata);
-               kfree(fake_devname);
-
-               if (rc)
-                       break;
-
-               cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
-                        tmp_ctx.prepath);
-
-               mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
-               rc = mount_get_dfs_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
-               if (!rc || (*server && *ses)) {
-                       /*
-                        * We were able to connect to new target server. Update current context with
-                        * new target server.
-                        */
-                       rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
-                       break;
-               }
-       }
-       if (!rc) {
-               cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
-                        tmp_ctx.prepath);
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+       char *fake_devname = NULL, *mdata = NULL;
+
+       mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, referral,
+                                          &fake_devname);
+       if (IS_ERR(mdata)) {
+               rc = PTR_ERR(mdata);
+               mdata = NULL;
+       } else {
                /*
-                * Update DFS target hint in DFS referral cache with the target server we
-                * successfully reconnected to.
+                * We can not clear out the whole structure since we no longer have an explicit
+                * function to parse a mount-string. Instead we need to clear out the individual
+                * fields that are no longer valid.
                 */
-               rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
-                                             cifs_remap(cifs_sb), path, tgt_it);
+               kfree(ctx->prepath);
+               ctx->prepath = NULL;
+               rc = cifs_setup_volume_info(ctx, mdata, fake_devname);
        }
+       kfree(fake_devname);
+       kfree(cifs_sb->ctx->mount_options);
+       cifs_sb->ctx->mount_options = mdata;
 
-out:
-       kfree(npath);
-       smb3_cleanup_fs_context_contents(&tmp_ctx);
-       dfs_cache_free_tgts(&tgt_list);
        return rc;
 }
 #endif
@@ -3333,12 +3289,14 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
  * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
  * otherwise 0.
  */
-static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-                         const unsigned int xid,
-                         struct TCP_Server_Info *server,
-                         struct cifs_tcon *tcon)
+static int is_path_remote(struct mount_ctx *mnt_ctx)
 {
        int rc;
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       struct TCP_Server_Info *server = mnt_ctx->server;
+       unsigned int xid = mnt_ctx->xid;
+       struct cifs_tcon *tcon = mnt_ctx->tcon;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
        char *full_path;
 
        if (!server->ops->is_path_accessible)
@@ -3376,280 +3334,289 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static void set_root_ses(struct cifs_sb_info *cifs_sb, const uuid_t *mount_id, struct cifs_ses *ses,
-                        struct cifs_ses **root_ses)
+static void set_root_ses(struct mount_ctx *mnt_ctx)
 {
-       if (ses) {
+       if (mnt_ctx->ses) {
                spin_lock(&cifs_tcp_ses_lock);
-               ses->ses_count++;
+               mnt_ctx->ses->ses_count++;
                spin_unlock(&cifs_tcp_ses_lock);
-               dfs_cache_add_refsrv_session(mount_id, ses);
+               dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses);
        }
-       *root_ses = ses;
+       mnt_ctx->root_ses = mnt_ctx->ses;
 }
 
-/* Set up next dfs prefix path in @dfs_path */
-static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-                           const unsigned int xid, struct TCP_Server_Info *server,
-                           struct cifs_tcon *tcon, char **dfs_path)
+static int is_dfs_mount(struct mount_ctx *mnt_ctx, bool *isdfs, struct dfs_cache_tgt_list *root_tl)
 {
-       char *path, *npath;
-       int added_treename = is_tcon_dfs(tcon);
        int rc;
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
 
-       path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
-       if (!path)
-               return -ENOMEM;
+       *isdfs = true;
 
-       rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
-       if (rc == -EREMOTE) {
-               struct smb3_fs_context v = {NULL};
-               /* if @path contains a tree name, skip it in the prefix path */
-               if (added_treename) {
-                       rc = smb3_parse_devname(path, &v);
-                       if (rc)
-                               goto out;
-                       npath = build_unc_path_to_root(&v, cifs_sb, true);
-                       smb3_cleanup_fs_context_contents(&v);
-               } else {
-                       v.UNC = ctx->UNC;
-                       v.prepath = path + 1;
-                       npath = build_unc_path_to_root(&v, cifs_sb, true);
-               }
+       rc = mount_get_conns(mnt_ctx);
+       /*
+        * If called with 'nodfs' mount option, then skip DFS resolving.  Otherwise unconditionally
+        * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
+        *
+        * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
+        * to respond with PATH_NOT_COVERED to requests that include the prefix.
+        */
+       if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+           dfs_cache_find(mnt_ctx->xid, mnt_ctx->ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
+                          ctx->UNC + 1, NULL, root_tl)) {
+               if (rc)
+                       return rc;
+               /* Check if it is fully accessible and then mount it */
+               rc = is_path_remote(mnt_ctx);
+               if (!rc)
+                       *isdfs = false;
+               else if (rc != -EREMOTE)
+                       return rc;
+       }
+       return 0;
+}
 
-               if (IS_ERR(npath)) {
-                       rc = PTR_ERR(npath);
-                       goto out;
-               }
+static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path,
+                             const char *ref_path, struct dfs_cache_tgt_iterator *tit)
+{
+       int rc;
+       struct dfs_info3_param ref = {};
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       char *oldmnt = cifs_sb->ctx->mount_options;
+
+       rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref);
+       if (rc)
+               goto out;
+
+       rc = expand_dfs_referral(mnt_ctx, full_path, &ref);
+       if (rc)
+               goto out;
 
-               kfree(*dfs_path);
-               *dfs_path = npath;
-               rc = -EREMOTE;
+       /* Connect to new target only if we were redirected (e.g. mount options changed) */
+       if (oldmnt != cifs_sb->ctx->mount_options) {
+               mount_put_conns(mnt_ctx);
+               rc = mount_get_dfs_conns(mnt_ctx);
+       }
+       if (!rc) {
+               if (cifs_is_referral_server(mnt_ctx->tcon, &ref))
+                       set_root_ses(mnt_ctx);
+               rc = dfs_cache_update_tgthint(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
+                                             cifs_remap(cifs_sb), ref_path, tit);
        }
 
 out:
-       kfree(path);
+       free_dfs_info_param(&ref);
        return rc;
 }
 
-/* Check if resolved targets can handle any DFS referrals */
-static int is_referral_server(const char *ref_path, struct cifs_sb_info *cifs_sb,
-                             struct cifs_tcon *tcon, bool *ref_server)
+static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list *root_tl)
 {
        int rc;
-       struct dfs_info3_param ref = {0};
+       char *full_path;
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+       struct dfs_cache_tgt_iterator *tit;
 
-       cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+       /* Put initial connections as they might be shared with other mounts.  We need unique dfs
+        * connections per mount to properly failover, so mount_get_dfs_conns() must be used from
+        * now on.
+        */
+       mount_put_conns(mnt_ctx);
+       mount_get_dfs_conns(mnt_ctx);
 
-       if (is_tcon_dfs(tcon)) {
-               *ref_server = true;
-       } else {
-               char *npath;
+       full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+       if (IS_ERR(full_path))
+               return PTR_ERR(full_path);
 
-               npath = dfs_cache_canonical_path(ref_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
-               if (IS_ERR(npath))
-                       return PTR_ERR(npath);
+       mnt_ctx->origin_fullpath = dfs_cache_canonical_path(ctx->UNC, cifs_sb->local_nls,
+                                                           cifs_remap(cifs_sb));
+       if (IS_ERR(mnt_ctx->origin_fullpath)) {
+               rc = PTR_ERR(mnt_ctx->origin_fullpath);
+               mnt_ctx->origin_fullpath = NULL;
+               goto out;
+       }
 
-               rc = dfs_cache_noreq_find(npath, &ref, NULL);
-               kfree(npath);
-               if (rc) {
-                       cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
-                       return rc;
+       /* Try all dfs root targets */
+       for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(root_tl);
+            tit; tit = dfs_cache_get_next_tgt(root_tl, tit)) {
+               rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->origin_fullpath + 1, tit);
+               if (!rc) {
+                       mnt_ctx->leaf_fullpath = kstrdup(mnt_ctx->origin_fullpath, GFP_KERNEL);
+                       if (!mnt_ctx->leaf_fullpath)
+                               rc = -ENOMEM;
+                       break;
                }
-               cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
-               /*
-                * Check if all targets are capable of handling DFS referrals as per
-                * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
-                */
-               *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
-               free_dfs_info_param(&ref);
        }
-       return 0;
+
+out:
+       kfree(full_path);
+       return rc;
 }
 
-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
+static int __follow_dfs_link(struct mount_ctx *mnt_ctx)
 {
-       int rc = 0;
-       unsigned int xid;
-       struct TCP_Server_Info *server = NULL;
-       struct cifs_ses *ses = NULL, *root_ses = NULL;
-       struct cifs_tcon *tcon = NULL;
-       int count = 0;
-       uuid_t mount_id = {0};
-       char *ref_path = NULL, *full_path = NULL;
-       char *oldmnt = NULL;
-       bool ref_server = false;
+       int rc;
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+       char *full_path;
+       struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+       struct dfs_cache_tgt_iterator *tit;
 
-       rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
-       /*
-        * If called with 'nodfs' mount option, then skip DFS resolving.  Otherwise unconditionally
-        * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
-        *
-        * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
-        * to respond with PATH_NOT_COVERED to requests that include the prefix.
-        */
-       if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
-           dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
-                          NULL)) {
-               if (rc)
-                       goto error;
-               /* Check if it is fully accessible and then mount it */
-               rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
-               if (!rc)
-                       goto out;
-               if (rc != -EREMOTE)
-                       goto error;
+       full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+       if (IS_ERR(full_path))
+               return PTR_ERR(full_path);
+
+       kfree(mnt_ctx->leaf_fullpath);
+       mnt_ctx->leaf_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls,
+                                                         cifs_remap(cifs_sb));
+       if (IS_ERR(mnt_ctx->leaf_fullpath)) {
+               rc = PTR_ERR(mnt_ctx->leaf_fullpath);
+               mnt_ctx->leaf_fullpath = NULL;
+               goto out;
        }
 
-       mount_put_conns(cifs_sb, xid, server, ses, tcon);
-       /*
-        * Ignore error check here because we may failover to other targets from cached a
-        * referral.
-        */
-       (void)mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+       /* Get referral from dfs link */
+       rc = dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
+                           cifs_remap(cifs_sb), mnt_ctx->leaf_fullpath + 1, NULL, &tl);
+       if (rc)
+               goto out;
 
-       /* Get path of DFS root */
-       ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
-       if (IS_ERR(ref_path)) {
-               rc = PTR_ERR(ref_path);
-               ref_path = NULL;
-               goto error;
+       /* Try all dfs link targets */
+       for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl);
+            tit; tit = dfs_cache_get_next_tgt(&tl, tit)) {
+               rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit);
+               if (!rc) {
+                       rc = is_path_remote(mnt_ctx);
+                       break;
+               }
+       }
+
+out:
+       kfree(full_path);
+       dfs_cache_free_tgts(&tl);
+       return rc;
+}
+
+static int follow_dfs_link(struct mount_ctx *mnt_ctx)
+{
+       int rc;
+       struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
+       struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+       char *full_path;
+       int num_links = 0;
+
+       full_path = build_unc_path_to_root(ctx, cifs_sb, true);
+       if (IS_ERR(full_path))
+               return PTR_ERR(full_path);
+
+       kfree(mnt_ctx->origin_fullpath);
+       mnt_ctx->origin_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls,
+                                                           cifs_remap(cifs_sb));
+       kfree(full_path);
+
+       if (IS_ERR(mnt_ctx->origin_fullpath)) {
+               rc = PTR_ERR(mnt_ctx->origin_fullpath);
+               mnt_ctx->origin_fullpath = NULL;
+               return rc;
        }
 
-       uuid_gen(&mount_id);
-       set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
        do {
-               /* Save full path of last DFS path we used to resolve final target server */
-               kfree(full_path);
-               full_path = build_unc_path_to_root(ctx, cifs_sb, !!count);
-               if (IS_ERR(full_path)) {
-                       rc = PTR_ERR(full_path);
-                       full_path = NULL;
-                       break;
-               }
-               /* Chase referral */
-               oldmnt = cifs_sb->ctx->mount_options;
-               rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1);
-               if (rc)
-                       break;
-               /* Connect to new DFS target only if we were redirected */
-               if (oldmnt != cifs_sb->ctx->mount_options) {
-                       mount_put_conns(cifs_sb, xid, server, ses, tcon);
-                       rc = mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
-               }
-               if (rc && !server && !ses) {
-                       /* Failed to connect. Try to connect to other targets in the referral. */
-                       rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid,
-                                            &server, &ses, &tcon);
-               }
-               if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses)
+               rc = __follow_dfs_link(mnt_ctx);
+               if (!rc || rc != -EREMOTE)
                        break;
-               if (!tcon)
-                       continue;
+       } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS);
 
-               /* Make sure that requests go through new root servers */
-               rc = is_referral_server(ref_path + 1, cifs_sb, tcon, &ref_server);
-               if (rc)
-                       break;
-               if (ref_server)
-                       set_root_ses(cifs_sb, &mount_id, ses, &root_ses);
+       return rc;
+}
+
+/* Set up DFS referral paths for failover */
+static void setup_server_referral_paths(struct mount_ctx *mnt_ctx)
+{
+       struct TCP_Server_Info *server = mnt_ctx->server;
 
-               /* Get next dfs path and then continue chasing them if -EREMOTE */
-               rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
-               /* Prevent recursion on broken link referrals */
-               if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
-                       rc = -ELOOP;
-       } while (rc == -EREMOTE);
+       server->origin_fullpath = mnt_ctx->origin_fullpath;
+       server->leaf_fullpath = mnt_ctx->leaf_fullpath;
+       server->current_fullpath = mnt_ctx->leaf_fullpath;
+       mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL;
+}
 
-       if (rc || !tcon || !ses)
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
+{
+       int rc;
+       struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, };
+       struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+       bool isdfs;
+
+       rc = is_dfs_mount(&mnt_ctx, &isdfs, &tl);
+       if (rc)
                goto error;
+       if (!isdfs)
+               goto out;
 
-       kfree(ref_path);
-       /*
-        * Store DFS full path in both superblock and tree connect structures.
-        *
-        * For DFS root mounts, the prefix path (cifs_sb->prepath) is preserved during reconnect so
-        * only the root path is set in cifs_sb->origin_fullpath and tcon->dfs_path. And for DFS
-        * links, the prefix path is included in both and may be changed during reconnect.  See
-        * cifs_tree_connect().
-        */
-       ref_path = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, cifs_remap(cifs_sb));
-       kfree(full_path);
-       full_path = NULL;
+       uuid_gen(&mnt_ctx.mount_id);
+       rc = connect_dfs_root(&mnt_ctx, &tl);
+       dfs_cache_free_tgts(&tl);
 
-       if (IS_ERR(ref_path)) {
-               rc = PTR_ERR(ref_path);
-               ref_path = NULL;
+       if (rc)
                goto error;
-       }
-       cifs_sb->origin_fullpath = ref_path;
 
-       ref_path = kstrdup(cifs_sb->origin_fullpath, GFP_KERNEL);
-       if (!ref_path) {
-               rc = -ENOMEM;
+       rc = is_path_remote(&mnt_ctx);
+       if (rc == -EREMOTE)
+               rc = follow_dfs_link(&mnt_ctx);
+       if (rc)
                goto error;
-       }
-       spin_lock(&cifs_tcp_ses_lock);
-       tcon->dfs_path = ref_path;
-       ref_path = NULL;
-       spin_unlock(&cifs_tcp_ses_lock);
 
+       setup_server_referral_paths(&mnt_ctx);
        /*
-        * After reconnecting to a different server, unique ids won't
-        * match anymore, so we disable serverino. This prevents
-        * dentry revalidation to think the dentry are stale (ESTALE).
+        * After reconnecting to a different server, unique ids won't match anymore, so we disable
+        * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE).
         */
        cifs_autodisable_serverino(cifs_sb);
        /*
-        * Force the use of prefix path to support failover on DFS paths that
-        * resolve to targets that have different prefix paths.
+        * Force the use of prefix path to support failover on DFS paths that resolve to targets
+        * that have different prefix paths.
         */
        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
        kfree(cifs_sb->prepath);
        cifs_sb->prepath = ctx->prepath;
        ctx->prepath = NULL;
-       uuid_copy(&cifs_sb->dfs_mount_id, &mount_id);
+       uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
 
 out:
-       free_xid(xid);
-       cifs_try_adding_channels(cifs_sb, ses);
-       return mount_setup_tlink(cifs_sb, ses, tcon);
+       free_xid(mnt_ctx.xid);
+       cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
+       return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
 
 error:
-       kfree(ref_path);
-       kfree(full_path);
-       kfree(cifs_sb->origin_fullpath);
-       dfs_cache_put_refsrv_sessions(&mount_id);
-       mount_put_conns(cifs_sb, xid, server, ses, tcon);
+       dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
+       kfree(mnt_ctx.origin_fullpath);
+       kfree(mnt_ctx.leaf_fullpath);
+       mount_put_conns(&mnt_ctx);
        return rc;
 }
 #else
 int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
        int rc = 0;
-       unsigned int xid;
-       struct cifs_ses *ses;
-       struct cifs_tcon *tcon;
-       struct TCP_Server_Info *server;
+       struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, };
 
-       rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+       rc = mount_get_conns(&mnt_ctx);
        if (rc)
                goto error;
 
-       if (tcon) {
-               rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+       if (mnt_ctx.tcon) {
+               rc = is_path_remote(&mnt_ctx);
                if (rc == -EREMOTE)
                        rc = -EOPNOTSUPP;
                if (rc)
                        goto error;
        }
 
-       free_xid(xid);
-
-       return mount_setup_tlink(cifs_sb, ses, tcon);
+       free_xid(mnt_ctx.xid);
+       return mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon);
 
 error:
-       mount_put_conns(cifs_sb, xid, server, ses, tcon);
+       mount_put_conns(&mnt_ctx);
        return rc;
 }
 #endif
@@ -3818,7 +3785,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
        kfree(cifs_sb->prepath);
 #ifdef CONFIG_CIFS_DFS_UPCALL
        dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id);
-       kfree(cifs_sb->origin_fullpath);
 #endif
        call_rcu(&cifs_sb->rcu, delayed_free);
 }
@@ -4145,104 +4111,246 @@ cifs_prune_tlinks(struct work_struct *work)
 }
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
-int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
+static void mark_tcon_tcp_ses_for_reconnect(struct cifs_tcon *tcon)
+{
+       int i;
+
+       for (i = 0; i < tcon->ses->chan_count; i++) {
+               spin_lock(&GlobalMid_Lock);
+               if (tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+                       tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+               spin_unlock(&GlobalMid_Lock);
+       }
+}
+
+/* Update dfs referral path of superblock */
+static int update_server_fullpath(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb,
+                                 const char *target)
+{
+       int rc = 0;
+       size_t len = strlen(target);
+       char *refpath, *npath;
+
+       if (unlikely(len < 2 || *target != '\\'))
+               return -EINVAL;
+
+       if (target[1] == '\\') {
+               len += 1;
+               refpath = kmalloc(len, GFP_KERNEL);
+               if (!refpath)
+                       return -ENOMEM;
+
+               scnprintf(refpath, len, "%s", target);
+       } else {
+               len += sizeof("\\");
+               refpath = kmalloc(len, GFP_KERNEL);
+               if (!refpath)
+                       return -ENOMEM;
+
+               scnprintf(refpath, len, "\\%s", target);
+       }
+
+       npath = dfs_cache_canonical_path(refpath, cifs_sb->local_nls, cifs_remap(cifs_sb));
+       kfree(refpath);
+
+       if (IS_ERR(npath)) {
+               rc = PTR_ERR(npath);
+       } else {
+               mutex_lock(&server->refpath_lock);
+               kfree(server->leaf_fullpath);
+               server->leaf_fullpath = npath;
+               mutex_unlock(&server->refpath_lock);
+               server->current_fullpath = server->leaf_fullpath;
+       }
+       return rc;
+}
+
+static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host,
+                                      size_t tcp_host_len, char *share, bool *target_match)
+{
+       int rc = 0;
+       const char *dfs_host;
+       size_t dfs_host_len;
+
+       *target_match = true;
+       extract_unc_hostname(share, &dfs_host, &dfs_host_len);
+
+       /* Check if hostnames or addresses match */
+       if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
+               cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
+                        dfs_host, (int)tcp_host_len, tcp_host);
+               rc = match_target_ip(server, dfs_host, dfs_host_len, target_match);
+               if (rc)
+                       cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
+       }
+       return rc;
+}
+
+static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon,
+                                    struct cifs_sb_info *cifs_sb, char *tree, bool islink,
+                                    struct dfs_cache_tgt_list *tl)
 {
        int rc;
        struct TCP_Server_Info *server = tcon->ses->server;
        const struct smb_version_operations *ops = server->ops;
-       struct dfs_cache_tgt_list tl;
-       struct dfs_cache_tgt_iterator *it = NULL;
-       char *tree;
+       struct cifs_tcon *ipc = tcon->ses->tcon_ipc;
+       char *share = NULL, *prefix = NULL;
        const char *tcp_host;
        size_t tcp_host_len;
-       const char *dfs_host;
-       size_t dfs_host_len;
-       char *share = NULL, *prefix = NULL;
-       struct dfs_info3_param ref = {0};
-       bool isroot;
+       struct dfs_cache_tgt_iterator *tit;
+       bool target_match;
 
-       tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
-       if (!tree)
-               return -ENOMEM;
+       extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
 
-       /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
-       if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) {
-               if (tcon->ipc) {
-                       scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
-                       rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
-               } else {
-                       rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc);
-               }
+       tit = dfs_cache_get_tgt_iterator(tl);
+       if (!tit) {
+               rc = -ENOENT;
                goto out;
        }
 
-       isroot = ref.server_type == DFS_TYPE_ROOT;
-       free_dfs_info_param(&ref);
-
-       extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len);
-
-       for (it = dfs_cache_get_tgt_iterator(&tl); it; it = dfs_cache_get_next_tgt(&tl, it)) {
-               bool target_match;
+       /* Try to tree connect to all dfs targets */
+       for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) {
+               const char *target = dfs_cache_get_tgt_name(tit);
+               struct dfs_cache_tgt_list ntl = DFS_CACHE_TGT_LIST_INIT(ntl);
 
                kfree(share);
                kfree(prefix);
-               share = NULL;
-               prefix = NULL;
+               share = prefix = NULL;
 
-               rc = dfs_cache_get_tgt_share(tcon->dfs_path + 1, it, &share, &prefix);
+               /* Check if share matches with tcp ses */
+               rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix);
                if (rc) {
-                       cifs_dbg(VFS, "%s: failed to parse target share %d\n",
-                                __func__, rc);
-                       continue;
+                       cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc);
+                       break;
                }
 
-               extract_unc_hostname(share, &dfs_host, &dfs_host_len);
-
-               if (dfs_host_len != tcp_host_len
-                   || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) {
-                       cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len,
-                                dfs_host, (int)tcp_host_len, tcp_host);
+               rc = target_share_matches_server(server, tcp_host, tcp_host_len, share,
+                                                &target_match);
+               if (rc)
+                       break;
+               if (!target_match) {
+                       rc = -EHOSTUNREACH;
+                       continue;
+               }
 
-                       rc = match_target_ip(server, dfs_host, dfs_host_len, &target_match);
-                       if (rc) {
-                               cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc);
+               if (ipc->need_reconnect) {
+                       scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+                       rc = ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls);
+                       if (rc)
                                break;
-                       }
-
-                       if (!target_match) {
-                               cifs_dbg(FYI, "%s: skipping target\n", __func__);
-                               continue;
-                       }
                }
 
-               if (tcon->ipc) {
-                       scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", share);
-                       rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+               scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
+               if (!islink) {
+                       rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls);
+                       break;
+               }
+               /*
+                * If no dfs referrals were returned from link target, then just do a TREE_CONNECT
+                * to it.  Otherwise, cache the dfs referral and then mark current tcp ses for
+                * reconnect so either the demultiplex thread or the echo worker will reconnect to
+                * newly resolved target.
+                */
+               if (dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), target,
+                                  NULL, &ntl)) {
+                       rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls);
+                       if (rc)
+                               continue;
+                       rc = dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit);
+                       if (!rc)
+                               rc = cifs_update_super_prepath(cifs_sb, prefix);
                } else {
-                       scnprintf(tree, MAX_TREE_SIZE, "\\%s", share);
-                       rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
-                       /* Only handle prefix paths of DFS link targets */
-                       if (!rc && !isroot) {
-                               rc = update_super_prepath(tcon, prefix);
-                               break;
-                       }
+                       /* Target is another dfs share */
+                       rc = update_server_fullpath(server, cifs_sb, target);
+                       dfs_cache_free_tgts(tl);
+
+                       if (!rc) {
+                               rc = -EREMOTE;
+                               list_replace_init(&ntl.tl_list, &tl->tl_list);
+                       } else
+                               dfs_cache_free_tgts(&ntl);
                }
-               if (rc == -EREMOTE)
-                       break;
+               break;
        }
 
+out:
        kfree(share);
        kfree(prefix);
 
-       if (!rc) {
-               if (it)
-                       rc = dfs_cache_noreq_update_tgthint(tcon->dfs_path + 1, it);
-               else
-                       rc = -ENOENT;
+       return rc;
+}
+
+static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon,
+                                  struct cifs_sb_info *cifs_sb, char *tree, bool islink,
+                                  struct dfs_cache_tgt_list *tl)
+{
+       int rc;
+       int num_links = 0;
+       struct TCP_Server_Info *server = tcon->ses->server;
+
+       do {
+               rc = __tree_connect_dfs_target(xid, tcon, cifs_sb, tree, islink, tl);
+               if (!rc || rc != -EREMOTE)
+                       break;
+       } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS);
+       /*
+        * If we couldn't tree connect to any targets from last referral path, then retry from
+        * original referral path.
+        */
+       if (rc && server->current_fullpath != server->origin_fullpath) {
+               server->current_fullpath = server->origin_fullpath;
+               mark_tcon_tcp_ses_for_reconnect(tcon);
        }
-       dfs_cache_free_tgts(&tl);
+
+       dfs_cache_free_tgts(tl);
+       return rc;
+}
+
+int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc)
+{
+       int rc;
+       struct TCP_Server_Info *server = tcon->ses->server;
+       const struct smb_version_operations *ops = server->ops;
+       struct super_block *sb = NULL;
+       struct cifs_sb_info *cifs_sb;
+       struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
+       char *tree;
+       struct dfs_info3_param ref = {0};
+
+       tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL);
+       if (!tree)
+               return -ENOMEM;
+
+       if (tcon->ipc) {
+               scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
+               rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
+               goto out;
+       }
+
+       sb = cifs_get_tcp_super(server);
+       if (IS_ERR(sb)) {
+               rc = PTR_ERR(sb);
+               cifs_dbg(VFS, "%s: could not find superblock: %d\n", __func__, rc);
+               goto out;
+       }
+
+       cifs_sb = CIFS_SB(sb);
+
+       /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
+       if (!server->current_fullpath ||
+           dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) {
+               rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, cifs_sb->local_nls);
+               goto out;
+       }
+
+       rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, ref.server_type == DFS_TYPE_LINK,
+                                    &tl);
+       free_dfs_info_param(&ref);
+
 out:
        kfree(tree);
+       cifs_put_tcp_super(sb);
+
        return rc;
 }
 #else
index 2837455..5c1259d 100644 (file)
@@ -283,7 +283,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
                        seq_printf(m,
                                   "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
                                   ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
-                                  ce->ttl, ce->etime.tv_nsec, ce->ref_flags, ce->hdr_flags,
+                                  ce->ttl, ce->etime.tv_nsec, ce->hdr_flags, ce->ref_flags,
                                   IS_DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no",
                                   ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
 
@@ -1364,9 +1364,9 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach
 }
 
 /* Refresh dfs referral of tcon and mark it for reconnect if needed */
-static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct cifs_tcon *tcon,
+                         bool force_refresh)
 {
-       const char *path = tcon->dfs_path + 1;
        struct cifs_ses *ses;
        struct cache_entry *ce;
        struct dfs_info3_param *refs = NULL;
@@ -1422,6 +1422,20 @@ out:
        return rc;
 }
 
+static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh)
+{
+       struct TCP_Server_Info *server = tcon->ses->server;
+
+       mutex_lock(&server->refpath_lock);
+       if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
+               __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh);
+       mutex_unlock(&server->refpath_lock);
+
+       __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh);
+
+       return 0;
+}
+
 /**
  * dfs_cache_remount_fs - remount a DFS share
  *
@@ -1435,6 +1449,7 @@ out:
 int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
 {
        struct cifs_tcon *tcon;
+       struct TCP_Server_Info *server;
        struct mount_group *mg;
        struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL};
        int rc;
@@ -1443,13 +1458,15 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
                return -EINVAL;
 
        tcon = cifs_sb_master_tcon(cifs_sb);
-       if (!tcon->dfs_path) {
-               cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__);
+       server = tcon->ses->server;
+
+       if (!server->origin_fullpath) {
+               cifs_dbg(FYI, "%s: not a dfs mount\n", __func__);
                return 0;
        }
 
        if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
-               cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__);
+               cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__);
                return -EINVAL;
        }
 
@@ -1457,7 +1474,7 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
        mg = find_mount_group_locked(&cifs_sb->dfs_mount_id);
        if (IS_ERR(mg)) {
                mutex_unlock(&mount_group_list_lock);
-               cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__);
+               cifs_dbg(FYI, "%s: no ipc session for refreshing referral\n", __func__);
                return PTR_ERR(mg);
        }
        kref_get(&mg->refcount);
@@ -1498,9 +1515,12 @@ static void refresh_mounts(struct cifs_ses **sessions)
 
        spin_lock(&cifs_tcp_ses_lock);
        list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+               if (!server->is_dfs_conn)
+                       continue;
+
                list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
                        list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
-                               if (tcon->dfs_path) {
+                               if (!tcon->ipc && !tcon->need_reconnect) {
                                        tcon->tc_count++;
                                        list_add_tail(&tcon->ulist, &tcons);
                                }
@@ -1510,8 +1530,16 @@ static void refresh_mounts(struct cifs_ses **sessions)
        spin_unlock(&cifs_tcp_ses_lock);
 
        list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) {
+               struct TCP_Server_Info *server = tcon->ses->server;
+
                list_del_init(&tcon->ulist);
-               refresh_tcon(sessions, tcon, false);
+
+               mutex_lock(&server->refpath_lock);
+               if (strcasecmp(server->leaf_fullpath, server->origin_fullpath))
+                       __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false);
+               mutex_unlock(&server->refpath_lock);
+
+               __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false);
                cifs_put_tcon(tcon);
        }
 }
index 1b855fc..9fee3af 100644 (file)
@@ -2692,12 +2692,23 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
-               if (server->ops->flush)
-                       rc = server->ops->flush(xid, tcon, &smbfile->fid);
-               else
+               if (server->ops->flush == NULL) {
                        rc = -ENOSYS;
+                       goto strict_fsync_exit;
+               }
+
+               if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
+                       smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
+                       if (smbfile) {
+                               rc = server->ops->flush(xid, tcon, &smbfile->fid);
+                               cifsFileInfo_put(smbfile);
+                       } else
+                               cifs_dbg(FYI, "ignore fsync for file not open for write\n");
+               } else
+                       rc = server->ops->flush(xid, tcon, &smbfile->fid);
        }
 
+strict_fsync_exit:
        free_xid(xid);
        return rc;
 }
@@ -2709,6 +2720,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
+       struct inode *inode = file_inode(file);
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
 
        rc = file_write_and_wait_range(file, start, end);
@@ -2725,12 +2737,23 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
-               if (server->ops->flush)
-                       rc = server->ops->flush(xid, tcon, &smbfile->fid);
-               else
+               if (server->ops->flush == NULL) {
                        rc = -ENOSYS;
+                       goto fsync_exit;
+               }
+
+               if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
+                       smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
+                       if (smbfile) {
+                               rc = server->ops->flush(xid, tcon, &smbfile->fid);
+                               cifsFileInfo_put(smbfile);
+                       } else
+                               cifs_dbg(FYI, "ignore fsync for file not open for write\n");
+               } else
+                       rc = server->ops->flush(xid, tcon, &smbfile->fid);
        }
 
+fsync_exit:
        free_xid(xid);
        return rc;
 }
index 38d96a4..6a179ae 100644 (file)
@@ -308,7 +308,9 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
        new_ctx->nodename = NULL;
        new_ctx->username = NULL;
        new_ctx->password = NULL;
+       new_ctx->server_hostname = NULL;
        new_ctx->domainname = NULL;
+       new_ctx->workstation_name = NULL;
        new_ctx->UNC = NULL;
        new_ctx->source = NULL;
        new_ctx->iocharset = NULL;
@@ -323,6 +325,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
        DUP_CTX_STR(UNC);
        DUP_CTX_STR(source);
        DUP_CTX_STR(domainname);
+       DUP_CTX_STR(workstation_name);
        DUP_CTX_STR(nodename);
        DUP_CTX_STR(iocharset);
 
@@ -459,6 +462,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
                return -EINVAL;
 
        /* record the server hostname */
+       kfree(ctx->server_hostname);
        ctx->server_hostname = kstrndup(devname + 2, pos - devname - 2, GFP_KERNEL);
        if (!ctx->server_hostname)
                return -ENOMEM;
@@ -720,6 +724,11 @@ static int smb3_verify_reconfigure_ctx(struct fs_context *fc,
                cifs_errorf(fc, "can not change domainname during remount\n");
                return -EINVAL;
        }
+       if (new_ctx->workstation_name &&
+           (!old_ctx->workstation_name || strcmp(new_ctx->workstation_name, old_ctx->workstation_name))) {
+               cifs_errorf(fc, "can not change workstation_name during remount\n");
+               return -EINVAL;
+       }
        if (new_ctx->nodename &&
            (!old_ctx->nodename || strcmp(new_ctx->nodename, old_ctx->nodename))) {
                cifs_errorf(fc, "can not change nodename during remount\n");
@@ -753,7 +762,8 @@ static int smb3_reconfigure(struct fs_context *fc)
                return rc;
 
        /*
-        * We can not change UNC/username/password/domainname/nodename/iocharset
+        * We can not change UNC/username/password/domainname/
+        * workstation_name/nodename/iocharset
         * during reconnect so ignore what we have in the new context and
         * just use what we already have in cifs_sb->ctx.
         */
@@ -762,6 +772,7 @@ static int smb3_reconfigure(struct fs_context *fc)
        STEAL_STRING(cifs_sb, ctx, username);
        STEAL_STRING(cifs_sb, ctx, password);
        STEAL_STRING(cifs_sb, ctx, domainname);
+       STEAL_STRING(cifs_sb, ctx, workstation_name);
        STEAL_STRING(cifs_sb, ctx, nodename);
        STEAL_STRING(cifs_sb, ctx, iocharset);
 
@@ -1414,13 +1425,22 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 
 int smb3_init_fs_context(struct fs_context *fc)
 {
+       int rc;
        struct smb3_fs_context *ctx;
        char *nodename = utsname()->nodename;
        int i;
 
        ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
-       if (unlikely(!ctx))
-               return -ENOMEM;
+       if (unlikely(!ctx)) {
+               rc = -ENOMEM;
+               goto err_exit;
+       }
+
+       ctx->workstation_name = kstrdup(nodename, GFP_KERNEL);
+       if (unlikely(!ctx->workstation_name)) {
+               rc = -ENOMEM;
+               goto err_exit;
+       }
 
        /*
         * does not have to be perfect mapping since field is
@@ -1493,6 +1513,14 @@ int smb3_init_fs_context(struct fs_context *fc)
        fc->fs_private = ctx;
        fc->ops = &smb3_fs_context_ops;
        return 0;
+
+err_exit:
+       if (ctx) {
+               kfree(ctx->workstation_name);
+               kfree(ctx);
+       }
+
+       return rc;
 }
 
 void
@@ -1518,6 +1546,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
        ctx->source = NULL;
        kfree(ctx->domainname);
        ctx->domainname = NULL;
+       kfree(ctx->workstation_name);
+       ctx->workstation_name = NULL;
        kfree(ctx->nodename);
        ctx->nodename = NULL;
        kfree(ctx->iocharset);
index b2d22cf..e54090d 100644 (file)
@@ -170,6 +170,7 @@ struct smb3_fs_context {
        char *server_hostname;
        char *UNC;
        char *nodename;
+       char *workstation_name;
        char *iocharset;  /* local code page for mapping to and from Unicode */
        char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
        char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
index 8eedd20..7e409a3 100644 (file)
@@ -87,6 +87,14 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon)
        char *sharename;
        struct cifs_fscache_super_auxdata auxdata;
 
+       /*
+        * Check if cookie was already initialized so don't reinitialize it.
+        * In the future, as we integrate with newer fscache features,
+        * we may want to instead add a check if cookie has changed
+        */
+       if (tcon->fscache == NULL)
+               return;
+
        sharename = extract_sharename(tcon->treeName);
        if (IS_ERR(sharename)) {
                cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__);
index ba2c3e8..5148d48 100644 (file)
@@ -75,6 +75,7 @@ sesInfoAlloc(void)
                INIT_LIST_HEAD(&ret_buf->tcon_list);
                mutex_init(&ret_buf->session_mutex);
                spin_lock_init(&ret_buf->iface_lock);
+               spin_lock_init(&ret_buf->chan_lock);
        }
        return ret_buf;
 }
@@ -94,6 +95,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
        kfree_sensitive(buf_to_free->password);
        kfree(buf_to_free->user_name);
        kfree(buf_to_free->domainName);
+       kfree(buf_to_free->workstation_name);
        kfree_sensitive(buf_to_free->auth_key.response);
        kfree(buf_to_free->iface_list);
        kfree_sensitive(buf_to_free);
@@ -138,9 +140,6 @@ tconInfoFree(struct cifs_tcon *buf_to_free)
        kfree(buf_to_free->nativeFileSystem);
        kfree_sensitive(buf_to_free->password);
        kfree(buf_to_free->crfid.fid);
-#ifdef CONFIG_CIFS_DFS_UPCALL
-       kfree(buf_to_free->dfs_path);
-#endif
        kfree(buf_to_free);
 }
 
@@ -1287,69 +1286,20 @@ out:
        return rc;
 }
 
-static void tcon_super_cb(struct super_block *sb, void *arg)
+int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
 {
-       struct super_cb_data *sd = arg;
-       struct cifs_tcon *tcon = sd->data;
-       struct cifs_sb_info *cifs_sb;
-
-       if (sd->sb)
-               return;
-
-       cifs_sb = CIFS_SB(sb);
-       if (tcon->dfs_path && cifs_sb->origin_fullpath &&
-           !strcasecmp(tcon->dfs_path, cifs_sb->origin_fullpath))
-               sd->sb = sb;
-}
-
-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon)
-{
-       return __cifs_get_super(tcon_super_cb, tcon);
-}
-
-static inline void cifs_put_tcon_super(struct super_block *sb)
-{
-       __cifs_put_super(sb);
-}
-#else
-static inline struct super_block *cifs_get_tcon_super(struct cifs_tcon *tcon)
-{
-       return ERR_PTR(-EOPNOTSUPP);
-}
-
-static inline void cifs_put_tcon_super(struct super_block *sb)
-{
-}
-#endif
-
-int update_super_prepath(struct cifs_tcon *tcon, char *prefix)
-{
-       struct super_block *sb;
-       struct cifs_sb_info *cifs_sb;
-       int rc = 0;
-
-       sb = cifs_get_tcon_super(tcon);
-       if (IS_ERR(sb))
-               return PTR_ERR(sb);
-
-       cifs_sb = CIFS_SB(sb);
-
        kfree(cifs_sb->prepath);
 
        if (prefix && *prefix) {
                cifs_sb->prepath = kstrdup(prefix, GFP_ATOMIC);
-               if (!cifs_sb->prepath) {
-                       rc = -ENOMEM;
-                       goto out;
-               }
+               if (!cifs_sb->prepath)
+                       return -ENOMEM;
 
                convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb));
        } else
                cifs_sb->prepath = NULL;
 
        cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
-
-out:
-       cifs_put_tcon_super(sb);
-       return rc;
+       return 0;
 }
+#endif
index 25a2b8e..fe707f4 100644 (file)
@@ -119,7 +119,9 @@ typedef struct _AUTHENTICATE_MESSAGE {
  */
 
 int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
-void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
+int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
+                                struct cifs_ses *ses,
+                                const struct nls_table *nls_cp);
 int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
                        struct cifs_ses *ses,
                        const struct nls_table *nls_cp);
index 23e02db..2c10b18 100644 (file)
@@ -54,41 +54,53 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
 {
        int i;
 
+       spin_lock(&ses->chan_lock);
        for (i = 0; i < ses->chan_count; i++) {
-               if (is_server_using_iface(ses->chans[i].server, iface))
+               if (is_server_using_iface(ses->chans[i].server, iface)) {
+                       spin_unlock(&ses->chan_lock);
                        return true;
+               }
        }
+       spin_unlock(&ses->chan_lock);
        return false;
 }
 
 /* returns number of channels added */
 int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 {
-       int old_chan_count = ses->chan_count;
-       int left = ses->chan_max - ses->chan_count;
+       int old_chan_count, new_chan_count;
+       int left;
        int i = 0;
        int rc = 0;
        int tries = 0;
        struct cifs_server_iface *ifaces = NULL;
        size_t iface_count;
 
+       if (ses->server->dialect < SMB30_PROT_ID) {
+               cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n");
+               return 0;
+       }
+
+       spin_lock(&ses->chan_lock);
+
+       new_chan_count = old_chan_count = ses->chan_count;
+       left = ses->chan_max - ses->chan_count;
+
        if (left <= 0) {
                cifs_dbg(FYI,
                         "ses already at max_channels (%zu), nothing to open\n",
                         ses->chan_max);
-               return 0;
-       }
-
-       if (ses->server->dialect < SMB30_PROT_ID) {
-               cifs_dbg(VFS, "multichannel is not supported on this protocol version, use 3.0 or above\n");
+               spin_unlock(&ses->chan_lock);
                return 0;
        }
 
        if (!(ses->server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL)) {
                cifs_dbg(VFS, "server %s does not support multichannel\n", ses->server->hostname);
                ses->chan_max = 1;
+               spin_unlock(&ses->chan_lock);
                return 0;
        }
+       spin_unlock(&ses->chan_lock);
 
        /*
         * Make a copy of the iface list at the time and use that
@@ -142,10 +154,11 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
                cifs_dbg(FYI, "successfully opened new channel on iface#%d\n",
                         i);
                left--;
+               new_chan_count++;
        }
 
        kfree(ifaces);
-       return ses->chan_count - old_chan_count;
+       return new_chan_count - old_chan_count;
 }
 
 /*
@@ -157,10 +170,14 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server)
 {
        int i;
 
+       spin_lock(&ses->chan_lock);
        for (i = 0; i < ses->chan_count; i++) {
-               if (ses->chans[i].server == server)
+               if (ses->chans[i].server == server) {
+                       spin_unlock(&ses->chan_lock);
                        return &ses->chans[i];
+               }
        }
+       spin_unlock(&ses->chan_lock);
        return NULL;
 }
 
@@ -168,6 +185,7 @@ static int
 cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
                     struct cifs_server_iface *iface)
 {
+       struct TCP_Server_Info *chan_server;
        struct cifs_chan *chan;
        struct smb3_fs_context ctx = {NULL};
        static const char unc_fmt[] = "\\%s\\foo";
@@ -240,18 +258,19 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
               SMB2_CLIENT_GUID_SIZE);
        ctx.use_client_guid = true;
 
-       mutex_lock(&ses->session_mutex);
+       chan_server = cifs_get_tcp_session(&ctx, ses->server);
 
+       mutex_lock(&ses->session_mutex);
+       spin_lock(&ses->chan_lock);
        chan = ses->binding_chan = &ses->chans[ses->chan_count];
-       chan->server = cifs_get_tcp_session(&ctx);
+       chan->server = chan_server;
        if (IS_ERR(chan->server)) {
                rc = PTR_ERR(chan->server);
                chan->server = NULL;
+               spin_unlock(&ses->chan_lock);
                goto out;
        }
-       spin_lock(&cifs_tcp_ses_lock);
-       chan->server->is_channel = true;
-       spin_unlock(&cifs_tcp_ses_lock);
+       spin_unlock(&ses->chan_lock);
 
        /*
         * We need to allocate the server crypto now as we will need
@@ -283,8 +302,11 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
         * ses to the new server.
         */
 
+       spin_lock(&ses->chan_lock);
        ses->chan_count++;
        atomic_set(&ses->chan_seq, 0);
+       spin_unlock(&ses->chan_lock);
+
 out:
        ses->binding = false;
        ses->binding_chan = NULL;
@@ -599,18 +621,85 @@ int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
        return 0;
 }
 
+static int size_of_ntlmssp_blob(struct cifs_ses *ses, int base_size)
+{
+       int sz = base_size + ses->auth_key.len
+               - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+       if (ses->domainName)
+               sz += sizeof(__le16) * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+       else
+               sz += sizeof(__le16);
+
+       if (ses->user_name)
+               sz += sizeof(__le16) * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+       else
+               sz += sizeof(__le16);
+
+       sz += sizeof(__le16) * strnlen(ses->workstation_name, CIFS_MAX_WORKSTATION_LEN);
+
+       return sz;
+}
+
+static inline void cifs_security_buffer_from_str(SECURITY_BUFFER *pbuf,
+                                                char *str_value,
+                                                int str_length,
+                                                unsigned char *pstart,
+                                                unsigned char **pcur,
+                                                const struct nls_table *nls_cp)
+{
+       unsigned char *tmp = pstart;
+       int len;
+
+       if (!pbuf)
+               return;
+
+       if (!pcur)
+               pcur = &tmp;
+
+       if (!str_value) {
+               pbuf->BufferOffset = cpu_to_le32(*pcur - pstart);
+               pbuf->Length = 0;
+               pbuf->MaximumLength = 0;
+               *pcur += sizeof(__le16);
+       } else {
+               len = cifs_strtoUTF16((__le16 *)*pcur,
+                                     str_value,
+                                     str_length,
+                                     nls_cp);
+               len *= sizeof(__le16);
+               pbuf->BufferOffset = cpu_to_le32(*pcur - pstart);
+               pbuf->Length = cpu_to_le16(len);
+               pbuf->MaximumLength = cpu_to_le16(len);
+               *pcur += len;
+       }
+}
+
 /* BB Move to ntlmssp.c eventually */
 
-/* We do not malloc the blob, it is passed in pbuffer, because
-   it is fixed size, and small, making this approach cleaner */
-void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
-                                        struct cifs_ses *ses)
+int build_ntlmssp_negotiate_blob(unsigned char **pbuffer,
+                                u16 *buflen,
+                                struct cifs_ses *ses,
+                                const struct nls_table *nls_cp)
 {
+       int rc = 0;
        struct TCP_Server_Info *server = cifs_ses_server(ses);
-       NEGOTIATE_MESSAGE *sec_blob = (NEGOTIATE_MESSAGE *)pbuffer;
+       NEGOTIATE_MESSAGE *sec_blob;
        __u32 flags;
+       unsigned char *tmp;
+       int len;
+
+       len = size_of_ntlmssp_blob(ses, sizeof(NEGOTIATE_MESSAGE));
+       *pbuffer = kmalloc(len, GFP_KERNEL);
+       if (!*pbuffer) {
+               rc = -ENOMEM;
+               cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
+               *buflen = 0;
+               goto setup_ntlm_neg_ret;
+       }
+       sec_blob = (NEGOTIATE_MESSAGE *)*pbuffer;
 
-       memset(pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
+       memset(*pbuffer, 0, sizeof(NEGOTIATE_MESSAGE));
        memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
        sec_blob->MessageType = NtLmNegotiate;
 
@@ -624,34 +713,25 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
        if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
                flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
 
+       tmp = *pbuffer + sizeof(NEGOTIATE_MESSAGE);
        sec_blob->NegotiateFlags = cpu_to_le32(flags);
 
-       sec_blob->WorkstationName.BufferOffset = 0;
-       sec_blob->WorkstationName.Length = 0;
-       sec_blob->WorkstationName.MaximumLength = 0;
+       /* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */
+       cifs_security_buffer_from_str(&sec_blob->DomainName,
+                                     NULL,
+                                     CIFS_MAX_DOMAINNAME_LEN,
+                                     *pbuffer, &tmp,
+                                     nls_cp);
 
-       /* Domain name is sent on the Challenge not Negotiate NTLMSSP request */
-       sec_blob->DomainName.BufferOffset = 0;
-       sec_blob->DomainName.Length = 0;
-       sec_blob->DomainName.MaximumLength = 0;
-}
-
-static int size_of_ntlmssp_blob(struct cifs_ses *ses)
-{
-       int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
-               - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
-
-       if (ses->domainName)
-               sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
-       else
-               sz += 2;
-
-       if (ses->user_name)
-               sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
-       else
-               sz += 2;
+       cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+                                     NULL,
+                                     CIFS_MAX_WORKSTATION_LEN,
+                                     *pbuffer, &tmp,
+                                     nls_cp);
 
-       return sz;
+       *buflen = tmp - *pbuffer;
+setup_ntlm_neg_ret:
+       return rc;
 }
 
 int build_ntlmssp_auth_blob(unsigned char **pbuffer,
@@ -663,6 +743,7 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
        AUTHENTICATE_MESSAGE *sec_blob;
        __u32 flags;
        unsigned char *tmp;
+       int len;
 
        rc = setup_ntlmv2_rsp(ses, nls_cp);
        if (rc) {
@@ -670,7 +751,9 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
                *buflen = 0;
                goto setup_ntlmv2_ret;
        }
-       *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+
+       len = size_of_ntlmssp_blob(ses, sizeof(AUTHENTICATE_MESSAGE));
+       *pbuffer = kmalloc(len, GFP_KERNEL);
        if (!*pbuffer) {
                rc = -ENOMEM;
                cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc);
@@ -686,7 +769,7 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
                NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO |
                NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
                NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC |
-               NTLMSSP_NEGOTIATE_SEAL;
+               NTLMSSP_NEGOTIATE_SEAL | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
        if (ses->server->sign)
                flags |= NTLMSSP_NEGOTIATE_SIGN;
        if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess)
@@ -719,42 +802,23 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
                sec_blob->NtChallengeResponse.MaximumLength = 0;
        }
 
-       if (ses->domainName == NULL) {
-               sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-               sec_blob->DomainName.Length = 0;
-               sec_blob->DomainName.MaximumLength = 0;
-               tmp += 2;
-       } else {
-               int len;
-               len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
-                                     CIFS_MAX_DOMAINNAME_LEN, nls_cp);
-               len *= 2; /* unicode is 2 bytes each */
-               sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-               sec_blob->DomainName.Length = cpu_to_le16(len);
-               sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
-               tmp += len;
-       }
+       cifs_security_buffer_from_str(&sec_blob->DomainName,
+                                     ses->domainName,
+                                     CIFS_MAX_DOMAINNAME_LEN,
+                                     *pbuffer, &tmp,
+                                     nls_cp);
 
-       if (ses->user_name == NULL) {
-               sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-               sec_blob->UserName.Length = 0;
-               sec_blob->UserName.MaximumLength = 0;
-               tmp += 2;
-       } else {
-               int len;
-               len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
-                                     CIFS_MAX_USERNAME_LEN, nls_cp);
-               len *= 2; /* unicode is 2 bytes each */
-               sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-               sec_blob->UserName.Length = cpu_to_le16(len);
-               sec_blob->UserName.MaximumLength = cpu_to_le16(len);
-               tmp += len;
-       }
+       cifs_security_buffer_from_str(&sec_blob->UserName,
+                                     ses->user_name,
+                                     CIFS_MAX_USERNAME_LEN,
+                                     *pbuffer, &tmp,
+                                     nls_cp);
 
-       sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
-       sec_blob->WorkstationName.Length = 0;
-       sec_blob->WorkstationName.MaximumLength = 0;
-       tmp += 2;
+       cifs_security_buffer_from_str(&sec_blob->WorkstationName,
+                                     ses->workstation_name,
+                                     CIFS_MAX_WORKSTATION_LEN,
+                                     *pbuffer, &tmp,
+                                     nls_cp);
 
        if (((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) ||
                (ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
@@ -1230,6 +1294,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
        struct cifs_ses *ses = sess_data->ses;
        __u16 bytes_remaining;
        char *bcc_ptr;
+       unsigned char *ntlmsspblob = NULL;
        u16 blob_len;
 
        cifs_dbg(FYI, "rawntlmssp session setup negotiate phase\n");
@@ -1253,10 +1318,15 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data)
        pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
 
        /* Build security blob before we assemble the request */
-       build_ntlmssp_negotiate_blob(pSMB->req.SecurityBlob, ses);
-       sess_data->iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE);
-       sess_data->iov[1].iov_base = pSMB->req.SecurityBlob;
-       pSMB->req.SecurityBlobLength = cpu_to_le16(sizeof(NEGOTIATE_MESSAGE));
+       rc = build_ntlmssp_negotiate_blob(&ntlmsspblob,
+                                    &blob_len, ses,
+                                    sess_data->nls_cp);
+       if (rc)
+               goto out;
+
+       sess_data->iov[1].iov_len = blob_len;
+       sess_data->iov[1].iov_base = ntlmsspblob;
+       pSMB->req.SecurityBlobLength = cpu_to_le16(blob_len);
 
        rc = _sess_auth_rawntlmssp_assemble_req(sess_data);
        if (rc)
index 8297703..fe5bfa2 100644 (file)
@@ -46,6 +46,10 @@ struct cop_vars {
        struct smb2_file_link_info link_info;
 };
 
+/*
+ * note: If cfile is passed, the reference to it is dropped here.
+ * So make sure that you do not reuse cfile after return from this func.
+ */
 static int
 smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
                 struct cifs_sb_info *cifs_sb, const char *full_path,
@@ -536,10 +540,11 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                create_options |= OPEN_REPARSE_POINT;
 
                /* Failed on a symbolic link - query a reparse point info */
+               cifs_get_readable_path(tcon, full_path, &cfile);
                rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
                                      FILE_READ_ATTRIBUTES, FILE_OPEN,
                                      create_options, ACL_NO_MODE,
-                                     smb2_data, SMB2_OP_QUERY_INFO, NULL);
+                                     smb2_data, SMB2_OP_QUERY_INFO, cfile);
        }
        if (rc)
                goto out;
@@ -587,10 +592,11 @@ smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                create_options |= OPEN_REPARSE_POINT;
 
                /* Failed on a symbolic link - query a reparse point info */
+               cifs_get_readable_path(tcon, full_path, &cfile);
                rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
                                      FILE_READ_ATTRIBUTES, FILE_OPEN,
                                      create_options, ACL_NO_MODE,
-                                     smb2_data, SMB2_OP_POSIX_QUERY_INFO, NULL);
+                                     smb2_data, SMB2_OP_POSIX_QUERY_INFO, cfile);
        }
        if (rc)
                goto out;
@@ -707,10 +713,12 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
                   struct cifs_sb_info *cifs_sb, bool set_alloc)
 {
        __le64 eof = cpu_to_le64(size);
+       struct cifsFileInfo *cfile;
 
+       cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
        return smb2_compound_op(xid, tcon, cifs_sb, full_path,
                                FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE,
-                               &eof, SMB2_OP_SET_EOF, NULL);
+                               &eof, SMB2_OP_SET_EOF, cfile);
 }
 
 int
@@ -719,6 +727,8 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct tcon_link *tlink;
+       struct cifs_tcon *tcon;
+       struct cifsFileInfo *cfile;
        int rc;
 
        if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
@@ -729,10 +739,12 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink))
                return PTR_ERR(tlink);
+       tcon = tlink_tcon(tlink);
 
-       rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path,
+       cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
+       rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
                              FILE_WRITE_ATTRIBUTES, FILE_OPEN,
-                             0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, NULL);
+                             0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile);
        cifs_put_tlink(tlink);
        return rc;
 }
index 7acf71d..c5b1dea 100644 (file)
@@ -2844,6 +2844,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
        struct fsctl_get_dfs_referral_req *dfs_req = NULL;
        struct get_dfs_referral_rsp *dfs_rsp = NULL;
        u32 dfs_req_size = 0, dfs_rsp_size = 0;
+       int retry_count = 0;
 
        cifs_dbg(FYI, "%s: path: %s\n", __func__, search_name);
 
@@ -2895,11 +2896,14 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
                                true /* is_fsctl */,
                                (char *)dfs_req, dfs_req_size, CIFSMaxBufSize,
                                (char **)&dfs_rsp, &dfs_rsp_size);
-       } while (rc == -EAGAIN);
+               if (!is_retryable_error(rc))
+                       break;
+               usleep_range(512, 2048);
+       } while (++retry_count < 5);
 
        if (rc) {
-               if ((rc != -ENOENT) && (rc != -EOPNOTSUPP))
-                       cifs_tcon_dbg(VFS, "ioctl error in %s rc=%d\n", __func__, rc);
+               if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP)
+                       cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc);
                goto out;
        }
 
index d2ecb2e..2f5f2c4 100644 (file)
@@ -155,7 +155,11 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
        if (tcon == NULL)
                return 0;
 
-       if (smb2_command == SMB2_TREE_CONNECT)
+       /*
+        * Need to also skip SMB2_IOCTL because it is used for checking nested dfs links in
+        * cifs_tree_connect().
+        */
+       if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
                return 0;
 
        if (tcon->tidStatus == CifsExiting) {
@@ -253,7 +257,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon,
        /*
         * If we are reconnecting an extra channel, bind
         */
-       if (server->is_channel) {
+       if (CIFS_SERVER_IS_CHAN(server)) {
                ses->binding = true;
                ses->binding_chan = cifs_ses_find_chan(ses, server);
        }
@@ -1456,7 +1460,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
        int rc;
        struct cifs_ses *ses = sess_data->ses;
        struct smb2_sess_setup_rsp *rsp = NULL;
-       char *ntlmssp_blob = NULL;
+       unsigned char *ntlmssp_blob = NULL;
        bool use_spnego = false; /* else use raw ntlmssp */
        u16 blob_length = 0;
 
@@ -1475,22 +1479,17 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data)
        if (rc)
                goto out_err;
 
-       ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
-                              GFP_KERNEL);
-       if (ntlmssp_blob == NULL) {
-               rc = -ENOMEM;
-               goto out;
-       }
+       rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob,
+                                         &blob_length, ses,
+                                         sess_data->nls_cp);
+       if (rc)
+               goto out_err;
 
-       build_ntlmssp_negotiate_blob(ntlmssp_blob, ses);
        if (use_spnego) {
                /* BB eventually need to add this */
                cifs_dbg(VFS, "spnego not supported for SMB2 yet\n");
                rc = -EOPNOTSUPP;
                goto out;
-       } else {
-               blob_length = sizeof(struct _NEGOTIATE_MESSAGE);
-               /* with raw NTLMSSP we don't encapsulate in SPNEGO */
        }
        sess_data->iov[1].iov_base = ntlmssp_blob;
        sess_data->iov[1].iov_len = blob_length;
@@ -1841,7 +1840,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
        cifs_small_buf_release(req);
        rsp = (struct smb2_tree_connect_rsp *)rsp_iov.iov_base;
        trace_smb3_tcon(xid, tcon->tid, ses->Suid, tree, rc);
-       if (rc != 0) {
+       if ((rc != 0) || (rsp == NULL)) {
                cifs_stats_fail_inc(tcon, SMB2_TREE_CONNECT_HE);
                tcon->need_reconnect = true;
                goto tcon_error_exit;
@@ -2669,7 +2668,18 @@ int smb311_posix_mkdir(const unsigned int xid, struct inode *inode,
                goto err_free_rsp_buf;
        }
 
+       /*
+        * Although unlikely to be possible for rsp to be null and rc not set,
+        * adding check below is slightly safer long term (and quiets Coverity
+        * warning)
+        */
        rsp = (struct smb2_create_rsp *)rsp_iov.iov_base;
+       if (rsp == NULL) {
+               rc = -EIO;
+               kfree(pc_buf);
+               goto err_free_req;
+       }
+
        trace_smb3_posix_mkdir_done(xid, le64_to_cpu(rsp->PersistentFileId),
                                    tcon->tid,
                                    ses->Suid, CREATE_NOT_FILE,
@@ -2942,7 +2952,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
                        tcon->need_reconnect = true;
                }
                goto creat_exit;
-       } else
+       } else if (rsp == NULL) /* unlikely to happen, but safer to check */
+               goto creat_exit;
+       else
                trace_smb3_open_done(xid, le64_to_cpu(rsp->PersistentFileId),
                                     tcon->tid,
                                     ses->Suid, oparms->create_options,
@@ -3163,6 +3175,16 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        if ((plen == NULL) || (out_data == NULL))
                goto ioctl_exit;
 
+       /*
+        * Although unlikely to be possible for rsp to be null and rc not set,
+        * adding check below is slightly safer long term (and quiets Coverity
+        * warning)
+        */
+       if (rsp == NULL) {
+               rc = -EIO;
+               goto ioctl_exit;
+       }
+
        *plen = le32_to_cpu(rsp->OutputCount);
 
        /* We check for obvious errors in the output buffer length and offset */
index b737932..61ea3d3 100644 (file)
@@ -1044,14 +1044,17 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses)
        if (!ses)
                return NULL;
 
+       spin_lock(&ses->chan_lock);
        if (!ses->binding) {
                /* round robin */
                if (ses->chan_count > 1) {
                        index = (uint)atomic_inc_return(&ses->chan_seq);
                        index %= ses->chan_count;
                }
+               spin_unlock(&ses->chan_lock);
                return ses->chans[index].server;
        } else {
+               spin_unlock(&ses->chan_lock);
                return cifs_ses_server(ses);
        }
 }
index bcb1b91..9a249bf 100644 (file)
@@ -96,16 +96,9 @@ static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl)
        DBG_BUGON(1);
 }
 
-/*
- * a compressed_pages[] placeholder in order to avoid
- * being filled with file pages for in-place decompression.
- */
-#define PAGE_UNALLOCATED     ((void *)0x5F0E4B1D)
-
 /* how to allocate cached pages for a pcluster */
 enum z_erofs_cache_alloctype {
        DONTALLOC,      /* don't allocate any cached pages */
-       DELAYEDALLOC,   /* delayed allocation (at the time of submitting io) */
        /*
         * try to use cached I/O if page allocation succeeds or fallback
         * to in-place I/O instead to avoid any direct reclaim.
@@ -267,10 +260,6 @@ static void preload_compressed_pages(struct z_erofs_collector *clt,
                        /* I/O is needed, no possible to decompress directly */
                        standalone = false;
                        switch (type) {
-                       case DELAYEDALLOC:
-                               t = tagptr_init(compressed_page_t,
-                                               PAGE_UNALLOCATED);
-                               break;
                        case TRYALLOC:
                                newpage = erofs_allocpage(pagepool, gfp);
                                if (!newpage)
@@ -371,8 +360,8 @@ static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt,
 
 /* callers must be with collection lock held */
 static int z_erofs_attach_page(struct z_erofs_collector *clt,
-                              struct page *page,
-                              enum z_erofs_page_type type)
+                              struct page *page, enum z_erofs_page_type type,
+                              bool pvec_safereuse)
 {
        int ret;
 
@@ -382,9 +371,9 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt,
            z_erofs_try_inplace_io(clt, page))
                return 0;
 
-       ret = z_erofs_pagevec_enqueue(&clt->vector, page, type);
+       ret = z_erofs_pagevec_enqueue(&clt->vector, page, type,
+                                     pvec_safereuse);
        clt->cl->vcnt += (unsigned int)ret;
-
        return ret ? 0 : -EAGAIN;
 }
 
@@ -727,7 +716,8 @@ hitted:
                tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED);
 
 retry:
-       err = z_erofs_attach_page(clt, page, page_type);
+       err = z_erofs_attach_page(clt, page, page_type,
+                                 clt->mode >= COLLECT_PRIMARY_FOLLOWED);
        /* should allocate an additional short-lived page for pagevec */
        if (err == -EAGAIN) {
                struct page *const newpage =
@@ -735,7 +725,7 @@ retry:
 
                set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE);
                err = z_erofs_attach_page(clt, newpage,
-                                         Z_EROFS_PAGE_TYPE_EXCLUSIVE);
+                                         Z_EROFS_PAGE_TYPE_EXCLUSIVE, true);
                if (!err)
                        goto retry;
        }
@@ -1089,15 +1079,6 @@ repeat:
        if (!page)
                goto out_allocpage;
 
-       /*
-        * the cached page has not been allocated and
-        * an placeholder is out there, prepare it now.
-        */
-       if (page == PAGE_UNALLOCATED) {
-               tocache = true;
-               goto out_allocpage;
-       }
-
        /* process the target tagged pointer */
        t = tagptr_init(compressed_page_t, page);
        justfound = tagptr_unfold_tags(t);
index 879df53..4a69515 100644 (file)
@@ -179,4 +179,3 @@ static inline void z_erofs_onlinepage_endio(struct page *page)
 #define Z_EROFS_VMAP_GLOBAL_PAGES      2048
 
 #endif
-
index dfd7fe0..b05464f 100644 (file)
@@ -106,11 +106,18 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor,
 
 static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor,
                                           struct page *page,
-                                          enum z_erofs_page_type type)
+                                          enum z_erofs_page_type type,
+                                          bool pvec_safereuse)
 {
-       if (!ctor->next && type)
-               if (ctor->index + 1 == ctor->nr)
+       if (!ctor->next) {
+               /* some pages cannot be reused as pvec safely without I/O */
+               if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse)
+                       type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED;
+
+               if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE &&
+                   ctor->index + 1 == ctor->nr)
                        return false;
+       }
 
        if (ctor->index >= ctor->nr)
                z_erofs_pagevec_ctor_pagedown(ctor, false);
index 83e9bc0..f1693d4 100644 (file)
@@ -653,7 +653,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
                return PTR_ERR(inode);
        }
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err) {
                iput(inode);
                goto err_out;
@@ -705,9 +705,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
        }
 
 #ifdef CONFIG_QUOTA
-       /* Needed for iput() to work correctly and not trash data */
-       sbi->sb->s_flags |= SB_ACTIVE;
-
        /*
         * Turn on quotas which were not enabled for read-only mounts if
         * filesystem has quota feature, so that they are updated correctly.
@@ -1162,7 +1159,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
        if (!is_journalled_quota(sbi))
                return false;
 
-       down_write(&sbi->quota_sem);
+       if (!down_write_trylock(&sbi->quota_sem))
+               return true;
        if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
                ret = false;
        } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
index 20a083d..49121a2 100644 (file)
@@ -336,8 +336,8 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = {
 
 static int zstd_init_compress_ctx(struct compress_ctx *cc)
 {
-       ZSTD_parameters params;
-       ZSTD_CStream *stream;
+       zstd_parameters params;
+       zstd_cstream *stream;
        void *workspace;
        unsigned int workspace_size;
        unsigned char level = F2FS_I(cc->inode)->i_compress_flag >>
@@ -346,17 +346,17 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
        if (!level)
                level = F2FS_ZSTD_DEFAULT_CLEVEL;
 
-       params = ZSTD_getParams(level, cc->rlen, 0);
-       workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams);
+       params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen);
+       workspace_size = zstd_cstream_workspace_bound(&params.cParams);
 
        workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
                                        workspace_size, GFP_NOFS);
        if (!workspace)
                return -ENOMEM;
 
-       stream = ZSTD_initCStream(params, 0, workspace, workspace_size);
+       stream = zstd_init_cstream(&params, 0, workspace, workspace_size);
        if (!stream) {
-               printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n",
+               printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n",
                                KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
                                __func__);
                kvfree(workspace);
@@ -379,9 +379,9 @@ static void zstd_destroy_compress_ctx(struct compress_ctx *cc)
 
 static int zstd_compress_pages(struct compress_ctx *cc)
 {
-       ZSTD_CStream *stream = cc->private2;
-       ZSTD_inBuffer inbuf;
-       ZSTD_outBuffer outbuf;
+       zstd_cstream *stream = cc->private2;
+       zstd_in_buffer inbuf;
+       zstd_out_buffer outbuf;
        int src_size = cc->rlen;
        int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE;
        int ret;
@@ -394,19 +394,19 @@ static int zstd_compress_pages(struct compress_ctx *cc)
        outbuf.dst = cc->cbuf->cdata;
        outbuf.size = dst_size;
 
-       ret = ZSTD_compressStream(stream, &outbuf, &inbuf);
-       if (ZSTD_isError(ret)) {
-               printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+       ret = zstd_compress_stream(stream, &outbuf, &inbuf);
+       if (zstd_is_error(ret)) {
+               printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n",
                                KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
-                               __func__, ZSTD_getErrorCode(ret));
+                               __func__, zstd_get_error_code(ret));
                return -EIO;
        }
 
-       ret = ZSTD_endStream(stream, &outbuf);
-       if (ZSTD_isError(ret)) {
-               printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n",
+       ret = zstd_end_stream(stream, &outbuf);
+       if (zstd_is_error(ret)) {
+               printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n",
                                KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id,
-                               __func__, ZSTD_getErrorCode(ret));
+                               __func__, zstd_get_error_code(ret));
                return -EIO;
        }
 
@@ -423,22 +423,22 @@ static int zstd_compress_pages(struct compress_ctx *cc)
 
 static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
 {
-       ZSTD_DStream *stream;
+       zstd_dstream *stream;
        void *workspace;
        unsigned int workspace_size;
        unsigned int max_window_size =
                        MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);
 
-       workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
+       workspace_size = zstd_dstream_workspace_bound(max_window_size);
 
        workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
                                        workspace_size, GFP_NOFS);
        if (!workspace)
                return -ENOMEM;
 
-       stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
+       stream = zstd_init_dstream(max_window_size, workspace, workspace_size);
        if (!stream) {
-               printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
+               printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n",
                                KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
                                __func__);
                kvfree(workspace);
@@ -460,9 +460,9 @@ static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic)
 
 static int zstd_decompress_pages(struct decompress_io_ctx *dic)
 {
-       ZSTD_DStream *stream = dic->private2;
-       ZSTD_inBuffer inbuf;
-       ZSTD_outBuffer outbuf;
+       zstd_dstream *stream = dic->private2;
+       zstd_in_buffer inbuf;
+       zstd_out_buffer outbuf;
        int ret;
 
        inbuf.pos = 0;
@@ -473,11 +473,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
        outbuf.dst = dic->rbuf;
        outbuf.size = dic->rlen;
 
-       ret = ZSTD_decompressStream(stream, &outbuf, &inbuf);
-       if (ZSTD_isError(ret)) {
-               printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n",
+       ret = zstd_decompress_stream(stream, &outbuf, &inbuf);
+       if (zstd_is_error(ret)) {
+               printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n",
                                KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
-                               __func__, ZSTD_getErrorCode(ret));
+                               __func__, zstd_get_error_code(ret));
                return -EIO;
        }
 
@@ -882,6 +882,25 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
        return is_page_in_cluster(cc, index);
 }
 
+bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
+                               int index, int nr_pages)
+{
+       unsigned long pgidx;
+       int i;
+
+       if (nr_pages - index < cc->cluster_size)
+               return false;
+
+       pgidx = pvec->pages[index]->index;
+
+       for (i = 1; i < cc->cluster_size; i++) {
+               if (pvec->pages[index + i]->index != pgidx + i)
+                       return false;
+       }
+
+       return true;
+}
+
 static bool cluster_has_invalid_data(struct compress_ctx *cc)
 {
        loff_t i_size = i_size_read(cc->inode);
@@ -1531,6 +1550,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
        if (cluster_may_compress(cc)) {
                err = f2fs_compress_pages(cc);
                if (err == -EAGAIN) {
+                       add_compr_block_stat(cc->inode, cc->cluster_size);
                        goto write;
                } else if (err) {
                        f2fs_put_rpages_wbc(cc, wbc, true, 1);
index f4fd6c2..9f754aa 100644 (file)
@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
        struct extent_info ei = {0, };
        block_t blkaddr;
        unsigned int start_pgofs;
+       int bidx = 0;
 
        if (!maxblocks)
                return 0;
 
+       map->m_bdev = inode->i_sb->s_bdev;
+       map->m_multidev_dio =
+               f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
+
        map->m_len = 0;
        map->m_flags = 0;
 
@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
                if (flag == F2FS_GET_BLOCK_DIO)
                        f2fs_wait_on_block_writeback_range(inode,
                                                map->m_pblk, map->m_len);
+
+               if (map->m_multidev_dio) {
+                       block_t blk_addr = map->m_pblk;
+
+                       bidx = f2fs_target_device_index(sbi, map->m_pblk);
+
+                       map->m_bdev = FDEV(bidx).bdev;
+                       map->m_pblk -= FDEV(bidx).start_blk;
+                       map->m_len = min(map->m_len,
+                               FDEV(bidx).end_blk + 1 - map->m_pblk);
+
+                       if (map->m_may_create)
+                               f2fs_update_device_state(sbi, inode->i_ino,
+                                                       blk_addr, map->m_len);
+               }
                goto out;
        }
 
@@ -1609,6 +1629,9 @@ next_block:
        if (flag == F2FS_GET_BLOCK_PRE_AIO)
                goto skip;
 
+       if (map->m_multidev_dio)
+               bidx = f2fs_target_device_index(sbi, blkaddr);
+
        if (map->m_len == 0) {
                /* preallocated unwritten block should be mapped for fiemap. */
                if (blkaddr == NEW_ADDR)
@@ -1617,10 +1640,15 @@ next_block:
 
                map->m_pblk = blkaddr;
                map->m_len = 1;
+
+               if (map->m_multidev_dio)
+                       map->m_bdev = FDEV(bidx).bdev;
        } else if ((map->m_pblk != NEW_ADDR &&
                        blkaddr == (map->m_pblk + ofs)) ||
                        (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
                        flag == F2FS_GET_BLOCK_PRE_DIO) {
+               if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
+                       goto sync_out;
                ofs++;
                map->m_len++;
        } else {
@@ -1673,10 +1701,32 @@ skip:
 
 sync_out:
 
-       /* for hardware encryption, but to avoid potential issue in future */
-       if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
+       if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
+               /*
+                * for hardware encryption, but to avoid potential issue
+                * in future
+                */
                f2fs_wait_on_block_writeback_range(inode,
                                                map->m_pblk, map->m_len);
+               invalidate_mapping_pages(META_MAPPING(sbi),
+                                               map->m_pblk, map->m_pblk);
+
+               if (map->m_multidev_dio) {
+                       block_t blk_addr = map->m_pblk;
+
+                       bidx = f2fs_target_device_index(sbi, map->m_pblk);
+
+                       map->m_bdev = FDEV(bidx).bdev;
+                       map->m_pblk -= FDEV(bidx).start_blk;
+
+                       if (map->m_may_create)
+                               f2fs_update_device_state(sbi, inode->i_ino,
+                                                       blk_addr, map->m_len);
+
+                       f2fs_bug_on(sbi, blk_addr + map->m_len >
+                                               FDEV(bidx).end_blk + 1);
+               }
+       }
 
        if (flag == F2FS_GET_BLOCK_PRECACHE) {
                if (map->m_flags & F2FS_MAP_MAPPED) {
@@ -1696,7 +1746,7 @@ unlock_out:
                f2fs_balance_fs(sbi, dn.node_changed);
        }
 out:
-       trace_f2fs_map_blocks(inode, map, err);
+       trace_f2fs_map_blocks(inode, map, create, flag, err);
        return err;
 }
 
@@ -1755,6 +1805,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
                map_bh(bh, inode->i_sb, map.m_pblk);
                bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
                bh->b_size = blks_to_bytes(inode, map.m_len);
+
+               if (map.m_multidev_dio)
+                       bh->b_bdev = map.m_bdev;
        }
        return err;
 }
@@ -2989,6 +3042,10 @@ readd:
                        need_readd = false;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
                        if (f2fs_compressed_file(inode)) {
+                               void *fsdata = NULL;
+                               struct page *pagep;
+                               int ret2;
+
                                ret = f2fs_init_compress_ctx(&cc);
                                if (ret) {
                                        done = 1;
@@ -3007,27 +3064,23 @@ readd:
                                if (unlikely(f2fs_cp_error(sbi)))
                                        goto lock_page;
 
-                               if (f2fs_cluster_is_empty(&cc)) {
-                                       void *fsdata = NULL;
-                                       struct page *pagep;
-                                       int ret2;
+                               if (!f2fs_cluster_is_empty(&cc))
+                                       goto lock_page;
 
-                                       ret2 = f2fs_prepare_compress_overwrite(
+                               ret2 = f2fs_prepare_compress_overwrite(
                                                        inode, &pagep,
                                                        page->index, &fsdata);
-                                       if (ret2 < 0) {
-                                               ret = ret2;
-                                               done = 1;
-                                               break;
-                                       } else if (ret2 &&
-                                               !f2fs_compress_write_end(inode,
-                                                               fsdata, page->index,
-                                                               1)) {
-                                               retry = 1;
-                                               break;
-                                       }
-                               } else {
-                                       goto lock_page;
+                               if (ret2 < 0) {
+                                       ret = ret2;
+                                       done = 1;
+                                       break;
+                               } else if (ret2 &&
+                                       (!f2fs_compress_write_end(inode,
+                                               fsdata, page->index, 1) ||
+                                        !f2fs_all_cluster_page_loaded(&cc,
+                                               &pvec, i, nr_pages))) {
+                                       retry = 1;
+                                       break;
                                }
                        }
 #endif
index b339ae8..ce9fc9f 100644 (file)
@@ -55,6 +55,7 @@ enum {
        FAULT_DISCARD,
        FAULT_WRITE_IO,
        FAULT_SLAB_ALLOC,
+       FAULT_DQUOT_INIT,
        FAULT_MAX,
 };
 
@@ -561,6 +562,9 @@ enum {
 
 #define MAX_DIR_RA_PAGES       4       /* maximum ra pages of dir */
 
+/* dirty segments threshold for triggering CP */
+#define DEFAULT_DIRTY_THRESHOLD                4
+
 /* for in-memory extent cache entry */
 #define F2FS_MIN_EXTENT_LEN    64      /* minimum extent length */
 
@@ -617,6 +621,7 @@ struct extent_tree {
                                F2FS_MAP_UNWRITTEN)
 
 struct f2fs_map_blocks {
+       struct block_device *m_bdev;    /* for multi-device dio */
        block_t m_pblk;
        block_t m_lblk;
        unsigned int m_len;
@@ -625,6 +630,7 @@ struct f2fs_map_blocks {
        pgoff_t *m_next_extent;         /* point to next possible extent */
        int m_seg_type;
        bool m_may_create;              /* indicate it is from write path */
+       bool m_multidev_dio;            /* indicate it allows multi-device dio */
 };
 
 /* for flag in get_data_block */
@@ -1284,8 +1290,10 @@ enum {
 };
 
 enum {
-       FS_MODE_ADAPTIVE,       /* use both lfs/ssr allocation */
-       FS_MODE_LFS,            /* use lfs allocation only */
+       FS_MODE_ADAPTIVE,               /* use both lfs/ssr allocation */
+       FS_MODE_LFS,                    /* use lfs allocation only */
+       FS_MODE_FRAGMENT_SEG,           /* segment fragmentation mode */
+       FS_MODE_FRAGMENT_BLK,           /* block fragmentation mode */
 };
 
 enum {
@@ -1728,12 +1736,15 @@ struct f2fs_sb_info {
 
        /* For shrinker support */
        struct list_head s_list;
+       struct mutex umount_mutex;
+       unsigned int shrinker_run_no;
+
+       /* For multi devices */
        int s_ndevs;                            /* number of devices */
        struct f2fs_dev_info *devs;             /* for device list */
        unsigned int dirty_device;              /* for checkpoint data flush */
        spinlock_t dev_lock;                    /* protect dirty_device */
-       struct mutex umount_mutex;
-       unsigned int shrinker_run_no;
+       bool aligned_blksize;                   /* all devices has the same logical blksize */
 
        /* For write statistics */
        u64 sectors_written_start;
@@ -1756,6 +1767,9 @@ struct f2fs_sb_info {
 
        unsigned long seq_file_ra_mul;          /* multiplier for ra_pages of seq. files in fadvise */
 
+       int max_fragment_chunk;                 /* max chunk size for block fragmentation mode */
+       int max_fragment_hole;                  /* max hole size for block fragmentation mode */
+
 #ifdef CONFIG_F2FS_FS_COMPRESSION
        struct kmem_cache *page_array_slab;     /* page array entry */
        unsigned int page_array_slab_size;      /* default page array slab size */
@@ -3363,6 +3377,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
  */
 int f2fs_inode_dirtied(struct inode *inode, bool sync);
 void f2fs_inode_synced(struct inode *inode);
+int f2fs_dquot_initialize(struct inode *inode);
 int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
 int f2fs_quota_sync(struct super_block *sb, int type);
 loff_t max_file_blocks(struct inode *inode);
@@ -3492,6 +3507,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
                        block_t old_blkaddr, block_t *new_blkaddr,
                        struct f2fs_summary *sum, int type,
                        struct f2fs_io_info *fio);
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+                                       block_t blkaddr, unsigned int blkcnt);
 void f2fs_wait_on_page_writeback(struct page *page,
                        enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -3516,6 +3533,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
                        unsigned int segno);
 
+#define DEF_FRAGMENT_SIZE      4
+#define MIN_FRAGMENT_SIZE      1
+#define MAX_FRAGMENT_SIZE      512
+
+static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
+{
+       return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG ||
+               F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK;
+}
+
 /*
  * checkpoint.c
  */
@@ -4027,6 +4054,8 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed,
                                                        block_t blkaddr);
 bool f2fs_cluster_is_empty(struct compress_ctx *cc);
 bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index);
+bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
+                               int index, int nr_pages);
 bool f2fs_sanity_check_cluster(struct dnode_of_data *dn);
 void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page);
 int f2fs_write_multi_pages(struct compress_ctx *cc,
@@ -4152,8 +4181,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
 
        if (!f2fs_compressed_file(inode))
                return true;
-       if (S_ISREG(inode->i_mode) &&
-               (get_dirty_pages(inode) || atomic_read(&fi->i_compr_blocks)))
+       if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
                return false;
 
        fi->i_flags &= ~F2FS_COMPR_FL;
@@ -4302,6 +4330,16 @@ static inline int block_unaligned_IO(struct inode *inode,
        return align & blocksize_mask;
 }
 
+static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
+                                                               int flag)
+{
+       if (!f2fs_is_multi_device(sbi))
+               return false;
+       if (flag != F2FS_GET_BLOCK_DIO)
+               return false;
+       return sbi->aligned_blksize;
+}
+
 static inline bool f2fs_force_buffered_io(struct inode *inode,
                                struct kiocb *iocb, struct iov_iter *iter)
 {
@@ -4310,7 +4348,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
 
        if (f2fs_post_read_required(inode))
                return true;
-       if (f2fs_is_multi_device(sbi))
+
+       /* disallow direct IO if any of devices has unaligned blksize */
+       if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
                return true;
        /*
         * for blkzoned device, fallback direct IO to buffered IO, so
index eb971e1..92ec269 100644 (file)
@@ -786,7 +786,7 @@ int f2fs_truncate(struct inode *inode)
                return -EIO;
        }
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                return err;
 
@@ -916,7 +916,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
                return err;
 
        if (is_quota_modification(inode, attr)) {
-               err = dquot_initialize(inode);
+               err = f2fs_dquot_initialize(inode);
                if (err)
                        return err;
        }
@@ -3020,7 +3020,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
        }
        f2fs_put_page(ipage, 1);
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                return err;
 
index 77391e3..a946ce0 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/random.h>
 
 #include "f2fs.h"
 #include "node.h"
@@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
                p->max_search = sbi->max_victim_search;
 
        /* let's select beginning hot/small space first in no_heap mode*/
-       if (test_opt(sbi, NOHEAP) &&
+       if (f2fs_need_rand_seg(sbi))
+               p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
+       else if (test_opt(sbi, NOHEAP) &&
                (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
                p->offset = 0;
        else
index 56a20d5..ea08f0d 100644 (file)
@@ -192,7 +192,7 @@ int f2fs_convert_inline_inode(struct inode *inode)
                        f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
                return 0;
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                return err;
 
index 9141147..0f8b2df 100644 (file)
@@ -527,7 +527,7 @@ make_now:
                inode->i_op = &f2fs_dir_inode_operations;
                inode->i_fop = &f2fs_dir_operations;
                inode->i_mapping->a_ops = &f2fs_dblock_aops;
-               inode_nohighmem(inode);
+               mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
        } else if (S_ISLNK(inode->i_mode)) {
                if (file_is_encrypt(inode))
                        inode->i_op = &f2fs_encrypted_symlink_inode_operations;
@@ -754,7 +754,7 @@ void f2fs_evict_inode(struct inode *inode)
        if (inode->i_nlink || is_bad_inode(inode))
                goto no_delete;
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err) {
                err = 0;
                set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
index 9c528e5..a728a0a 100644 (file)
@@ -74,7 +74,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
        if (err)
                goto fail_drop;
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                goto fail_drop;
 
@@ -345,7 +345,7 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
        if (!f2fs_is_checkpoint_ready(sbi))
                return -ENOSPC;
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -404,7 +404,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
                        F2FS_I(old_dentry->d_inode)->i_projid)))
                return -EXDEV;
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -460,7 +460,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino)
                return 0;
        }
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -598,10 +598,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
                goto fail;
        }
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                goto fail;
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                goto fail;
 
@@ -675,7 +675,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
        if (err)
                return err;
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -746,7 +746,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
        if (unlikely(f2fs_cp_error(sbi)))
                return -EIO;
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -757,7 +757,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
        inode->i_op = &f2fs_dir_inode_operations;
        inode->i_fop = &f2fs_dir_operations;
        inode->i_mapping->a_ops = &f2fs_dblock_aops;
-       inode_nohighmem(inode);
+       mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
 
        set_inode_flag(inode, FI_INC_LINK);
        f2fs_lock_op(sbi);
@@ -803,7 +803,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
        if (!f2fs_is_checkpoint_ready(sbi))
                return -ENOSPC;
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -841,7 +841,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry,
        struct inode *inode;
        int err;
 
-       err = dquot_initialize(dir);
+       err = f2fs_dquot_initialize(dir);
        if (err)
                return err;
 
@@ -965,16 +965,16 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
                        return err;
        }
 
-       err = dquot_initialize(old_dir);
+       err = f2fs_dquot_initialize(old_dir);
        if (err)
                goto out;
 
-       err = dquot_initialize(new_dir);
+       err = f2fs_dquot_initialize(new_dir);
        if (err)
                goto out;
 
        if (new_inode) {
-               err = dquot_initialize(new_inode);
+               err = f2fs_dquot_initialize(new_inode);
                if (err)
                        goto out;
        }
@@ -1138,11 +1138,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                        F2FS_I(new_dentry->d_inode)->i_projid)))
                return -EXDEV;
 
-       err = dquot_initialize(old_dir);
+       err = f2fs_dquot_initialize(old_dir);
        if (err)
                goto out;
 
-       err = dquot_initialize(new_dir);
+       err = f2fs_dquot_initialize(new_dir);
        if (err)
                goto out;
 
index e863136..556fcd8 100644 (file)
@@ -1443,6 +1443,7 @@ page_hit:
                          nid, nid_of_node(page), ino_of_node(page),
                          ofs_of_node(page), cpver_of_node(page),
                          next_blkaddr_of_node(page));
+               set_sbi_flag(sbi, SBI_NEED_FSCK);
                err = -EINVAL;
 out_err:
                ClearPageUptodate(page);
index ff14a6e..18b98cf 100644 (file)
@@ -138,11 +138,6 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
        return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
 }
 
-static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
-{
-       return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8;
-}
-
 enum mem_type {
        FREE_NIDS,      /* indicates the free nid list */
        NAT_ENTRIES,    /* indicates the cached nat entry */
index 0465551..6a1b466 100644 (file)
@@ -81,7 +81,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
        if (IS_ERR(inode))
                return ERR_CAST(inode);
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                goto err_out;
 
@@ -203,7 +203,7 @@ retry:
                        goto out_put;
                }
 
-               err = dquot_initialize(einode);
+               err = f2fs_dquot_initialize(einode);
                if (err) {
                        iput(einode);
                        goto out_put;
@@ -508,7 +508,7 @@ got_it:
                if (IS_ERR(inode))
                        return PTR_ERR(inode);
 
-               ret = dquot_initialize(inode);
+               ret = f2fs_dquot_initialize(inode);
                if (ret) {
                        iput(inode);
                        return ret;
@@ -787,8 +787,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        }
 
 #ifdef CONFIG_QUOTA
-       /* Needed for iput() to work correctly and not trash data */
-       sbi->sb->s_flags |= SB_ACTIVE;
        /* Turn on quotas so that they are updated correctly */
        quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
 #endif
@@ -816,10 +814,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
        err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
        if (!err)
                f2fs_bug_on(sbi, !list_empty(&inode_list));
-       else {
-               /* restore s_flags to let iput() trash data */
-               sbi->sb->s_flags = s_flags;
-       }
+       else
+               f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
 skip:
        fix_curseg_write_pointer = !check_only || list_empty(&inode_list);
 
index a135d22..df9ed75 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/timer.h>
 #include <linux/freezer.h>
 #include <linux/sched/signal.h>
+#include <linux/random.h>
 
 #include "f2fs.h"
 #include "segment.h"
@@ -529,6 +530,25 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
        }
 }
 
+static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
+{
+       int factor = rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
+       unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
+       unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
+       unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
+       unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
+       unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
+       unsigned int threshold = sbi->blocks_per_seg * factor *
+                                       DEFAULT_DIRTY_THRESHOLD;
+       unsigned int global_threshold = threshold * 3 / 2;
+
+       if (dents >= threshold || qdata >= threshold ||
+               nodes >= threshold || meta >= threshold ||
+               imeta >= threshold)
+               return true;
+       return dents + qdata + nodes + meta + imeta >  global_threshold;
+}
+
 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 {
        if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
@@ -547,8 +567,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
        else
                f2fs_build_free_nids(sbi, false, false);
 
-       if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
-               excess_prefree_segs(sbi))
+       if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
+               excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
                goto do_sync;
 
        /* there is background inflight IO or foreground operation recently */
@@ -561,7 +581,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
                goto do_sync;
 
        /* checkpoint is the only way to shrink partial cached entries */
-       if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
+       if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
                f2fs_available_free_memory(sbi, INO_ENTRIES))
                return;
 
@@ -2630,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
        unsigned short seg_type = curseg->seg_type;
 
        sanity_check_seg_type(sbi, seg_type);
+       if (f2fs_need_rand_seg(sbi))
+               return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
 
        /* if segs_per_sec is large than 1, we need to keep original policy. */
        if (__is_large_section(sbi))
@@ -2681,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
        curseg->next_segno = segno;
        reset_curseg(sbi, type, 1);
        curseg->alloc_type = LFS;
+       if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
+               curseg->fragment_remained_chunk =
+                               prandom_u32() % sbi->max_fragment_chunk + 1;
 }
 
 static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2707,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
                                struct curseg_info *seg)
 {
-       if (seg->alloc_type == SSR)
+       if (seg->alloc_type == SSR) {
                seg->next_blkoff =
                        __next_free_blkoff(sbi, seg->segno,
                                                seg->next_blkoff + 1);
-       else
+       } else {
                seg->next_blkoff++;
+               if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
+                       /* To allocate block chunks in different sizes, use random number */
+                       if (--seg->fragment_remained_chunk <= 0) {
+                               seg->fragment_remained_chunk =
+                                  prandom_u32() % sbi->max_fragment_chunk + 1;
+                               seg->next_blkoff +=
+                                  prandom_u32() % sbi->max_fragment_hole + 1;
+                       }
+               }
+       }
 }
 
 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
@@ -3485,24 +3520,30 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
        up_read(&SM_I(sbi)->curseg_lock);
 }
 
-static void update_device_state(struct f2fs_io_info *fio)
+void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
+                                       block_t blkaddr, unsigned int blkcnt)
 {
-       struct f2fs_sb_info *sbi = fio->sbi;
-       unsigned int devidx;
-
        if (!f2fs_is_multi_device(sbi))
                return;
 
-       devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
+       while (1) {
+               unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
+               unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
 
-       /* update device state for fsync */
-       f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
+               /* update device state for fsync */
+               f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
 
-       /* update device state for checkpoint */
-       if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
-               spin_lock(&sbi->dev_lock);
-               f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
-               spin_unlock(&sbi->dev_lock);
+               /* update device state for checkpoint */
+               if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
+                       spin_lock(&sbi->dev_lock);
+                       f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
+                       spin_unlock(&sbi->dev_lock);
+               }
+
+               if (blkcnt <= blks)
+                       break;
+               blkcnt -= blks;
+               blkaddr += blks;
        }
 }
 
@@ -3529,7 +3570,7 @@ reallocate:
                goto reallocate;
        }
 
-       update_device_state(fio);
+       f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
 
        if (keep_order)
                up_read(&fio->sbi->io_order_lock);
@@ -3611,6 +3652,9 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
                goto drop_bio;
        }
 
+       invalidate_mapping_pages(META_MAPPING(sbi),
+                               fio->new_blkaddr, fio->new_blkaddr);
+
        stat_inc_inplace_blocks(fio->sbi);
 
        if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
@@ -3618,7 +3662,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
        else
                err = f2fs_submit_page_bio(fio);
        if (!err) {
-               update_device_state(fio);
+               f2fs_update_device_state(fio->sbi, fio->ino,
+                                               fio->new_blkaddr, 1);
                f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
        }
 
index 89fff25..46fde9f 100644 (file)
@@ -314,6 +314,7 @@ struct curseg_info {
        unsigned short next_blkoff;             /* next block offset to write */
        unsigned int zone;                      /* current zone number */
        unsigned int next_segno;                /* preallocated segment */
+       int fragment_remained_chunk;            /* remained block size in a chunk for block fragmentation mode */
        bool inited;                            /* indicate inmem log is inited */
 };
 
index cf049a0..040b6d0 100644 (file)
@@ -58,6 +58,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
        [FAULT_DISCARD]         = "discard error",
        [FAULT_WRITE_IO]        = "write IO error",
        [FAULT_SLAB_ALLOC]      = "slab alloc",
+       [FAULT_DQUOT_INIT]      = "dquot initialize",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
@@ -592,7 +593,7 @@ static int f2fs_set_zstd_level(struct f2fs_sb_info *sbi, const char *str)
        if (kstrtouint(str + 1, 10, &level))
                return -EINVAL;
 
-       if (!level || level > ZSTD_maxCLevel()) {
+       if (!level || level > zstd_max_clevel()) {
                f2fs_info(sbi, "invalid zstd compress level: %d", level);
                return -EINVAL;
        }
@@ -817,6 +818,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                                F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE;
                        } else if (!strcmp(name, "lfs")) {
                                F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS;
+                       } else if (!strcmp(name, "fragment:segment")) {
+                               F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG;
+                       } else if (!strcmp(name, "fragment:block")) {
+                               F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK;
                        } else {
                                kfree(name);
                                return -EINVAL;
@@ -1292,7 +1297,7 @@ default_check:
        /* Not pass down write hints if the number of active logs is lesser
         * than NR_CURSEG_PERSIST_TYPE.
         */
-       if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
+       if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE)
                F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
 
        if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
@@ -1896,6 +1901,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
                seq_puts(seq, "adaptive");
        else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS)
                seq_puts(seq, "lfs");
+       else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG)
+               seq_puts(seq, "fragment:segment");
+       else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
+               seq_puts(seq, "fragment:block");
        seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs);
        if (test_opt(sbi, RESERVE_ROOT))
                seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u",
@@ -2491,6 +2500,16 @@ retry:
        return len - towrite;
 }
 
+int f2fs_dquot_initialize(struct inode *inode)
+{
+       if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT)) {
+               f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_DQUOT_INIT);
+               return -ESRCH;
+       }
+
+       return dquot_initialize(inode);
+}
+
 static struct dquot **f2fs_get_dquots(struct inode *inode)
 {
        return F2FS_I(inode)->i_dquot;
@@ -2875,6 +2894,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
        .get_nextdqblk  = dquot_get_next_dqblk,
 };
 #else
+int f2fs_dquot_initialize(struct inode *inode)
+{
+       return 0;
+}
+
 int f2fs_quota_sync(struct super_block *sb, int type)
 {
        return 0;
@@ -3486,7 +3510,7 @@ skip_cross:
                NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) {
                f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)",
                          cp_payload, nat_bits_blocks);
-               return -EFSCORRUPTED;
+               return 1;
        }
 
        if (unlikely(f2fs_cp_error(sbi))) {
@@ -3522,6 +3546,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
        sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH;
        sbi->migration_granularity = sbi->segs_per_sec;
        sbi->seq_file_ra_mul = MIN_RA_MUL;
+       sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
+       sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
 
        sbi->dir_level = DEF_DIR_LEVEL;
        sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL;
@@ -3746,6 +3772,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 {
        struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
        unsigned int max_devices = MAX_DEVICES;
+       unsigned int logical_blksize;
        int i;
 
        /* Initialize single device information */
@@ -3766,6 +3793,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
        if (!sbi->devs)
                return -ENOMEM;
 
+       logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
+       sbi->aligned_blksize = true;
+
        for (i = 0; i < max_devices; i++) {
 
                if (i > 0 && !RDEV(i).path[0])
@@ -3802,6 +3832,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
                /* to release errored devices */
                sbi->s_ndevs = i + 1;
 
+               if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
+                       sbi->aligned_blksize = false;
+
 #ifdef CONFIG_BLK_DEV_ZONED
                if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
                                !f2fs_sb_has_blkzoned(sbi)) {
@@ -4351,6 +4384,8 @@ free_node_inode:
 free_stats:
        f2fs_destroy_stats(sbi);
 free_nm:
+       /* stop discard thread before destroying node manager */
+       f2fs_stop_discard_thread(sbi);
        f2fs_destroy_node_manager(sbi);
 free_sm:
        f2fs_destroy_segment_manager(sbi);
index a32fe31..7d28924 100644 (file)
@@ -196,7 +196,7 @@ static ssize_t encoding_show(struct f2fs_attr *a,
        struct super_block *sb = sbi->sb;
 
        if (f2fs_sb_has_casefold(sbi))
-               return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
+               return sysfs_emit(buf, "%s (%d.%d.%d)\n",
                        sb->s_encoding->charset,
                        (sb->s_encoding->version >> 16) & 0xff,
                        (sb->s_encoding->version >> 8) & 0xff,
@@ -245,7 +245,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a,
 static ssize_t main_blkaddr_show(struct f2fs_attr *a,
                                struct f2fs_sb_info *sbi, char *buf)
 {
-       return snprintf(buf, PAGE_SIZE, "%llu\n",
+       return sysfs_emit(buf, "%llu\n",
                        (unsigned long long)MAIN_BLKADDR(sbi));
 }
 
@@ -551,6 +551,22 @@ out:
                return count;
        }
 
+       if (!strcmp(a->attr.name, "max_fragment_chunk")) {
+               if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
+                       sbi->max_fragment_chunk = t;
+               else
+                       return -EINVAL;
+               return count;
+       }
+
+       if (!strcmp(a->attr.name, "max_fragment_hole")) {
+               if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE)
+                       sbi->max_fragment_hole = t;
+               else
+                       return -EINVAL;
+               return count;
+       }
+
        *ui = (unsigned int)t;
 
        return count;
@@ -781,6 +797,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -859,6 +877,8 @@ static struct attribute *f2fs_attrs[] = {
        ATTR_LIST(seq_file_ra_mul),
        ATTR_LIST(gc_segment_mode),
        ATTR_LIST(gc_reclaimed_segments),
+       ATTR_LIST(max_fragment_chunk),
+       ATTR_LIST(max_fragment_hole),
        NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
index 03549b5..fe5acdc 100644 (file)
@@ -136,7 +136,7 @@ static int f2fs_begin_enable_verity(struct file *filp)
         * here and not rely on ->open() doing it.  This must be done before
         * evicting the inline data.
         */
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                return err;
 
index 1d2d29d..e348f33 100644 (file)
@@ -773,7 +773,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
        if (!f2fs_is_checkpoint_ready(sbi))
                return -ENOSPC;
 
-       err = dquot_initialize(inode);
+       err = f2fs_dquot_initialize(inode);
        if (err)
                return err;
 
index 7235d53..d671084 100644 (file)
@@ -940,7 +940,7 @@ do_alloc:
                else if (height == ip->i_height)
                        ret = gfs2_hole_size(inode, lblock, len, mp, iomap);
                else
-                       iomap->length = size - pos;
+                       iomap->length = size - iomap->offset;
        } else if (flags & IOMAP_WRITE) {
                u64 alloc_size;
 
index adafaaf..3e718cf 100644 (file)
@@ -773,8 +773,8 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
                                         size_t *prev_count,
                                         size_t *window_size)
 {
-       char __user *p = i->iov[0].iov_base + i->iov_offset;
        size_t count = iov_iter_count(i);
+       char __user *p;
        int pages = 1;
 
        if (likely(!count))
@@ -787,14 +787,14 @@ static inline bool should_fault_in_pages(ssize_t ret, struct iov_iter *i,
        if (*prev_count != count || !*window_size) {
                int pages, nr_dirtied;
 
-               pages = min_t(int, BIO_MAX_VECS,
-                             DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE));
+               pages = min_t(int, BIO_MAX_VECS, DIV_ROUND_UP(count, PAGE_SIZE));
                nr_dirtied = max(current->nr_dirtied_pause -
                                 current->nr_dirtied, 1);
                pages = min(pages, nr_dirtied);
        }
 
        *prev_count = count;
+       p = i->iov[0].iov_base + i->iov_offset;
        *window_size = (size_t)PAGE_SIZE * pages - offset_in_page(p);
        return true;
 }
@@ -1013,6 +1013,7 @@ static ssize_t gfs2_file_buffered_write(struct kiocb *iocb,
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder *statfs_gh = NULL;
        size_t prev_count = 0, window_size = 0;
+       size_t orig_count = iov_iter_count(from);
        size_t read = 0;
        ssize_t ret;
 
@@ -1057,6 +1058,7 @@ retry_under_glock:
        if (inode == sdp->sd_rindex)
                gfs2_glock_dq_uninit(statfs_gh);
 
+       from->count = orig_count - read;
        if (should_fault_in_pages(ret, from, &prev_count, &window_size)) {
                size_t leftover;
 
@@ -1064,6 +1066,7 @@ retry_under_glock:
                leftover = fault_in_iov_iter_readable(from, window_size);
                gfs2_holder_disallow_demote(gh);
                if (leftover != window_size) {
+                       from->count = min(from->count, window_size - leftover);
                        if (!gfs2_holder_queued(gh)) {
                                if (read)
                                        goto out_uninit;
index 19f38ae..8dbd6fe 100644 (file)
@@ -411,14 +411,14 @@ static void do_error(struct gfs2_glock *gl, const int ret)
 static void demote_incompat_holders(struct gfs2_glock *gl,
                                    struct gfs2_holder *new_gh)
 {
-       struct gfs2_holder *gh;
+       struct gfs2_holder *gh, *tmp;
 
        /*
         * Demote incompatible holders before we make ourselves eligible.
         * (This holder may or may not allow auto-demoting, but we don't want
         * to demote the new holder before it's even granted.)
         */
-       list_for_each_entry(gh, &gl->gl_holders, gh_list) {
+       list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
                /*
                 * Since holders are at the front of the list, we stop when we
                 * find the first non-holder.
@@ -496,7 +496,7 @@ again:
         * Since we unlock the lockref lock, we set a flag to indicate
         * instantiate is in progress.
         */
-       if (test_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
+       if (test_and_set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags)) {
                wait_on_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG,
                            TASK_UNINTERRUPTIBLE);
                /*
@@ -509,14 +509,10 @@ again:
                goto again;
        }
 
-       set_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
-
        ret = glops->go_instantiate(gh);
        if (!ret)
                clear_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags);
-       clear_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
-       smp_mb__after_atomic();
-       wake_up_bit(&gl->gl_flags, GLF_INSTANTIATE_IN_PROG);
+       clear_and_wake_up_bit(GLF_INSTANTIATE_IN_PROG, &gl->gl_flags);
        return ret;
 }
 
index 5b12137..0f93e8b 100644 (file)
@@ -1402,13 +1402,6 @@ out:
        gfs2_ordered_del_inode(ip);
        clear_inode(inode);
        gfs2_dir_hash_inval(ip);
-       if (ip->i_gl) {
-               glock_clear_object(ip->i_gl, ip);
-               wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
-               gfs2_glock_add_to_lru(ip->i_gl);
-               gfs2_glock_put_eventually(ip->i_gl);
-               ip->i_gl = NULL;
-       }
        if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
                struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 
@@ -1421,6 +1414,13 @@ out:
                gfs2_holder_uninit(&ip->i_iopen_gh);
                gfs2_glock_put_eventually(gl);
        }
+       if (ip->i_gl) {
+               glock_clear_object(ip->i_gl, ip);
+               wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
+               gfs2_glock_add_to_lru(ip->i_gl);
+               gfs2_glock_put_eventually(ip->i_gl);
+               ip->i_gl = NULL;
+       }
 }
 
 static struct inode *gfs2_alloc_inode(struct super_block *sb)
index afd955d..88202de 100644 (file)
@@ -423,9 +423,10 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work)
        return work->flags >> IO_WQ_HASH_SHIFT;
 }
 
-static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
 {
        struct io_wq *wq = wqe->wq;
+       bool ret = false;
 
        spin_lock_irq(&wq->hash->wait.lock);
        if (list_empty(&wqe->wait.entry)) {
@@ -433,9 +434,11 @@ static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
                if (!test_bit(hash, &wq->hash->map)) {
                        __set_current_state(TASK_RUNNING);
                        list_del_init(&wqe->wait.entry);
+                       ret = true;
                }
        }
        spin_unlock_irq(&wq->hash->wait.lock);
+       return ret;
 }
 
 static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
@@ -475,14 +478,21 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
        }
 
        if (stall_hash != -1U) {
+               bool unstalled;
+
                /*
                 * Set this before dropping the lock to avoid racing with new
                 * work being added and clearing the stalled bit.
                 */
                set_bit(IO_ACCT_STALLED_BIT, &acct->flags);
                raw_spin_unlock(&wqe->lock);
-               io_wait_on_hash(wqe, stall_hash);
+               unstalled = io_wait_on_hash(wqe, stall_hash);
                raw_spin_lock(&wqe->lock);
+               if (unstalled) {
+                       clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                       if (wq_has_sleeper(&wqe->wq->hash->wait))
+                               wake_up(&wqe->wq->hash->wait);
+               }
        }
 
        return NULL;
@@ -564,8 +574,11 @@ get_next:
                                io_wqe_enqueue(wqe, linked);
 
                        if (hash != -1U && !next_hashed) {
+                               /* serialize hash clear with wake_up() */
+                               spin_lock_irq(&wq->hash->wait.lock);
                                clear_bit(hash, &wq->hash->map);
                                clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
+                               spin_unlock_irq(&wq->hash->wait.lock);
                                if (wq_has_sleeper(&wq->hash->wait))
                                        wake_up(&wq->hash->wait);
                                raw_spin_lock(&wqe->lock);
index b83cbd7..e1fe177 100644 (file)
@@ -6,7 +6,6 @@ config SMB_SERVER
        select NLS
        select NLS_UTF8
        select CRYPTO
-       select CRYPTO_MD4
        select CRYPTO_MD5
        select CRYPTO_HMAC
        select CRYPTO_ECB
@@ -19,6 +18,7 @@ config SMB_SERVER
        select CRYPTO_GCM
        select ASN1
        select OID_REGISTRY
+       select CRC32
        default n
        help
          Choose Y here if you want to allow SMB3 compliant clients
index 30a92dd..3503b1c 100644 (file)
@@ -873,9 +873,9 @@ int ksmbd_gen_preauth_integrity_hash(struct ksmbd_conn *conn, char *buf,
                                     __u8 *pi_hash)
 {
        int rc;
-       struct smb2_hdr *rcv_hdr = (struct smb2_hdr *)buf;
+       struct smb2_hdr *rcv_hdr = smb2_get_msg(buf);
        char *all_bytes_msg = (char *)&rcv_hdr->ProtocolId;
-       int msg_size = be32_to_cpu(rcv_hdr->smb2_buf_length);
+       int msg_size = get_rfc1002_len(buf);
        struct ksmbd_crypto_ctx *ctx = NULL;
 
        if (conn->preauth_info->Preauth_HashId !=
@@ -983,7 +983,7 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
                                         u8 *sign)
 {
        struct scatterlist *sg;
-       unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+       unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
        int i, nr_entries[3] = {0}, total_entries = 0, sg_idx = 0;
 
        if (!nvec)
@@ -1047,9 +1047,8 @@ static struct scatterlist *ksmbd_init_sg(struct kvec *iov, unsigned int nvec,
 int ksmbd_crypt_message(struct ksmbd_conn *conn, struct kvec *iov,
                        unsigned int nvec, int enc)
 {
-       struct smb2_transform_hdr *tr_hdr =
-               (struct smb2_transform_hdr *)iov[0].iov_base;
-       unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 24;
+       struct smb2_transform_hdr *tr_hdr = smb2_get_msg(iov[0].iov_base);
+       unsigned int assoc_data_len = sizeof(struct smb2_transform_hdr) - 20;
        int rc;
        struct scatterlist *sg;
        u8 sign[SMB2_SIGNATURE_SIZE] = {};
index b57a0d8..83a94d0 100644 (file)
@@ -158,26 +158,25 @@ void ksmbd_conn_wait_idle(struct ksmbd_conn *conn)
 int ksmbd_conn_write(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb_hdr *rsp_hdr = work->response_buf;
        size_t len = 0;
        int sent;
        struct kvec iov[3];
        int iov_idx = 0;
 
        ksmbd_conn_try_dequeue_request(work);
-       if (!rsp_hdr) {
+       if (!work->response_buf) {
                pr_err("NULL response header\n");
                return -EINVAL;
        }
 
        if (work->tr_buf) {
                iov[iov_idx] = (struct kvec) { work->tr_buf,
-                               sizeof(struct smb2_transform_hdr) };
+                               sizeof(struct smb2_transform_hdr) + 4 };
                len += iov[iov_idx++].iov_len;
        }
 
        if (work->aux_payload_sz) {
-               iov[iov_idx] = (struct kvec) { rsp_hdr, work->resp_hdr_sz };
+               iov[iov_idx] = (struct kvec) { work->response_buf, work->resp_hdr_sz };
                len += iov[iov_idx++].iov_len;
                iov[iov_idx] = (struct kvec) { work->aux_payload_buf, work->aux_payload_sz };
                len += iov[iov_idx++].iov_len;
@@ -185,8 +184,8 @@ int ksmbd_conn_write(struct ksmbd_work *work)
                if (work->tr_buf)
                        iov[iov_idx].iov_len = work->resp_hdr_sz;
                else
-                       iov[iov_idx].iov_len = get_rfc1002_len(rsp_hdr) + 4;
-               iov[iov_idx].iov_base = rsp_hdr;
+                       iov[iov_idx].iov_len = get_rfc1002_len(work->response_buf) + 4;
+               iov[iov_idx].iov_base = work->response_buf;
                len += iov[iov_idx++].iov_len;
        }
 
index fd58eb4..14b9cae 100644 (file)
@@ -69,7 +69,6 @@ int ksmbd_workqueue_init(void)
 
 void ksmbd_workqueue_destroy(void)
 {
-       flush_workqueue(ksmbd_wq);
        destroy_workqueue(ksmbd_wq);
        ksmbd_wq = NULL;
 }
index f7156bc..5ece58e 100644 (file)
@@ -92,7 +92,7 @@ struct ksmbd_work {
  */
 static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
 {
-       return work->response_buf + work->next_smb2_rsp_hdr_off;
+       return work->response_buf + work->next_smb2_rsp_hdr_off + 4;
 }
 
 /**
@@ -101,7 +101,7 @@ static inline void *ksmbd_resp_buf_next(struct ksmbd_work *work)
  */
 static inline void *ksmbd_req_buf_next(struct ksmbd_work *work)
 {
-       return work->request_buf + work->next_smb2_rcv_hdr_off;
+       return work->request_buf + work->next_smb2_rcv_hdr_off + 4;
 }
 
 struct ksmbd_work *ksmbd_alloc_work_struct(void);
index f9dae6e..077b876 100644 (file)
@@ -629,10 +629,10 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
                return;
        }
 
-       rsp_hdr = work->response_buf;
+       rsp_hdr = smb2_get_msg(work->response_buf);
        memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-       rsp_hdr->smb2_buf_length =
-               cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+       *(__be32 *)work->response_buf =
+               cpu_to_be32(conn->vals->header_size);
        rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
        rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
        rsp_hdr->CreditRequest = cpu_to_le16(0);
@@ -645,7 +645,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
        rsp_hdr->SessionId = 0;
        memset(rsp_hdr->Signature, 0, 16);
 
-       rsp = work->response_buf;
+       rsp = smb2_get_msg(work->response_buf);
 
        rsp->StructureSize = cpu_to_le16(24);
        if (!br_info->open_trunc &&
@@ -659,7 +659,7 @@ static void __smb2_oplock_break_noti(struct work_struct *wk)
        rsp->PersistentFid = cpu_to_le64(fp->persistent_id);
        rsp->VolatileFid = cpu_to_le64(fp->volatile_id);
 
-       inc_rfc1001_len(rsp, 24);
+       inc_rfc1001_len(work->response_buf, 24);
 
        ksmbd_debug(OPLOCK,
                    "sending oplock break v_id %llu p_id = %llu lock level = %d\n",
@@ -736,10 +736,10 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
                return;
        }
 
-       rsp_hdr = work->response_buf;
+       rsp_hdr = smb2_get_msg(work->response_buf);
        memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-       rsp_hdr->smb2_buf_length =
-               cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+       *(__be32 *)work->response_buf =
+               cpu_to_be32(conn->vals->header_size);
        rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
        rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
        rsp_hdr->CreditRequest = cpu_to_le16(0);
@@ -752,7 +752,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
        rsp_hdr->SessionId = 0;
        memset(rsp_hdr->Signature, 0, 16);
 
-       rsp = work->response_buf;
+       rsp = smb2_get_msg(work->response_buf);
        rsp->StructureSize = cpu_to_le16(44);
        rsp->Epoch = br_info->epoch;
        rsp->Flags = 0;
@@ -768,7 +768,7 @@ static void __smb2_lease_break_noti(struct work_struct *wk)
        rsp->AccessMaskHint = 0;
        rsp->ShareMaskHint = 0;
 
-       inc_rfc1001_len(rsp, 44);
+       inc_rfc1001_len(work->response_buf, 44);
 
        ksmbd_conn_write(work);
        ksmbd_free_work_struct(work);
@@ -1335,19 +1335,16 @@ __u8 smb2_map_lease_to_oplock(__le32 lease_state)
  */
 void create_lease_buf(u8 *rbuf, struct lease *lease)
 {
-       char *LeaseKey = (char *)&lease->lease_key;
-
        if (lease->version == 2) {
                struct create_lease_v2 *buf = (struct create_lease_v2 *)rbuf;
-               char *ParentLeaseKey = (char *)&lease->parent_lease_key;
 
                memset(buf, 0, sizeof(struct create_lease_v2));
-               buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
-               buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+               memcpy(buf->lcontext.LeaseKey, lease->lease_key,
+                      SMB2_LEASE_KEY_SIZE);
                buf->lcontext.LeaseFlags = lease->flags;
                buf->lcontext.LeaseState = lease->state;
-               buf->lcontext.ParentLeaseKeyLow = *((__le64 *)ParentLeaseKey);
-               buf->lcontext.ParentLeaseKeyHigh = *((__le64 *)(ParentLeaseKey + 8));
+               memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key,
+                      SMB2_LEASE_KEY_SIZE);
                buf->ccontext.DataOffset = cpu_to_le16(offsetof
                                (struct create_lease_v2, lcontext));
                buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2));
@@ -1362,8 +1359,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease)
                struct create_lease *buf = (struct create_lease *)rbuf;
 
                memset(buf, 0, sizeof(struct create_lease));
-               buf->lcontext.LeaseKeyLow = *((__le64 *)LeaseKey);
-               buf->lcontext.LeaseKeyHigh = *((__le64 *)(LeaseKey + 8));
+               memcpy(buf->lcontext.LeaseKey, lease->lease_key, SMB2_LEASE_KEY_SIZE);
                buf->lcontext.LeaseFlags = lease->flags;
                buf->lcontext.LeaseState = lease->state;
                buf->ccontext.DataOffset = cpu_to_le16(offsetof
@@ -1398,7 +1394,7 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
        if (!lreq)
                return NULL;
 
-       data_offset = (char *)req + 4 + le32_to_cpu(req->CreateContextsOffset);
+       data_offset = (char *)req + le32_to_cpu(req->CreateContextsOffset);
        cc = (struct create_context *)data_offset;
        do {
                cc = (struct create_context *)((char *)cc + next);
@@ -1416,19 +1412,17 @@ struct lease_ctx_info *parse_lease_state(void *open_req)
                if (sizeof(struct lease_context_v2) == le32_to_cpu(cc->DataLength)) {
                        struct create_lease_v2 *lc = (struct create_lease_v2 *)cc;
 
-                       *((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
-                       *((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+                       memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
                        lreq->req_state = lc->lcontext.LeaseState;
                        lreq->flags = lc->lcontext.LeaseFlags;
                        lreq->duration = lc->lcontext.LeaseDuration;
-                       *((__le64 *)lreq->parent_lease_key) = lc->lcontext.ParentLeaseKeyLow;
-                       *((__le64 *)(lreq->parent_lease_key + 8)) = lc->lcontext.ParentLeaseKeyHigh;
+                       memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey,
+                              SMB2_LEASE_KEY_SIZE);
                        lreq->version = 2;
                } else {
                        struct create_lease *lc = (struct create_lease *)cc;
 
-                       *((__le64 *)lreq->lease_key) = lc->lcontext.LeaseKeyLow;
-                       *((__le64 *)(lreq->lease_key + 8)) = lc->lcontext.LeaseKeyHigh;
+                       memcpy(lreq->lease_key, lc->lcontext.LeaseKey, SMB2_LEASE_KEY_SIZE);
                        lreq->req_state = lc->lcontext.LeaseState;
                        lreq->flags = lc->lcontext.LeaseFlags;
                        lreq->duration = lc->lcontext.LeaseDuration;
@@ -1462,7 +1456,7 @@ struct create_context *smb2_find_context_vals(void *open_req, const char *tag)
         * CreateContextsOffset and CreateContextsLength are guaranteed to
         * be valid because of ksmbd_smb2_check_message().
         */
-       cc = (struct create_context *)((char *)req + 4 +
+       cc = (struct create_context *)((char *)req +
                                       le32_to_cpu(req->CreateContextsOffset));
        remain_len = le32_to_cpu(req->CreateContextsLength);
        do {
index 119b804..0cf7a2b 100644 (file)
@@ -28,8 +28,6 @@
 #define OPLOCK_WRITE_TO_NONE           0x04
 #define OPLOCK_READ_TO_NONE            0x08
 
-#define SMB2_LEASE_KEY_SIZE            16
-
 struct lease_ctx_info {
        __u8                    lease_key[SMB2_LEASE_KEY_SIZE];
        __le32                  req_state;
index 2a2b213..2e12f6d 100644 (file)
@@ -622,7 +622,6 @@ MODULE_DESCRIPTION("Linux kernel CIFS/SMB SERVER");
 MODULE_LICENSE("GPL");
 MODULE_SOFTDEP("pre: ecb");
 MODULE_SOFTDEP("pre: hmac");
-MODULE_SOFTDEP("pre: md4");
 MODULE_SOFTDEP("pre: md5");
 MODULE_SOFTDEP("pre: nls");
 MODULE_SOFTDEP("pre: aes");
@@ -632,5 +631,6 @@ MODULE_SOFTDEP("pre: sha512");
 MODULE_SOFTDEP("pre: aead2");
 MODULE_SOFTDEP("pre: ccm");
 MODULE_SOFTDEP("pre: gcm");
+MODULE_SOFTDEP("pre: crc32");
 module_init(ksmbd_server_init)
 module_exit(ksmbd_server_exit)
index 030ca57..50d0b10 100644 (file)
@@ -6,7 +6,6 @@
 
 #include "glob.h"
 #include "nterr.h"
-#include "smb2pdu.h"
 #include "smb_common.h"
 #include "smbstatus.h"
 #include "mgmt/user_session.h"
@@ -347,23 +346,16 @@ static int smb2_validate_credit_charge(struct ksmbd_conn *conn,
 
 int ksmbd_smb2_check_message(struct ksmbd_work *work)
 {
-       struct smb2_pdu *pdu = work->request_buf;
+       struct smb2_pdu *pdu = ksmbd_req_buf_next(work);
        struct smb2_hdr *hdr = &pdu->hdr;
        int command;
        __u32 clc_len;  /* calculated length */
-       __u32 len = get_rfc1002_len(pdu);
+       __u32 len = get_rfc1002_len(work->request_buf);
 
-       if (work->next_smb2_rcv_hdr_off) {
-               pdu = ksmbd_req_buf_next(work);
-               hdr = &pdu->hdr;
-       }
-
-       if (le32_to_cpu(hdr->NextCommand) > 0) {
+       if (le32_to_cpu(hdr->NextCommand) > 0)
                len = le32_to_cpu(hdr->NextCommand);
-       } else if (work->next_smb2_rcv_hdr_off) {
+       else if (work->next_smb2_rcv_hdr_off)
                len -= work->next_smb2_rcv_hdr_off;
-               len = round_up(len, 8);
-       }
 
        if (check_smb2_hdr(hdr))
                return 1;
index fb6a65d..0a5d845 100644 (file)
@@ -6,7 +6,6 @@
 
 #include <linux/slab.h>
 #include "glob.h"
-#include "smb2pdu.h"
 
 #include "auth.h"
 #include "connection.h"
@@ -199,7 +198,7 @@ void init_smb2_1_server(struct ksmbd_conn *conn)
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
        conn->max_credits = SMB2_MAX_CREDITS;
-       conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256;
+       conn->signing_algorithm = SIGNING_ALG_HMAC_SHA256_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
@@ -217,7 +216,7 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
        conn->max_credits = SMB2_MAX_CREDITS;
-       conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+       conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
@@ -242,7 +241,7 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
        conn->max_credits = SMB2_MAX_CREDITS;
-       conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+       conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
@@ -267,7 +266,7 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
        conn->cmds = smb2_0_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb2_0_server_cmds);
        conn->max_credits = SMB2_MAX_CREDITS;
-       conn->signing_algorithm = SIGNING_ALG_AES_CMAC;
+       conn->signing_algorithm = SIGNING_ALG_AES_CMAC_LE;
 
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
index 7e448df..121f8e8 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/falloc.h>
 
 #include "glob.h"
-#include "smb2pdu.h"
 #include "smbfsctl.h"
 #include "oplock.h"
 #include "smbacl.h"
@@ -44,8 +43,8 @@ static void __wbuf(struct ksmbd_work *work, void **req, void **rsp)
                *req = ksmbd_req_buf_next(work);
                *rsp = ksmbd_resp_buf_next(work);
        } else {
-               *req = work->request_buf;
-               *rsp = work->response_buf;
+               *req = smb2_get_msg(work->request_buf);
+               *rsp = smb2_get_msg(work->response_buf);
        }
 }
 
@@ -93,13 +92,14 @@ struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn
  */
 int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
 {
-       struct smb2_hdr *req_hdr = work->request_buf;
+       struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+       unsigned int cmd = le16_to_cpu(req_hdr->Command);
        int tree_id;
 
        work->tcon = NULL;
-       if (work->conn->ops->get_cmd_val(work) == SMB2_TREE_CONNECT_HE ||
-           work->conn->ops->get_cmd_val(work) ==  SMB2_CANCEL_HE ||
-           work->conn->ops->get_cmd_val(work) ==  SMB2_LOGOFF_HE) {
+       if (cmd == SMB2_TREE_CONNECT_HE ||
+           cmd ==  SMB2_CANCEL_HE ||
+           cmd ==  SMB2_LOGOFF_HE) {
                ksmbd_debug(SMB, "skip to check tree connect request\n");
                return 0;
        }
@@ -130,7 +130,7 @@ void smb2_set_err_rsp(struct ksmbd_work *work)
        if (work->next_smb2_rcv_hdr_off)
                err_rsp = ksmbd_resp_buf_next(work);
        else
-               err_rsp = work->response_buf;
+               err_rsp = smb2_get_msg(work->response_buf);
 
        if (err_rsp->hdr.Status != STATUS_STOPPED_ON_SYMLINK) {
                err_rsp->StructureSize = SMB2_ERROR_STRUCTURE_SIZE2_LE;
@@ -150,7 +150,7 @@ void smb2_set_err_rsp(struct ksmbd_work *work)
  */
 bool is_smb2_neg_cmd(struct ksmbd_work *work)
 {
-       struct smb2_hdr *hdr = work->request_buf;
+       struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
 
        /* is it SMB2 header ? */
        if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -174,7 +174,7 @@ bool is_smb2_neg_cmd(struct ksmbd_work *work)
  */
 bool is_smb2_rsp(struct ksmbd_work *work)
 {
-       struct smb2_hdr *hdr = work->response_buf;
+       struct smb2_hdr *hdr = smb2_get_msg(work->response_buf);
 
        /* is it SMB2 header ? */
        if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -200,7 +200,7 @@ u16 get_smb2_cmd_val(struct ksmbd_work *work)
        if (work->next_smb2_rcv_hdr_off)
                rcv_hdr = ksmbd_req_buf_next(work);
        else
-               rcv_hdr = work->request_buf;
+               rcv_hdr = smb2_get_msg(work->request_buf);
        return le16_to_cpu(rcv_hdr->Command);
 }
 
@@ -216,7 +216,7 @@ void set_smb2_rsp_status(struct ksmbd_work *work, __le32 err)
        if (work->next_smb2_rcv_hdr_off)
                rsp_hdr = ksmbd_resp_buf_next(work);
        else
-               rsp_hdr = work->response_buf;
+               rsp_hdr = smb2_get_msg(work->response_buf);
        rsp_hdr->Status = err;
        smb2_set_err_rsp(work);
 }
@@ -237,13 +237,11 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
        if (conn->need_neg == false)
                return -EINVAL;
 
-       rsp_hdr = work->response_buf;
+       *(__be32 *)work->response_buf =
+               cpu_to_be32(conn->vals->header_size);
 
+       rsp_hdr = smb2_get_msg(work->response_buf);
        memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-
-       rsp_hdr->smb2_buf_length =
-               cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
-
        rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
        rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
        rsp_hdr->CreditRequest = cpu_to_le16(2);
@@ -256,7 +254,7 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
        rsp_hdr->SessionId = 0;
        memset(rsp_hdr->Signature, 0, 16);
 
-       rsp = work->response_buf;
+       rsp = smb2_get_msg(work->response_buf);
 
        WARN_ON(ksmbd_conn_good(work));
 
@@ -277,12 +275,12 @@ int init_smb2_neg_rsp(struct ksmbd_work *work)
 
        rsp->SecurityBufferOffset = cpu_to_le16(128);
        rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
-       ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
-               sizeof(rsp->hdr.smb2_buf_length)) +
+       ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) +
                le16_to_cpu(rsp->SecurityBufferOffset));
-       inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
-               sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
-               AUTH_GSS_LENGTH);
+       inc_rfc1001_len(work->response_buf,
+                       sizeof(struct smb2_negotiate_rsp) -
+                       sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
+                       AUTH_GSS_LENGTH);
        rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
        if (server_conf.signing == KSMBD_CONFIG_OPT_MANDATORY)
                rsp->SecurityMode |= SMB2_NEGOTIATE_SIGNING_REQUIRED_LE;
@@ -387,8 +385,8 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
        next_hdr_offset = le32_to_cpu(req->NextCommand);
 
        new_len = ALIGN(len, 8);
-       inc_rfc1001_len(work->response_buf, ((sizeof(struct smb2_hdr) - 4)
-                       + new_len - len));
+       inc_rfc1001_len(work->response_buf,
+                       sizeof(struct smb2_hdr) + new_len - len);
        rsp->NextCommand = cpu_to_le32(new_len);
 
        work->next_smb2_rcv_hdr_off += next_hdr_offset;
@@ -406,7 +404,7 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
                work->compound_fid = KSMBD_NO_FID;
                work->compound_pfid = KSMBD_NO_FID;
        }
-       memset((char *)rsp_hdr + 4, 0, sizeof(struct smb2_hdr) + 2);
+       memset((char *)rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
        rsp_hdr->ProtocolId = SMB2_PROTO_NUMBER;
        rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
        rsp_hdr->Command = rcv_hdr->Command;
@@ -432,7 +430,7 @@ static void init_chained_smb2_rsp(struct ksmbd_work *work)
  */
 bool is_chained_smb2_message(struct ksmbd_work *work)
 {
-       struct smb2_hdr *hdr = work->request_buf;
+       struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
        unsigned int len, next_cmd;
 
        if (hdr->ProtocolId != SMB2_PROTO_NUMBER)
@@ -483,13 +481,13 @@ bool is_chained_smb2_message(struct ksmbd_work *work)
  */
 int init_smb2_rsp_hdr(struct ksmbd_work *work)
 {
-       struct smb2_hdr *rsp_hdr = work->response_buf;
-       struct smb2_hdr *rcv_hdr = work->request_buf;
+       struct smb2_hdr *rsp_hdr = smb2_get_msg(work->response_buf);
+       struct smb2_hdr *rcv_hdr = smb2_get_msg(work->request_buf);
        struct ksmbd_conn *conn = work->conn;
 
        memset(rsp_hdr, 0, sizeof(struct smb2_hdr) + 2);
-       rsp_hdr->smb2_buf_length =
-               cpu_to_be32(smb2_hdr_size_no_buflen(conn->vals));
+       *(__be32 *)work->response_buf =
+               cpu_to_be32(conn->vals->header_size);
        rsp_hdr->ProtocolId = rcv_hdr->ProtocolId;
        rsp_hdr->StructureSize = SMB2_HEADER_STRUCTURE_SIZE;
        rsp_hdr->Command = rcv_hdr->Command;
@@ -522,7 +520,7 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work)
  */
 int smb2_allocate_rsp_buf(struct ksmbd_work *work)
 {
-       struct smb2_hdr *hdr = work->request_buf;
+       struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
        size_t small_sz = MAX_CIFS_SMALL_BUFFER_SIZE;
        size_t large_sz = small_sz + work->conn->vals->max_trans_size;
        size_t sz = small_sz;
@@ -534,7 +532,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
        if (cmd == SMB2_QUERY_INFO_HE) {
                struct smb2_query_info_req *req;
 
-               req = work->request_buf;
+               req = smb2_get_msg(work->request_buf);
                if (req->InfoType == SMB2_O_INFO_FILE &&
                    (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
                     req->FileInfoClass == FILE_ALL_INFORMATION))
@@ -561,7 +559,7 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
  */
 int smb2_check_user_session(struct ksmbd_work *work)
 {
-       struct smb2_hdr *req_hdr = work->request_buf;
+       struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
        struct ksmbd_conn *conn = work->conn;
        unsigned int cmd = conn->ops->get_cmd_val(work);
        unsigned long long sess_id;
@@ -642,7 +640,7 @@ int setup_async_work(struct ksmbd_work *work, void (*fn)(void **), void **arg)
        struct ksmbd_conn *conn = work->conn;
        int id;
 
-       rsp_hdr = work->response_buf;
+       rsp_hdr = smb2_get_msg(work->response_buf);
        rsp_hdr->Flags |= SMB2_FLAGS_ASYNC_COMMAND;
 
        id = ksmbd_acquire_async_msg_id(&conn->async_ida);
@@ -674,7 +672,7 @@ void smb2_send_interim_resp(struct ksmbd_work *work, __le32 status)
 {
        struct smb2_hdr *rsp_hdr;
 
-       rsp_hdr = work->response_buf;
+       rsp_hdr = smb2_get_msg(work->response_buf);
        smb2_set_err_rsp(work);
        rsp_hdr->Status = status;
 
@@ -715,17 +713,17 @@ static int smb2_get_dos_mode(struct kstat *stat, int attribute)
        int attr = 0;
 
        if (S_ISDIR(stat->mode)) {
-               attr = ATTR_DIRECTORY |
-                       (attribute & (ATTR_HIDDEN | ATTR_SYSTEM));
+               attr = FILE_ATTRIBUTE_DIRECTORY |
+                       (attribute & (FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM));
        } else {
-               attr = (attribute & 0x00005137) | ATTR_ARCHIVE;
-               attr &= ~(ATTR_DIRECTORY);
+               attr = (attribute & 0x00005137) | FILE_ATTRIBUTE_ARCHIVE;
+               attr &= ~(FILE_ATTRIBUTE_DIRECTORY);
                if (S_ISREG(stat->mode) && (server_conf.share_fake_fscaps &
                                FILE_SUPPORTS_SPARSE_FILES))
-                       attr |= ATTR_SPARSE;
+                       attr |= FILE_ATTRIBUTE_SPARSE_FILE;
 
                if (smb2_get_reparse_tag_special_file(stat->mode))
-                       attr |= ATTR_REPARSE;
+                       attr |= FILE_ATTRIBUTE_REPARSE_POINT;
        }
 
        return attr;
@@ -753,16 +751,16 @@ static void build_encrypt_ctxt(struct smb2_encryption_neg_context *pneg_ctxt,
        pneg_ctxt->Ciphers[0] = cipher_type;
 }
 
-static void build_compression_ctxt(struct smb2_compression_ctx *pneg_ctxt,
+static void build_compression_ctxt(struct smb2_compression_capabilities_context *pneg_ctxt,
                                   __le16 comp_algo)
 {
        pneg_ctxt->ContextType = SMB2_COMPRESSION_CAPABILITIES;
        pneg_ctxt->DataLength =
-               cpu_to_le16(sizeof(struct smb2_compression_ctx)
+               cpu_to_le16(sizeof(struct smb2_compression_capabilities_context)
                        - sizeof(struct smb2_neg_context));
        pneg_ctxt->Reserved = cpu_to_le32(0);
        pneg_ctxt->CompressionAlgorithmCount = cpu_to_le16(1);
-       pneg_ctxt->Reserved1 = cpu_to_le32(0);
+       pneg_ctxt->Flags = cpu_to_le32(0);
        pneg_ctxt->CompressionAlgorithms[0] = comp_algo;
 }
 
@@ -802,11 +800,11 @@ static void build_posix_ctxt(struct smb2_posix_neg_context *pneg_ctxt)
 }
 
 static void assemble_neg_contexts(struct ksmbd_conn *conn,
-                                 struct smb2_negotiate_rsp *rsp)
+                                 struct smb2_negotiate_rsp *rsp,
+                                 void *smb2_buf_len)
 {
-       /* +4 is to account for the RFC1001 len field */
        char *pneg_ctxt = (char *)rsp +
-                       le32_to_cpu(rsp->NegotiateContextOffset) + 4;
+                       le32_to_cpu(rsp->NegotiateContextOffset);
        int neg_ctxt_cnt = 1;
        int ctxt_size;
 
@@ -815,7 +813,7 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
        build_preauth_ctxt((struct smb2_preauth_neg_context *)pneg_ctxt,
                           conn->preauth_info->Preauth_HashId);
        rsp->NegotiateContextCount = cpu_to_le16(neg_ctxt_cnt);
-       inc_rfc1001_len(rsp, AUTH_GSS_PADDING);
+       inc_rfc1001_len(smb2_buf_len, AUTH_GSS_PADDING);
        ctxt_size = sizeof(struct smb2_preauth_neg_context);
        /* Round to 8 byte boundary */
        pneg_ctxt += round_up(sizeof(struct smb2_preauth_neg_context), 8);
@@ -839,12 +837,12 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
                ksmbd_debug(SMB,
                            "assemble SMB2_COMPRESSION_CAPABILITIES context\n");
                /* Temporarily set to SMB3_COMPRESS_NONE */
-               build_compression_ctxt((struct smb2_compression_ctx *)pneg_ctxt,
+               build_compression_ctxt((struct smb2_compression_capabilities_context *)pneg_ctxt,
                                       conn->compress_algorithm);
                rsp->NegotiateContextCount = cpu_to_le16(++neg_ctxt_cnt);
-               ctxt_size += sizeof(struct smb2_compression_ctx) + 2;
+               ctxt_size += sizeof(struct smb2_compression_capabilities_context) + 2;
                /* Round to 8 byte boundary */
-               pneg_ctxt += round_up(sizeof(struct smb2_compression_ctx) + 2,
+               pneg_ctxt += round_up(sizeof(struct smb2_compression_capabilities_context) + 2,
                                      8);
        }
 
@@ -869,7 +867,7 @@ static void assemble_neg_contexts(struct ksmbd_conn *conn,
                ctxt_size += sizeof(struct smb2_signing_capabilities) + 2;
        }
 
-       inc_rfc1001_len(rsp, ctxt_size);
+       inc_rfc1001_len(smb2_buf_len, ctxt_size);
 }
 
 static __le32 decode_preauth_ctxt(struct ksmbd_conn *conn,
@@ -918,7 +916,7 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
 }
 
 static void decode_compress_ctxt(struct ksmbd_conn *conn,
-                                struct smb2_compression_ctx *pneg_ctxt)
+                                struct smb2_compression_capabilities_context *pneg_ctxt)
 {
        conn->compress_algorithm = SMB3_COMPRESS_NONE;
 }
@@ -939,8 +937,8 @@ static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
        }
 
        for (i = 0; i < sign_algo_cnt; i++) {
-               if (pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_HMAC_SHA256 ||
-                   pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_AES_CMAC) {
+               if (pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_HMAC_SHA256_LE ||
+                   pneg_ctxt->SigningAlgorithms[i] == SIGNING_ALG_AES_CMAC_LE) {
                        ksmbd_debug(SMB, "Signing Algorithm ID = 0x%x\n",
                                    pneg_ctxt->SigningAlgorithms[i]);
                        conn->signing_negotiated = true;
@@ -952,14 +950,14 @@ static void decode_sign_cap_ctxt(struct ksmbd_conn *conn,
 }
 
 static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
-                                     struct smb2_negotiate_req *req)
+                                     struct smb2_negotiate_req *req,
+                                     int len_of_smb)
 {
        /* +4 is to account for the RFC1001 len field */
-       struct smb2_neg_context *pctx = (struct smb2_neg_context *)((char *)req + 4);
+       struct smb2_neg_context *pctx = (struct smb2_neg_context *)req;
        int i = 0, len_of_ctxts;
        int offset = le32_to_cpu(req->NegotiateContextOffset);
        int neg_ctxt_cnt = le16_to_cpu(req->NegotiateContextCount);
-       int len_of_smb = be32_to_cpu(req->hdr.smb2_buf_length);
        __le32 status = STATUS_INVALID_PARAMETER;
 
        ksmbd_debug(SMB, "decoding %d negotiate contexts\n", neg_ctxt_cnt);
@@ -1011,7 +1009,7 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
                                break;
 
                        decode_compress_ctxt(conn,
-                                            (struct smb2_compression_ctx *)pctx);
+                                            (struct smb2_compression_capabilities_context *)pctx);
                } else if (pctx->ContextType == SMB2_NETNAME_NEGOTIATE_CONTEXT_ID) {
                        ksmbd_debug(SMB,
                                    "deassemble SMB2_NETNAME_NEGOTIATE_CONTEXT_ID context\n");
@@ -1044,8 +1042,8 @@ static __le32 deassemble_neg_contexts(struct ksmbd_conn *conn,
 int smb2_handle_negotiate(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_negotiate_req *req = work->request_buf;
-       struct smb2_negotiate_rsp *rsp = work->response_buf;
+       struct smb2_negotiate_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_negotiate_rsp *rsp = smb2_get_msg(work->response_buf);
        int rc = 0;
        unsigned int smb2_buf_len, smb2_neg_size;
        __le32 status;
@@ -1066,7 +1064,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
        }
 
        smb2_buf_len = get_rfc1002_len(work->request_buf);
-       smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects) - 4;
+       smb2_neg_size = offsetof(struct smb2_negotiate_req, Dialects);
        if (smb2_neg_size > smb2_buf_len) {
                rsp->hdr.Status = STATUS_INVALID_PARAMETER;
                rc = -EINVAL;
@@ -1115,7 +1113,8 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
                        goto err_out;
                }
 
-               status = deassemble_neg_contexts(conn, req);
+               status = deassemble_neg_contexts(conn, req,
+                                                get_rfc1002_len(work->request_buf));
                if (status != STATUS_SUCCESS) {
                        pr_err("deassemble_neg_contexts error(0x%x)\n",
                               status);
@@ -1135,7 +1134,7 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
                                                 conn->preauth_info->Preauth_HashValue);
                rsp->NegotiateContextOffset =
                                cpu_to_le32(OFFSET_OF_NEG_CONTEXT);
-               assemble_neg_contexts(conn, rsp);
+               assemble_neg_contexts(conn, rsp, work->response_buf);
                break;
        case SMB302_PROT_ID:
                init_smb3_02_server(conn);
@@ -1183,10 +1182,9 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
 
        rsp->SecurityBufferOffset = cpu_to_le16(128);
        rsp->SecurityBufferLength = cpu_to_le16(AUTH_GSS_LENGTH);
-       ksmbd_copy_gss_neg_header(((char *)(&rsp->hdr) +
-                                 sizeof(rsp->hdr.smb2_buf_length)) +
-                                  le16_to_cpu(rsp->SecurityBufferOffset));
-       inc_rfc1001_len(rsp, sizeof(struct smb2_negotiate_rsp) -
+       ksmbd_copy_gss_neg_header((char *)(&rsp->hdr) +
+                                 le16_to_cpu(rsp->SecurityBufferOffset));
+       inc_rfc1001_len(work->response_buf, sizeof(struct smb2_negotiate_rsp) -
                        sizeof(struct smb2_hdr) - sizeof(rsp->Buffer) +
                         AUTH_GSS_LENGTH);
        rsp->SecurityMode = SMB2_NEGOTIATE_SIGNING_ENABLED_LE;
@@ -1278,7 +1276,7 @@ static int ntlm_negotiate(struct ksmbd_work *work,
                          struct negotiate_message *negblob,
                          size_t negblob_len)
 {
-       struct smb2_sess_setup_rsp *rsp = work->response_buf;
+       struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
        struct challenge_message *chgblob;
        unsigned char *spnego_blob = NULL;
        u16 spnego_blob_len;
@@ -1386,8 +1384,8 @@ static struct ksmbd_user *session_user(struct ksmbd_conn *conn,
 
 static int ntlm_authenticate(struct ksmbd_work *work)
 {
-       struct smb2_sess_setup_req *req = work->request_buf;
-       struct smb2_sess_setup_rsp *rsp = work->response_buf;
+       struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_conn *conn = work->conn;
        struct ksmbd_session *sess = work->sess;
        struct channel *chann = NULL;
@@ -1410,7 +1408,7 @@ static int ntlm_authenticate(struct ksmbd_work *work)
                memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len);
                rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len);
                kfree(spnego_blob);
-               inc_rfc1001_len(rsp, spnego_blob_len - 1);
+               inc_rfc1001_len(work->response_buf, spnego_blob_len - 1);
        }
 
        user = session_user(conn, req);
@@ -1522,8 +1520,8 @@ binding_session:
 #ifdef CONFIG_SMB_SERVER_KERBEROS5
 static int krb5_authenticate(struct ksmbd_work *work)
 {
-       struct smb2_sess_setup_req *req = work->request_buf;
-       struct smb2_sess_setup_rsp *rsp = work->response_buf;
+       struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_conn *conn = work->conn;
        struct ksmbd_session *sess = work->sess;
        char *in_blob, *out_blob;
@@ -1538,8 +1536,7 @@ static int krb5_authenticate(struct ksmbd_work *work)
        out_blob = (char *)&rsp->hdr.ProtocolId +
                le16_to_cpu(rsp->SecurityBufferOffset);
        out_len = work->response_sz -
-               offsetof(struct smb2_hdr, smb2_buf_length) -
-               le16_to_cpu(rsp->SecurityBufferOffset);
+               (le16_to_cpu(rsp->SecurityBufferOffset) + 4);
 
        /* Check previous session */
        prev_sess_id = le64_to_cpu(req->PreviousSessionId);
@@ -1556,7 +1553,7 @@ static int krb5_authenticate(struct ksmbd_work *work)
                return -EINVAL;
        }
        rsp->SecurityBufferLength = cpu_to_le16(out_len);
-       inc_rfc1001_len(rsp, out_len - 1);
+       inc_rfc1001_len(work->response_buf, out_len - 1);
 
        if ((conn->sign || server_conf.enforced_signing) ||
            (req->SecurityMode & SMB2_NEGOTIATE_SIGNING_REQUIRED))
@@ -1612,8 +1609,8 @@ static int krb5_authenticate(struct ksmbd_work *work)
 int smb2_sess_setup(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_sess_setup_req *req = work->request_buf;
-       struct smb2_sess_setup_rsp *rsp = work->response_buf;
+       struct smb2_sess_setup_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_sess_setup_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_session *sess;
        struct negotiate_message *negblob;
        unsigned int negblob_len, negblob_off;
@@ -1625,7 +1622,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
        rsp->SessionFlags = 0;
        rsp->SecurityBufferOffset = cpu_to_le16(72);
        rsp->SecurityBufferLength = 0;
-       inc_rfc1001_len(rsp, 9);
+       inc_rfc1001_len(work->response_buf, 9);
 
        if (!req->hdr.SessionId) {
                sess = ksmbd_smb2_session_create();
@@ -1699,7 +1696,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
 
        negblob_off = le16_to_cpu(req->SecurityBufferOffset);
        negblob_len = le16_to_cpu(req->SecurityBufferLength);
-       if (negblob_off < (offsetof(struct smb2_sess_setup_req, Buffer) - 4) ||
+       if (negblob_off < offsetof(struct smb2_sess_setup_req, Buffer) ||
            negblob_len < offsetof(struct negotiate_message, NegotiateFlags))
                return -EINVAL;
 
@@ -1739,7 +1736,8 @@ int smb2_sess_setup(struct ksmbd_work *work)
                                 * Note: here total size -1 is done as an
                                 * adjustment for 0 size blob
                                 */
-                               inc_rfc1001_len(rsp, le16_to_cpu(rsp->SecurityBufferLength) - 1);
+                               inc_rfc1001_len(work->response_buf,
+                                               le16_to_cpu(rsp->SecurityBufferLength) - 1);
 
                        } else if (negblob->MessageType == NtLmAuthenticate) {
                                rc = ntlm_authenticate(work);
@@ -1828,8 +1826,8 @@ out_err:
 int smb2_tree_connect(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_tree_connect_req *req = work->request_buf;
-       struct smb2_tree_connect_rsp *rsp = work->response_buf;
+       struct smb2_tree_connect_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_tree_connect_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_session *sess = work->sess;
        char *treename = NULL, *name = NULL;
        struct ksmbd_tree_conn_status status;
@@ -1894,7 +1892,7 @@ out_err1:
        rsp->Reserved = 0;
        /* default manual caching */
        rsp->ShareFlags = SMB2_SHAREFLAG_MANUAL_CACHING;
-       inc_rfc1001_len(rsp, 16);
+       inc_rfc1001_len(work->response_buf, 16);
 
        if (!IS_ERR(treename))
                kfree(treename);
@@ -1999,17 +1997,18 @@ static int smb2_create_open_flags(bool file_present, __le32 access,
  */
 int smb2_tree_disconnect(struct ksmbd_work *work)
 {
-       struct smb2_tree_disconnect_rsp *rsp = work->response_buf;
+       struct smb2_tree_disconnect_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_session *sess = work->sess;
        struct ksmbd_tree_connect *tcon = work->tcon;
 
        rsp->StructureSize = cpu_to_le16(4);
-       inc_rfc1001_len(rsp, 4);
+       inc_rfc1001_len(work->response_buf, 4);
 
        ksmbd_debug(SMB, "request\n");
 
        if (!tcon) {
-               struct smb2_tree_disconnect_req *req = work->request_buf;
+               struct smb2_tree_disconnect_req *req =
+                       smb2_get_msg(work->request_buf);
 
                ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
                rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
@@ -2031,11 +2030,11 @@ int smb2_tree_disconnect(struct ksmbd_work *work)
 int smb2_session_logoff(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_logoff_rsp *rsp = work->response_buf;
+       struct smb2_logoff_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_session *sess = work->sess;
 
        rsp->StructureSize = cpu_to_le16(4);
-       inc_rfc1001_len(rsp, 4);
+       inc_rfc1001_len(work->response_buf, 4);
 
        ksmbd_debug(SMB, "request\n");
 
@@ -2048,7 +2047,7 @@ int smb2_session_logoff(struct ksmbd_work *work)
        ksmbd_conn_wait_idle(conn);
 
        if (ksmbd_tree_conn_session_logoff(sess)) {
-               struct smb2_logoff_req *req = work->request_buf;
+               struct smb2_logoff_req *req = smb2_get_msg(work->request_buf);
 
                ksmbd_debug(SMB, "Invalid tid %d\n", req->hdr.Id.SyncId.TreeId);
                rsp->hdr.Status = STATUS_NETWORK_NAME_DELETED;
@@ -2075,8 +2074,8 @@ int smb2_session_logoff(struct ksmbd_work *work)
  */
 static noinline int create_smb2_pipe(struct ksmbd_work *work)
 {
-       struct smb2_create_rsp *rsp = work->response_buf;
-       struct smb2_create_req *req = work->request_buf;
+       struct smb2_create_rsp *rsp = smb2_get_msg(work->response_buf);
+       struct smb2_create_req *req = smb2_get_msg(work->request_buf);
        int id;
        int err;
        char *name;
@@ -2099,7 +2098,7 @@ static noinline int create_smb2_pipe(struct ksmbd_work *work)
        rsp->hdr.Status = STATUS_SUCCESS;
        rsp->StructureSize = cpu_to_le16(89);
        rsp->OplockLevel = SMB2_OPLOCK_LEVEL_NONE;
-       rsp->Reserved = 0;
+       rsp->Flags = 0;
        rsp->CreateAction = cpu_to_le32(FILE_OPENED);
 
        rsp->CreationTime = cpu_to_le64(0);
@@ -2107,14 +2106,14 @@ static noinline int create_smb2_pipe(struct ksmbd_work *work)
        rsp->ChangeTime = cpu_to_le64(0);
        rsp->AllocationSize = cpu_to_le64(0);
        rsp->EndofFile = cpu_to_le64(0);
-       rsp->FileAttributes = ATTR_NORMAL_LE;
+       rsp->FileAttributes = FILE_ATTRIBUTE_NORMAL_LE;
        rsp->Reserved2 = 0;
        rsp->VolatileFileId = cpu_to_le64(id);
        rsp->PersistentFileId = 0;
        rsp->CreateContextsOffset = 0;
        rsp->CreateContextsLength = 0;
 
-       inc_rfc1001_len(rsp, 88); /* StructureSize - 1*/
+       inc_rfc1001_len(work->response_buf, 88); /* StructureSize - 1*/
        kfree(name);
        return 0;
 
@@ -2353,7 +2352,7 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
        struct xattr_dos_attrib da;
        int rc;
 
-       fp->f_ci->m_fattr &= ~(ATTR_HIDDEN_LE | ATTR_SYSTEM_LE);
+       fp->f_ci->m_fattr &= ~(FILE_ATTRIBUTE_HIDDEN_LE | FILE_ATTRIBUTE_SYSTEM_LE);
 
        /* get FileAttributes from XATTR_NAME_DOS_ATTRIBUTE */
        if (!test_share_config_flag(tcon->share_conf,
@@ -2463,7 +2462,7 @@ int smb2_open(struct ksmbd_work *work)
        struct ksmbd_session *sess = work->sess;
        struct ksmbd_tree_connect *tcon = work->tcon;
        struct smb2_create_req *req;
-       struct smb2_create_rsp *rsp, *rsp_org;
+       struct smb2_create_rsp *rsp;
        struct path path;
        struct ksmbd_share_config *share = tcon->share_conf;
        struct ksmbd_file *fp = NULL;
@@ -2489,7 +2488,6 @@ int smb2_open(struct ksmbd_work *work)
        umode_t posix_mode = 0;
        __le32 daccess, maximal_access = 0;
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        if (req->hdr.NextCommand && !work->next_smb2_rcv_hdr_off &&
@@ -2559,7 +2557,7 @@ int smb2_open(struct ksmbd_work *work)
        if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE)
                lc = parse_lease_state(req);
 
-       if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE_LE)) {
+       if (le32_to_cpu(req->ImpersonationLevel) > le32_to_cpu(IL_DELEGATE)) {
                pr_err("Invalid impersonationlevel : 0x%x\n",
                       le32_to_cpu(req->ImpersonationLevel));
                rc = -EIO;
@@ -2567,7 +2565,7 @@ int smb2_open(struct ksmbd_work *work)
                goto err_out1;
        }
 
-       if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK)) {
+       if (req->CreateOptions && !(req->CreateOptions & CREATE_OPTIONS_MASK_LE)) {
                pr_err("Invalid create options : 0x%x\n",
                       le32_to_cpu(req->CreateOptions));
                rc = -EINVAL;
@@ -2609,7 +2607,7 @@ int smb2_open(struct ksmbd_work *work)
                goto err_out1;
        }
 
-       if (req->FileAttributes && !(req->FileAttributes & ATTR_MASK_LE)) {
+       if (req->FileAttributes && !(req->FileAttributes & FILE_ATTRIBUTE_MASK_LE)) {
                pr_err("Invalid file attribute : 0x%x\n",
                       le32_to_cpu(req->FileAttributes));
                rc = -EINVAL;
@@ -2740,7 +2738,7 @@ int smb2_open(struct ksmbd_work *work)
                }
 
                if (req->CreateOptions & FILE_DIRECTORY_FILE_LE &&
-                   req->FileAttributes & ATTR_NORMAL_LE) {
+                   req->FileAttributes & FILE_ATTRIBUTE_NORMAL_LE) {
                        rsp->hdr.Status = STATUS_NOT_A_DIRECTORY;
                        rc = -EIO;
                }
@@ -3119,7 +3117,7 @@ int smb2_open(struct ksmbd_work *work)
        opinfo = rcu_dereference(fp->f_opinfo);
        rsp->OplockLevel = opinfo != NULL ? opinfo->level : 0;
        rcu_read_unlock();
-       rsp->Reserved = 0;
+       rsp->Flags = 0;
        rsp->CreateAction = cpu_to_le32(file_info);
        rsp->CreationTime = cpu_to_le64(fp->create_time);
        time = ksmbd_UnixTimeToNT(stat.atime);
@@ -3140,7 +3138,7 @@ int smb2_open(struct ksmbd_work *work)
 
        rsp->CreateContextsOffset = 0;
        rsp->CreateContextsLength = 0;
-       inc_rfc1001_len(rsp_org, 88); /* StructureSize - 1*/
+       inc_rfc1001_len(work->response_buf, 88); /* StructureSize - 1*/
 
        /* If lease is request send lease context response */
        if (opinfo && opinfo->is_lease) {
@@ -3155,7 +3153,8 @@ int smb2_open(struct ksmbd_work *work)
                create_lease_buf(rsp->Buffer, opinfo->o_lease);
                le32_add_cpu(&rsp->CreateContextsLength,
                             conn->vals->create_lease_size);
-               inc_rfc1001_len(rsp_org, conn->vals->create_lease_size);
+               inc_rfc1001_len(work->response_buf,
+                               conn->vals->create_lease_size);
                next_ptr = &lease_ccontext->Next;
                next_off = conn->vals->create_lease_size;
        }
@@ -3175,7 +3174,8 @@ int smb2_open(struct ksmbd_work *work)
                                le32_to_cpu(maximal_access));
                le32_add_cpu(&rsp->CreateContextsLength,
                             conn->vals->create_mxac_size);
-               inc_rfc1001_len(rsp_org, conn->vals->create_mxac_size);
+               inc_rfc1001_len(work->response_buf,
+                               conn->vals->create_mxac_size);
                if (next_ptr)
                        *next_ptr = cpu_to_le32(next_off);
                next_ptr = &mxac_ccontext->Next;
@@ -3193,7 +3193,8 @@ int smb2_open(struct ksmbd_work *work)
                                stat.ino, tcon->id);
                le32_add_cpu(&rsp->CreateContextsLength,
                             conn->vals->create_disk_id_size);
-               inc_rfc1001_len(rsp_org, conn->vals->create_disk_id_size);
+               inc_rfc1001_len(work->response_buf,
+                               conn->vals->create_disk_id_size);
                if (next_ptr)
                        *next_ptr = cpu_to_le32(next_off);
                next_ptr = &disk_id_ccontext->Next;
@@ -3207,15 +3208,15 @@ int smb2_open(struct ksmbd_work *work)
                                fp);
                le32_add_cpu(&rsp->CreateContextsLength,
                             conn->vals->create_posix_size);
-               inc_rfc1001_len(rsp_org, conn->vals->create_posix_size);
+               inc_rfc1001_len(work->response_buf,
+                               conn->vals->create_posix_size);
                if (next_ptr)
                        *next_ptr = cpu_to_le32(next_off);
        }
 
        if (contxt_cnt > 0) {
                rsp->CreateContextsOffset =
-                       cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer)
-                       - 4);
+                       cpu_to_le32(offsetof(struct smb2_create_rsp, Buffer));
        }
 
 err_out:
@@ -3422,9 +3423,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                ffdinfo->EaSize =
                        smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
                if (ffdinfo->EaSize)
-                       ffdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+                       ffdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
                if (d_info->hide_dot_file && d_info->name[0] == '.')
-                       ffdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+                       ffdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
                memcpy(ffdinfo->FileName, conv_name, conv_len);
                ffdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
                break;
@@ -3438,11 +3439,11 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                fbdinfo->EaSize =
                        smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
                if (fbdinfo->EaSize)
-                       fbdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+                       fbdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
                fbdinfo->ShortNameLength = 0;
                fbdinfo->Reserved = 0;
                if (d_info->hide_dot_file && d_info->name[0] == '.')
-                       fbdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+                       fbdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
                memcpy(fbdinfo->FileName, conv_name, conv_len);
                fbdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
                break;
@@ -3454,7 +3455,7 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                fdinfo = (struct file_directory_info *)kstat;
                fdinfo->FileNameLength = cpu_to_le32(conv_len);
                if (d_info->hide_dot_file && d_info->name[0] == '.')
-                       fdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+                       fdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
                memcpy(fdinfo->FileName, conv_name, conv_len);
                fdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
                break;
@@ -3478,11 +3479,11 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                dinfo->EaSize =
                        smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
                if (dinfo->EaSize)
-                       dinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+                       dinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
                dinfo->Reserved = 0;
                dinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
                if (d_info->hide_dot_file && d_info->name[0] == '.')
-                       dinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+                       dinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
                memcpy(dinfo->FileName, conv_name, conv_len);
                dinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
                break;
@@ -3496,13 +3497,13 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                fibdinfo->EaSize =
                        smb2_get_reparse_tag_special_file(ksmbd_kstat->kstat->mode);
                if (fibdinfo->EaSize)
-                       fibdinfo->ExtFileAttributes = ATTR_REPARSE_POINT_LE;
+                       fibdinfo->ExtFileAttributes = FILE_ATTRIBUTE_REPARSE_POINT_LE;
                fibdinfo->UniqueId = cpu_to_le64(ksmbd_kstat->kstat->ino);
                fibdinfo->ShortNameLength = 0;
                fibdinfo->Reserved = 0;
                fibdinfo->Reserved2 = cpu_to_le16(0);
                if (d_info->hide_dot_file && d_info->name[0] == '.')
-                       fibdinfo->ExtFileAttributes |= ATTR_HIDDEN_LE;
+                       fibdinfo->ExtFileAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
                memcpy(fibdinfo->FileName, conv_name, conv_len);
                fibdinfo->NextEntryOffset = cpu_to_le32(next_entry_offset);
                break;
@@ -3528,9 +3529,10 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                posix_info->Mode = cpu_to_le32(ksmbd_kstat->kstat->mode);
                posix_info->Inode = cpu_to_le64(ksmbd_kstat->kstat->ino);
                posix_info->DosAttributes =
-                       S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
+                       S_ISDIR(ksmbd_kstat->kstat->mode) ?
+                               FILE_ATTRIBUTE_DIRECTORY_LE : FILE_ATTRIBUTE_ARCHIVE_LE;
                if (d_info->hide_dot_file && d_info->name[0] == '.')
-                       posix_info->DosAttributes |= ATTR_HIDDEN_LE;
+                       posix_info->DosAttributes |= FILE_ATTRIBUTE_HIDDEN_LE;
                id_to_sid(from_kuid_munged(&init_user_ns, ksmbd_kstat->kstat->uid),
                          SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
                id_to_sid(from_kgid_munged(&init_user_ns, ksmbd_kstat->kstat->gid),
@@ -3816,7 +3818,7 @@ int smb2_query_dir(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
        struct smb2_query_directory_req *req;
-       struct smb2_query_directory_rsp *rsp, *rsp_org;
+       struct smb2_query_directory_rsp *rsp;
        struct ksmbd_share_config *share = work->tcon->share_conf;
        struct ksmbd_file *dir_fp = NULL;
        struct ksmbd_dir_info d_info;
@@ -3826,7 +3828,6 @@ int smb2_query_dir(struct ksmbd_work *work)
        int buffer_sz;
        struct smb2_query_dir_private query_dir_private = {NULL, };
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        if (ksmbd_override_fsids(work)) {
@@ -3947,7 +3948,7 @@ int smb2_query_dir(struct ksmbd_work *work)
                rsp->OutputBufferOffset = cpu_to_le16(0);
                rsp->OutputBufferLength = cpu_to_le32(0);
                rsp->Buffer[0] = 0;
-               inc_rfc1001_len(rsp_org, 9);
+               inc_rfc1001_len(work->response_buf, 9);
        } else {
                ((struct file_directory_info *)
                ((char *)rsp->Buffer + d_info.last_entry_offset))
@@ -3956,7 +3957,7 @@ int smb2_query_dir(struct ksmbd_work *work)
                rsp->StructureSize = cpu_to_le16(9);
                rsp->OutputBufferOffset = cpu_to_le16(72);
                rsp->OutputBufferLength = cpu_to_le32(d_info.data_count);
-               inc_rfc1001_len(rsp_org, 8 + d_info.data_count);
+               inc_rfc1001_len(work->response_buf, 8 + d_info.data_count);
        }
 
        kfree(srch_ptr);
@@ -3999,26 +4000,28 @@ err_out2:
  * Return:     0 on success, otherwise error
  */
 static int buffer_check_err(int reqOutputBufferLength,
-                           struct smb2_query_info_rsp *rsp, int infoclass_size)
+                           struct smb2_query_info_rsp *rsp,
+                           void *rsp_org, int infoclass_size)
 {
        if (reqOutputBufferLength < le32_to_cpu(rsp->OutputBufferLength)) {
                if (reqOutputBufferLength < infoclass_size) {
                        pr_err("Invalid Buffer Size Requested\n");
                        rsp->hdr.Status = STATUS_INFO_LENGTH_MISMATCH;
-                       rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4);
+                       *(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr));
                        return -EINVAL;
                }
 
                ksmbd_debug(SMB, "Buffer Overflow\n");
                rsp->hdr.Status = STATUS_BUFFER_OVERFLOW;
-               rsp->hdr.smb2_buf_length = cpu_to_be32(sizeof(struct smb2_hdr) - 4 +
+               *(__be32 *)rsp_org = cpu_to_be32(sizeof(struct smb2_hdr) +
                                reqOutputBufferLength);
                rsp->OutputBufferLength = cpu_to_le32(reqOutputBufferLength);
        }
        return 0;
 }
 
-static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
+static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp,
+                                  void *rsp_org)
 {
        struct smb2_file_standard_info *sinfo;
 
@@ -4031,10 +4034,11 @@ static void get_standard_info_pipe(struct smb2_query_info_rsp *rsp)
        sinfo->Directory = 0;
        rsp->OutputBufferLength =
                cpu_to_le32(sizeof(struct smb2_file_standard_info));
-       inc_rfc1001_len(rsp, sizeof(struct smb2_file_standard_info));
+       inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_standard_info));
 }
 
-static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
+static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num,
+                                  void *rsp_org)
 {
        struct smb2_file_internal_info *file_info;
 
@@ -4044,12 +4048,13 @@ static void get_internal_info_pipe(struct smb2_query_info_rsp *rsp, u64 num)
        file_info->IndexNumber = cpu_to_le64(num | (1ULL << 63));
        rsp->OutputBufferLength =
                cpu_to_le32(sizeof(struct smb2_file_internal_info));
-       inc_rfc1001_len(rsp, sizeof(struct smb2_file_internal_info));
+       inc_rfc1001_len(rsp_org, sizeof(struct smb2_file_internal_info));
 }
 
 static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
                                   struct smb2_query_info_req *req,
-                                  struct smb2_query_info_rsp *rsp)
+                                  struct smb2_query_info_rsp *rsp,
+                                  void *rsp_org)
 {
        u64 id;
        int rc;
@@ -4067,14 +4072,16 @@ static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
 
        switch (req->FileInfoClass) {
        case FILE_STANDARD_INFORMATION:
-               get_standard_info_pipe(rsp);
+               get_standard_info_pipe(rsp, rsp_org);
                rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-                                     rsp, FILE_STANDARD_INFORMATION_SIZE);
+                                     rsp, rsp_org,
+                                     FILE_STANDARD_INFORMATION_SIZE);
                break;
        case FILE_INTERNAL_INFORMATION:
-               get_internal_info_pipe(rsp, id);
+               get_internal_info_pipe(rsp, id, rsp_org);
                rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-                                     rsp, FILE_INTERNAL_INFORMATION_SIZE);
+                                     rsp, rsp_org,
+                                     FILE_INTERNAL_INFORMATION_SIZE);
                break;
        default:
                ksmbd_debug(SMB, "smb2_info_file_pipe for %u not supported\n",
@@ -4688,7 +4695,7 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
 
 static int smb2_get_info_file(struct ksmbd_work *work,
                              struct smb2_query_info_req *req,
-                             struct smb2_query_info_rsp *rsp, void *rsp_org)
+                             struct smb2_query_info_rsp *rsp)
 {
        struct ksmbd_file *fp;
        int fileinfoclass = 0;
@@ -4699,7 +4706,8 @@ static int smb2_get_info_file(struct ksmbd_work *work,
        if (test_share_config_flag(work->tcon->share_conf,
                                   KSMBD_SHARE_FLAG_PIPE)) {
                /* smb2 info file called for pipe */
-               return smb2_get_info_file_pipe(work->sess, req, rsp);
+               return smb2_get_info_file_pipe(work->sess, req, rsp,
+                                              work->response_buf);
        }
 
        if (work->next_smb2_rcv_hdr_off) {
@@ -4724,77 +4732,77 @@ static int smb2_get_info_file(struct ksmbd_work *work,
 
        switch (fileinfoclass) {
        case FILE_ACCESS_INFORMATION:
-               get_file_access_info(rsp, fp, rsp_org);
+               get_file_access_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_ACCESS_INFORMATION_SIZE;
                break;
 
        case FILE_BASIC_INFORMATION:
-               rc = get_file_basic_info(rsp, fp, rsp_org);
+               rc = get_file_basic_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_BASIC_INFORMATION_SIZE;
                break;
 
        case FILE_STANDARD_INFORMATION:
-               get_file_standard_info(rsp, fp, rsp_org);
+               get_file_standard_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_STANDARD_INFORMATION_SIZE;
                break;
 
        case FILE_ALIGNMENT_INFORMATION:
-               get_file_alignment_info(rsp, rsp_org);
+               get_file_alignment_info(rsp, work->response_buf);
                file_infoclass_size = FILE_ALIGNMENT_INFORMATION_SIZE;
                break;
 
        case FILE_ALL_INFORMATION:
-               rc = get_file_all_info(work, rsp, fp, rsp_org);
+               rc = get_file_all_info(work, rsp, fp, work->response_buf);
                file_infoclass_size = FILE_ALL_INFORMATION_SIZE;
                break;
 
        case FILE_ALTERNATE_NAME_INFORMATION:
-               get_file_alternate_info(work, rsp, fp, rsp_org);
+               get_file_alternate_info(work, rsp, fp, work->response_buf);
                file_infoclass_size = FILE_ALTERNATE_NAME_INFORMATION_SIZE;
                break;
 
        case FILE_STREAM_INFORMATION:
-               get_file_stream_info(work, rsp, fp, rsp_org);
+               get_file_stream_info(work, rsp, fp, work->response_buf);
                file_infoclass_size = FILE_STREAM_INFORMATION_SIZE;
                break;
 
        case FILE_INTERNAL_INFORMATION:
-               get_file_internal_info(rsp, fp, rsp_org);
+               get_file_internal_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_INTERNAL_INFORMATION_SIZE;
                break;
 
        case FILE_NETWORK_OPEN_INFORMATION:
-               rc = get_file_network_open_info(rsp, fp, rsp_org);
+               rc = get_file_network_open_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_NETWORK_OPEN_INFORMATION_SIZE;
                break;
 
        case FILE_EA_INFORMATION:
-               get_file_ea_info(rsp, rsp_org);
+               get_file_ea_info(rsp, work->response_buf);
                file_infoclass_size = FILE_EA_INFORMATION_SIZE;
                break;
 
        case FILE_FULL_EA_INFORMATION:
-               rc = smb2_get_ea(work, fp, req, rsp, rsp_org);
+               rc = smb2_get_ea(work, fp, req, rsp, work->response_buf);
                file_infoclass_size = FILE_FULL_EA_INFORMATION_SIZE;
                break;
 
        case FILE_POSITION_INFORMATION:
-               get_file_position_info(rsp, fp, rsp_org);
+               get_file_position_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_POSITION_INFORMATION_SIZE;
                break;
 
        case FILE_MODE_INFORMATION:
-               get_file_mode_info(rsp, fp, rsp_org);
+               get_file_mode_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_MODE_INFORMATION_SIZE;
                break;
 
        case FILE_COMPRESSION_INFORMATION:
-               get_file_compression_info(rsp, fp, rsp_org);
+               get_file_compression_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_COMPRESSION_INFORMATION_SIZE;
                break;
 
        case FILE_ATTRIBUTE_TAG_INFORMATION:
-               rc = get_file_attribute_tag_info(rsp, fp, rsp_org);
+               rc = get_file_attribute_tag_info(rsp, fp, work->response_buf);
                file_infoclass_size = FILE_ATTRIBUTE_TAG_INFORMATION_SIZE;
                break;
        case SMB_FIND_FILE_POSIX_INFO:
@@ -4802,7 +4810,7 @@ static int smb2_get_info_file(struct ksmbd_work *work,
                        pr_err("client doesn't negotiate with SMB3.1.1 POSIX Extensions\n");
                        rc = -EOPNOTSUPP;
                } else {
-                       rc = find_file_posix_info(rsp, fp, rsp_org);
+                       rc = find_file_posix_info(rsp, fp, work->response_buf);
                        file_infoclass_size = sizeof(struct smb311_posix_qinfo);
                }
                break;
@@ -4813,7 +4821,7 @@ static int smb2_get_info_file(struct ksmbd_work *work,
        }
        if (!rc)
                rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-                                     rsp,
+                                     rsp, work->response_buf,
                                      file_infoclass_size);
        ksmbd_fd_put(work, fp);
        return rc;
@@ -4821,7 +4829,7 @@ static int smb2_get_info_file(struct ksmbd_work *work,
 
 static int smb2_get_info_filesystem(struct ksmbd_work *work,
                                    struct smb2_query_info_req *req,
-                                   struct smb2_query_info_rsp *rsp, void *rsp_org)
+                                   struct smb2_query_info_rsp *rsp)
 {
        struct ksmbd_session *sess = work->sess;
        struct ksmbd_conn *conn = sess->conn;
@@ -4857,7 +4865,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->DeviceType = cpu_to_le32(stfs.f_type);
                info->DeviceCharacteristics = cpu_to_le32(0x00000020);
                rsp->OutputBufferLength = cpu_to_le32(8);
-               inc_rfc1001_len(rsp_org, 8);
+               inc_rfc1001_len(work->response_buf, 8);
                fs_infoclass_size = FS_DEVICE_INFORMATION_SIZE;
                break;
        }
@@ -4883,7 +4891,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->FileSystemNameLen = cpu_to_le32(len);
                sz = sizeof(struct filesystem_attribute_info) - 2 + len;
                rsp->OutputBufferLength = cpu_to_le32(sz);
-               inc_rfc1001_len(rsp_org, sz);
+               inc_rfc1001_len(work->response_buf, sz);
                fs_infoclass_size = FS_ATTRIBUTE_INFORMATION_SIZE;
                break;
        }
@@ -4891,11 +4899,18 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
        {
                struct filesystem_vol_info *info;
                size_t sz;
+               unsigned int serial_crc = 0;
 
                info = (struct filesystem_vol_info *)(rsp->Buffer);
                info->VolumeCreationTime = 0;
+               serial_crc = crc32_le(serial_crc, share->name,
+                                     strlen(share->name));
+               serial_crc = crc32_le(serial_crc, share->path,
+                                     strlen(share->path));
+               serial_crc = crc32_le(serial_crc, ksmbd_netbios_name(),
+                                     strlen(ksmbd_netbios_name()));
                /* Taking dummy value of serial number*/
-               info->SerialNumber = cpu_to_le32(0xbc3ac512);
+               info->SerialNumber = cpu_to_le32(serial_crc);
                len = smbConvertToUTF16((__le16 *)info->VolumeLabel,
                                        share->name, PATH_MAX,
                                        conn->local_nls, 0);
@@ -4904,7 +4919,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->Reserved = 0;
                sz = sizeof(struct filesystem_vol_info) - 2 + len;
                rsp->OutputBufferLength = cpu_to_le32(sz);
-               inc_rfc1001_len(rsp_org, sz);
+               inc_rfc1001_len(work->response_buf, sz);
                fs_infoclass_size = FS_VOLUME_INFORMATION_SIZE;
                break;
        }
@@ -4918,7 +4933,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->SectorsPerAllocationUnit = cpu_to_le32(1);
                info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
                rsp->OutputBufferLength = cpu_to_le32(24);
-               inc_rfc1001_len(rsp_org, 24);
+               inc_rfc1001_len(work->response_buf, 24);
                fs_infoclass_size = FS_SIZE_INFORMATION_SIZE;
                break;
        }
@@ -4935,7 +4950,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->SectorsPerAllocationUnit = cpu_to_le32(1);
                info->BytesPerSector = cpu_to_le32(stfs.f_bsize);
                rsp->OutputBufferLength = cpu_to_le32(32);
-               inc_rfc1001_len(rsp_org, 32);
+               inc_rfc1001_len(work->response_buf, 32);
                fs_infoclass_size = FS_FULL_SIZE_INFORMATION_SIZE;
                break;
        }
@@ -4956,7 +4971,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->extended_info.rel_date = 0;
                memcpy(info->extended_info.version_string, "1.1.0", strlen("1.1.0"));
                rsp->OutputBufferLength = cpu_to_le32(64);
-               inc_rfc1001_len(rsp_org, 64);
+               inc_rfc1001_len(work->response_buf, 64);
                fs_infoclass_size = FS_OBJECT_ID_INFORMATION_SIZE;
                break;
        }
@@ -4977,7 +4992,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->ByteOffsetForSectorAlignment = 0;
                info->ByteOffsetForPartitionAlignment = 0;
                rsp->OutputBufferLength = cpu_to_le32(28);
-               inc_rfc1001_len(rsp_org, 28);
+               inc_rfc1001_len(work->response_buf, 28);
                fs_infoclass_size = FS_SECTOR_SIZE_INFORMATION_SIZE;
                break;
        }
@@ -4999,7 +5014,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                info->DefaultQuotaLimit = cpu_to_le64(SMB2_NO_FID);
                info->Padding = 0;
                rsp->OutputBufferLength = cpu_to_le32(48);
-               inc_rfc1001_len(rsp_org, 48);
+               inc_rfc1001_len(work->response_buf, 48);
                fs_infoclass_size = FS_CONTROL_INFORMATION_SIZE;
                break;
        }
@@ -5020,7 +5035,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                        info->TotalFileNodes = cpu_to_le64(stfs.f_files);
                        info->FreeFileNodes = cpu_to_le64(stfs.f_ffree);
                        rsp->OutputBufferLength = cpu_to_le32(56);
-                       inc_rfc1001_len(rsp_org, 56);
+                       inc_rfc1001_len(work->response_buf, 56);
                        fs_infoclass_size = FS_POSIX_INFORMATION_SIZE;
                }
                break;
@@ -5030,7 +5045,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
                return -EOPNOTSUPP;
        }
        rc = buffer_check_err(le32_to_cpu(req->OutputBufferLength),
-                             rsp,
+                             rsp, work->response_buf,
                              fs_infoclass_size);
        path_put(&path);
        return rc;
@@ -5038,7 +5053,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work,
 
 static int smb2_get_info_sec(struct ksmbd_work *work,
                             struct smb2_query_info_req *req,
-                            struct smb2_query_info_rsp *rsp, void *rsp_org)
+                            struct smb2_query_info_rsp *rsp)
 {
        struct ksmbd_file *fp;
        struct user_namespace *user_ns;
@@ -5065,7 +5080,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
 
                secdesclen = sizeof(struct smb_ntsd);
                rsp->OutputBufferLength = cpu_to_le32(secdesclen);
-               inc_rfc1001_len(rsp_org, secdesclen);
+               inc_rfc1001_len(work->response_buf, secdesclen);
 
                return 0;
        }
@@ -5107,7 +5122,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
                return rc;
 
        rsp->OutputBufferLength = cpu_to_le32(secdesclen);
-       inc_rfc1001_len(rsp_org, secdesclen);
+       inc_rfc1001_len(work->response_buf, secdesclen);
        return 0;
 }
 
@@ -5120,10 +5135,9 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
 int smb2_query_info(struct ksmbd_work *work)
 {
        struct smb2_query_info_req *req;
-       struct smb2_query_info_rsp *rsp, *rsp_org;
+       struct smb2_query_info_rsp *rsp;
        int rc = 0;
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        ksmbd_debug(SMB, "GOT query info request\n");
@@ -5131,15 +5145,15 @@ int smb2_query_info(struct ksmbd_work *work)
        switch (req->InfoType) {
        case SMB2_O_INFO_FILE:
                ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n");
-               rc = smb2_get_info_file(work, req, rsp, (void *)rsp_org);
+               rc = smb2_get_info_file(work, req, rsp);
                break;
        case SMB2_O_INFO_FILESYSTEM:
                ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILESYSTEM\n");
-               rc = smb2_get_info_filesystem(work, req, rsp, (void *)rsp_org);
+               rc = smb2_get_info_filesystem(work, req, rsp);
                break;
        case SMB2_O_INFO_SECURITY:
                ksmbd_debug(SMB, "GOT SMB2_O_INFO_SECURITY\n");
-               rc = smb2_get_info_sec(work, req, rsp, (void *)rsp_org);
+               rc = smb2_get_info_sec(work, req, rsp);
                break;
        default:
                ksmbd_debug(SMB, "InfoType %d not supported yet\n",
@@ -5164,7 +5178,7 @@ int smb2_query_info(struct ksmbd_work *work)
        }
        rsp->StructureSize = cpu_to_le16(9);
        rsp->OutputBufferOffset = cpu_to_le16(72);
-       inc_rfc1001_len(rsp_org, 8);
+       inc_rfc1001_len(work->response_buf, 8);
        return 0;
 }
 
@@ -5177,8 +5191,8 @@ int smb2_query_info(struct ksmbd_work *work)
 static noinline int smb2_close_pipe(struct ksmbd_work *work)
 {
        u64 id;
-       struct smb2_close_req *req = work->request_buf;
-       struct smb2_close_rsp *rsp = work->response_buf;
+       struct smb2_close_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_close_rsp *rsp = smb2_get_msg(work->response_buf);
 
        id = le64_to_cpu(req->VolatileFileId);
        ksmbd_session_rpc_close(work->sess, id);
@@ -5193,7 +5207,7 @@ static noinline int smb2_close_pipe(struct ksmbd_work *work)
        rsp->AllocationSize = 0;
        rsp->EndOfFile = 0;
        rsp->Attributes = 0;
-       inc_rfc1001_len(rsp, 60);
+       inc_rfc1001_len(work->response_buf, 60);
        return 0;
 }
 
@@ -5209,14 +5223,12 @@ int smb2_close(struct ksmbd_work *work)
        u64 sess_id;
        struct smb2_close_req *req;
        struct smb2_close_rsp *rsp;
-       struct smb2_close_rsp *rsp_org;
        struct ksmbd_conn *conn = work->conn;
        struct ksmbd_file *fp;
        struct inode *inode;
        u64 time;
        int err = 0;
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        if (test_share_config_flag(work->tcon->share_conf,
@@ -5306,7 +5318,7 @@ out:
                        rsp->hdr.Status = STATUS_FILE_CLOSED;
                smb2_set_err_rsp(work);
        } else {
-               inc_rfc1001_len(rsp_org, 60);
+               inc_rfc1001_len(work->response_buf, 60);
        }
 
        return 0;
@@ -5320,11 +5332,11 @@ out:
  */
 int smb2_echo(struct ksmbd_work *work)
 {
-       struct smb2_echo_rsp *rsp = work->response_buf;
+       struct smb2_echo_rsp *rsp = smb2_get_msg(work->response_buf);
 
        rsp->StructureSize = cpu_to_le16(4);
        rsp->Reserved = 0;
-       inc_rfc1001_len(rsp, 4);
+       inc_rfc1001_len(work->response_buf, 4);
        return 0;
 }
 
@@ -5566,14 +5578,14 @@ static int set_file_basic_info(struct ksmbd_file *fp,
 
        if (file_info->Attributes) {
                if (!S_ISDIR(inode->i_mode) &&
-                   file_info->Attributes & ATTR_DIRECTORY_LE) {
+                   file_info->Attributes & FILE_ATTRIBUTE_DIRECTORY_LE) {
                        pr_err("can't change a file to a directory\n");
                        return -EINVAL;
                }
 
-               if (!(S_ISDIR(inode->i_mode) && file_info->Attributes == ATTR_NORMAL_LE))
+               if (!(S_ISDIR(inode->i_mode) && file_info->Attributes == FILE_ATTRIBUTE_NORMAL_LE))
                        fp->f_ci->m_fattr = file_info->Attributes |
-                               (fp->f_ci->m_fattr & ATTR_DIRECTORY_LE);
+                               (fp->f_ci->m_fattr & FILE_ATTRIBUTE_DIRECTORY_LE);
        }
 
        if (test_share_config_flag(share, KSMBD_SHARE_FLAG_STORE_DOS_ATTRS) &&
@@ -5794,9 +5806,7 @@ static int set_file_mode_info(struct ksmbd_file *fp,
 
        mode = file_info->Mode;
 
-       if ((mode & ~FILE_MODE_INFO_MASK) ||
-           (mode & FILE_SYNCHRONOUS_IO_ALERT_LE &&
-            mode & FILE_SYNCHRONOUS_IO_NONALERT_LE)) {
+       if ((mode & ~FILE_MODE_INFO_MASK)) {
                pr_err("Mode is not valid : 0x%x\n", le32_to_cpu(mode));
                return -EINVAL;
        }
@@ -5943,14 +5953,13 @@ static int smb2_set_info_sec(struct ksmbd_file *fp, int addition_info,
 int smb2_set_info(struct ksmbd_work *work)
 {
        struct smb2_set_info_req *req;
-       struct smb2_set_info_rsp *rsp, *rsp_org;
+       struct smb2_set_info_rsp *rsp;
        struct ksmbd_file *fp;
        int rc = 0;
        unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
 
        ksmbd_debug(SMB, "Received set info request\n");
 
-       rsp_org = work->response_buf;
        if (work->next_smb2_rcv_hdr_off) {
                req = ksmbd_req_buf_next(work);
                rsp = ksmbd_resp_buf_next(work);
@@ -5961,8 +5970,8 @@ int smb2_set_info(struct ksmbd_work *work)
                        pid = work->compound_pfid;
                }
        } else {
-               req = work->request_buf;
-               rsp = work->response_buf;
+               req = smb2_get_msg(work->request_buf);
+               rsp = smb2_get_msg(work->response_buf);
        }
 
        if (!has_file_id(id)) {
@@ -6002,7 +6011,7 @@ int smb2_set_info(struct ksmbd_work *work)
                goto err_out;
 
        rsp->StructureSize = cpu_to_le16(2);
-       inc_rfc1001_len(rsp_org, 2);
+       inc_rfc1001_len(work->response_buf, 2);
        ksmbd_fd_put(work, fp);
        return 0;
 
@@ -6042,12 +6051,12 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
        int nbytes = 0, err;
        u64 id;
        struct ksmbd_rpc_command *rpc_resp;
-       struct smb2_read_req *req = work->request_buf;
-       struct smb2_read_rsp *rsp = work->response_buf;
+       struct smb2_read_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_read_rsp *rsp = smb2_get_msg(work->response_buf);
 
        id = le64_to_cpu(req->VolatileFileId);
 
-       inc_rfc1001_len(rsp, 16);
+       inc_rfc1001_len(work->response_buf, 16);
        rpc_resp = ksmbd_rpc_read(work->sess, id);
        if (rpc_resp) {
                if (rpc_resp->flags != KSMBD_RPC_OK) {
@@ -6066,7 +6075,7 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
                       rpc_resp->payload_sz);
 
                nbytes = rpc_resp->payload_sz;
-               work->resp_hdr_sz = get_rfc1002_len(rsp) + 4;
+               work->resp_hdr_sz = get_rfc1002_len(work->response_buf) + 4;
                work->aux_payload_sz = nbytes;
                kvfree(rpc_resp);
        }
@@ -6076,8 +6085,8 @@ static noinline int smb2_read_pipe(struct ksmbd_work *work)
        rsp->Reserved = 0;
        rsp->DataLength = cpu_to_le32(nbytes);
        rsp->DataRemaining = 0;
-       rsp->Reserved2 = 0;
-       inc_rfc1001_len(rsp, nbytes);
+       rsp->Flags = 0;
+       inc_rfc1001_len(work->response_buf, nbytes);
        return 0;
 
 out:
@@ -6127,14 +6136,13 @@ int smb2_read(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
        struct smb2_read_req *req;
-       struct smb2_read_rsp *rsp, *rsp_org;
+       struct smb2_read_rsp *rsp;
        struct ksmbd_file *fp;
        loff_t offset;
        size_t length, mincount;
        ssize_t nbytes = 0, remain_bytes = 0;
        int err = 0;
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        if (test_share_config_flag(work->tcon->share_conf,
@@ -6215,11 +6223,11 @@ int smb2_read(struct ksmbd_work *work)
        rsp->Reserved = 0;
        rsp->DataLength = cpu_to_le32(nbytes);
        rsp->DataRemaining = cpu_to_le32(remain_bytes);
-       rsp->Reserved2 = 0;
-       inc_rfc1001_len(rsp_org, 16);
-       work->resp_hdr_sz = get_rfc1002_len(rsp_org) + 4;
+       rsp->Flags = 0;
+       inc_rfc1001_len(work->response_buf, 16);
+       work->resp_hdr_sz = get_rfc1002_len(work->response_buf) + 4;
        work->aux_payload_sz = nbytes;
-       inc_rfc1001_len(rsp_org, nbytes);
+       inc_rfc1001_len(work->response_buf, nbytes);
        ksmbd_fd_put(work, fp);
        return 0;
 
@@ -6254,8 +6262,8 @@ out:
  */
 static noinline int smb2_write_pipe(struct ksmbd_work *work)
 {
-       struct smb2_write_req *req = work->request_buf;
-       struct smb2_write_rsp *rsp = work->response_buf;
+       struct smb2_write_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_write_rsp *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_rpc_command *rpc_resp;
        u64 id = 0;
        int err = 0, ret = 0;
@@ -6266,13 +6274,14 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
        id = le64_to_cpu(req->VolatileFileId);
 
        if (le16_to_cpu(req->DataOffset) ==
-           (offsetof(struct smb2_write_req, Buffer) - 4)) {
+           offsetof(struct smb2_write_req, Buffer)) {
                data_buf = (char *)&req->Buffer[0];
        } else {
-               if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+               if ((u64)le16_to_cpu(req->DataOffset) + length >
+                   get_rfc1002_len(work->request_buf)) {
                        pr_err("invalid write data offset %u, smb_len %u\n",
                               le16_to_cpu(req->DataOffset),
-                              get_rfc1002_len(req));
+                              get_rfc1002_len(work->request_buf));
                        err = -EINVAL;
                        goto out;
                }
@@ -6304,7 +6313,7 @@ static noinline int smb2_write_pipe(struct ksmbd_work *work)
        rsp->DataLength = cpu_to_le32(length);
        rsp->DataRemaining = 0;
        rsp->Reserved2 = 0;
-       inc_rfc1001_len(rsp, 16);
+       inc_rfc1001_len(work->response_buf, 16);
        return 0;
 out:
        if (err) {
@@ -6372,7 +6381,7 @@ static ssize_t smb2_write_rdma_channel(struct ksmbd_work *work,
 int smb2_write(struct ksmbd_work *work)
 {
        struct smb2_write_req *req;
-       struct smb2_write_rsp *rsp, *rsp_org;
+       struct smb2_write_rsp *rsp;
        struct ksmbd_file *fp = NULL;
        loff_t offset;
        size_t length;
@@ -6381,7 +6390,6 @@ int smb2_write(struct ksmbd_work *work)
        bool writethrough = false;
        int err = 0;
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_PIPE)) {
@@ -6424,13 +6432,14 @@ int smb2_write(struct ksmbd_work *work)
        if (req->Channel != SMB2_CHANNEL_RDMA_V1 &&
            req->Channel != SMB2_CHANNEL_RDMA_V1_INVALIDATE) {
                if (le16_to_cpu(req->DataOffset) ==
-                   (offsetof(struct smb2_write_req, Buffer) - 4)) {
+                   offsetof(struct smb2_write_req, Buffer)) {
                        data_buf = (char *)&req->Buffer[0];
                } else {
-                       if ((u64)le16_to_cpu(req->DataOffset) + length > get_rfc1002_len(req)) {
+                       if ((u64)le16_to_cpu(req->DataOffset) + length >
+                           get_rfc1002_len(work->request_buf)) {
                                pr_err("invalid write data offset %u, smb_len %u\n",
                                       le16_to_cpu(req->DataOffset),
-                                      get_rfc1002_len(req));
+                                      get_rfc1002_len(work->request_buf));
                                err = -EINVAL;
                                goto out;
                        }
@@ -6468,7 +6477,7 @@ int smb2_write(struct ksmbd_work *work)
        rsp->DataLength = cpu_to_le32(nbytes);
        rsp->DataRemaining = 0;
        rsp->Reserved2 = 0;
-       inc_rfc1001_len(rsp_org, 16);
+       inc_rfc1001_len(work->response_buf, 16);
        ksmbd_fd_put(work, fp);
        return 0;
 
@@ -6502,10 +6511,9 @@ out:
 int smb2_flush(struct ksmbd_work *work)
 {
        struct smb2_flush_req *req;
-       struct smb2_flush_rsp *rsp, *rsp_org;
+       struct smb2_flush_rsp *rsp;
        int err;
 
-       rsp_org = work->response_buf;
        WORK_BUFFERS(work, req, rsp);
 
        ksmbd_debug(SMB, "SMB2_FLUSH called for fid %llu\n",
@@ -6519,7 +6527,7 @@ int smb2_flush(struct ksmbd_work *work)
 
        rsp->StructureSize = cpu_to_le16(4);
        rsp->Reserved = 0;
-       inc_rfc1001_len(rsp_org, 4);
+       inc_rfc1001_len(work->response_buf, 4);
        return 0;
 
 out:
@@ -6540,7 +6548,7 @@ out:
 int smb2_cancel(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_hdr *hdr = work->request_buf;
+       struct smb2_hdr *hdr = smb2_get_msg(work->request_buf);
        struct smb2_hdr *chdr;
        struct ksmbd_work *cancel_work = NULL;
        int canceled = 0;
@@ -6555,7 +6563,7 @@ int smb2_cancel(struct ksmbd_work *work)
                spin_lock(&conn->request_lock);
                list_for_each_entry(cancel_work, command_list,
                                    async_request_entry) {
-                       chdr = cancel_work->request_buf;
+                       chdr = smb2_get_msg(cancel_work->request_buf);
 
                        if (cancel_work->async_id !=
                            le64_to_cpu(hdr->Id.AsyncId))
@@ -6574,7 +6582,7 @@ int smb2_cancel(struct ksmbd_work *work)
 
                spin_lock(&conn->request_lock);
                list_for_each_entry(cancel_work, command_list, request_entry) {
-                       chdr = cancel_work->request_buf;
+                       chdr = smb2_get_msg(cancel_work->request_buf);
 
                        if (chdr->MessageId != hdr->MessageId ||
                            cancel_work == work)
@@ -6709,8 +6717,8 @@ static inline bool lock_defer_pending(struct file_lock *fl)
  */
 int smb2_lock(struct ksmbd_work *work)
 {
-       struct smb2_lock_req *req = work->request_buf;
-       struct smb2_lock_rsp *rsp = work->response_buf;
+       struct smb2_lock_req *req = smb2_get_msg(work->request_buf);
+       struct smb2_lock_rsp *rsp = smb2_get_msg(work->response_buf);
        struct smb2_lock_element *lock_ele;
        struct ksmbd_file *fp = NULL;
        struct file_lock *flock = NULL;
@@ -7017,7 +7025,7 @@ skip:
        ksmbd_debug(SMB, "successful in taking lock\n");
        rsp->hdr.Status = STATUS_SUCCESS;
        rsp->Reserved = 0;
-       inc_rfc1001_len(rsp, 4);
+       inc_rfc1001_len(work->response_buf, 4);
        ksmbd_fd_put(work, fp);
        return 0;
 
@@ -7312,7 +7320,7 @@ static int fsctl_validate_negotiate_info(struct ksmbd_conn *conn,
        int ret = 0;
        int dialect;
 
-       if (in_buf_len < sizeof(struct validate_negotiate_info_req) +
+       if (in_buf_len < offsetof(struct validate_negotiate_info_req, Dialects) +
                        le16_to_cpu(neg_req->DialectCount) * sizeof(__le16))
                return -EINVAL;
 
@@ -7435,9 +7443,9 @@ static inline int fsctl_set_sparse(struct ksmbd_work *work, u64 id,
 
        old_fattr = fp->f_ci->m_fattr;
        if (sparse->SetSparse)
-               fp->f_ci->m_fattr |= ATTR_SPARSE_FILE_LE;
+               fp->f_ci->m_fattr |= FILE_ATTRIBUTE_SPARSE_FILE_LE;
        else
-               fp->f_ci->m_fattr &= ~ATTR_SPARSE_FILE_LE;
+               fp->f_ci->m_fattr &= ~FILE_ATTRIBUTE_SPARSE_FILE_LE;
 
        if (fp->f_ci->m_fattr != old_fattr &&
            test_share_config_flag(work->tcon->share_conf,
@@ -7490,13 +7498,12 @@ static int fsctl_request_resume_key(struct ksmbd_work *work,
 int smb2_ioctl(struct ksmbd_work *work)
 {
        struct smb2_ioctl_req *req;
-       struct smb2_ioctl_rsp *rsp, *rsp_org;
+       struct smb2_ioctl_rsp *rsp;
        unsigned int cnt_code, nbytes = 0, out_buf_len, in_buf_len;
        u64 id = KSMBD_NO_FID;
        struct ksmbd_conn *conn = work->conn;
        int ret = 0;
 
-       rsp_org = work->response_buf;
        if (work->next_smb2_rcv_hdr_off) {
                req = ksmbd_req_buf_next(work);
                rsp = ksmbd_resp_buf_next(work);
@@ -7506,8 +7513,8 @@ int smb2_ioctl(struct ksmbd_work *work)
                        id = work->compound_fid;
                }
        } else {
-               req = work->request_buf;
-               rsp = work->response_buf;
+               req = smb2_get_msg(work->request_buf);
+               rsp = smb2_get_msg(work->response_buf);
        }
 
        if (!has_file_id(id))
@@ -7787,7 +7794,7 @@ dup_ext_out:
        rsp->Reserved = cpu_to_le16(0);
        rsp->Flags = cpu_to_le32(0);
        rsp->Reserved2 = cpu_to_le32(0);
-       inc_rfc1001_len(rsp_org, 48 + nbytes);
+       inc_rfc1001_len(work->response_buf, 48 + nbytes);
 
        return 0;
 
@@ -7814,8 +7821,8 @@ out:
  */
 static void smb20_oplock_break_ack(struct ksmbd_work *work)
 {
-       struct smb2_oplock_break *req = work->request_buf;
-       struct smb2_oplock_break *rsp = work->response_buf;
+       struct smb2_oplock_break *req = smb2_get_msg(work->request_buf);
+       struct smb2_oplock_break *rsp = smb2_get_msg(work->response_buf);
        struct ksmbd_file *fp;
        struct oplock_info *opinfo = NULL;
        __le32 err = 0;
@@ -7922,7 +7929,7 @@ static void smb20_oplock_break_ack(struct ksmbd_work *work)
        rsp->Reserved2 = 0;
        rsp->VolatileFid = cpu_to_le64(volatile_id);
        rsp->PersistentFid = cpu_to_le64(persistent_id);
-       inc_rfc1001_len(rsp, 24);
+       inc_rfc1001_len(work->response_buf, 24);
        return;
 
 err_out:
@@ -7958,8 +7965,8 @@ static int check_lease_state(struct lease *lease, __le32 req_state)
 static void smb21_lease_break_ack(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_lease_ack *req = work->request_buf;
-       struct smb2_lease_ack *rsp = work->response_buf;
+       struct smb2_lease_ack *req = smb2_get_msg(work->request_buf);
+       struct smb2_lease_ack *rsp = smb2_get_msg(work->response_buf);
        struct oplock_info *opinfo;
        __le32 err = 0;
        int ret = 0;
@@ -8071,7 +8078,7 @@ static void smb21_lease_break_ack(struct ksmbd_work *work)
        memcpy(rsp->LeaseKey, req->LeaseKey, 16);
        rsp->LeaseState = lease_state;
        rsp->LeaseDuration = 0;
-       inc_rfc1001_len(rsp, 36);
+       inc_rfc1001_len(work->response_buf, 36);
        return;
 
 err_out:
@@ -8092,8 +8099,8 @@ err_out:
  */
 int smb2_oplock_break(struct ksmbd_work *work)
 {
-       struct smb2_oplock_break *req = work->request_buf;
-       struct smb2_oplock_break *rsp = work->response_buf;
+       struct smb2_oplock_break *req = smb2_get_msg(work->request_buf);
+       struct smb2_oplock_break *rsp = smb2_get_msg(work->response_buf);
 
        switch (le16_to_cpu(req->StructureSize)) {
        case OP_BREAK_STRUCT_SIZE_20:
@@ -8120,8 +8127,8 @@ int smb2_oplock_break(struct ksmbd_work *work)
  */
 int smb2_notify(struct ksmbd_work *work)
 {
-       struct smb2_notify_req *req;
-       struct smb2_notify_rsp *rsp;
+       struct smb2_change_notify_req *req;
+       struct smb2_change_notify_rsp *rsp;
 
        WORK_BUFFERS(work, req, rsp);
 
@@ -8145,7 +8152,7 @@ int smb2_notify(struct ksmbd_work *work)
  */
 bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
 {
-       struct smb2_hdr *rcv_hdr2 = work->request_buf;
+       struct smb2_hdr *rcv_hdr2 = smb2_get_msg(work->request_buf);
 
        if ((rcv_hdr2->Flags & SMB2_FLAGS_SIGNED) &&
            command != SMB2_NEGOTIATE_HE &&
@@ -8164,22 +8171,22 @@ bool smb2_is_sign_req(struct ksmbd_work *work, unsigned int command)
  */
 int smb2_check_sign_req(struct ksmbd_work *work)
 {
-       struct smb2_hdr *hdr, *hdr_org;
+       struct smb2_hdr *hdr;
        char signature_req[SMB2_SIGNATURE_SIZE];
        char signature[SMB2_HMACSHA256_SIZE];
        struct kvec iov[1];
        size_t len;
 
-       hdr_org = hdr = work->request_buf;
+       hdr = smb2_get_msg(work->request_buf);
        if (work->next_smb2_rcv_hdr_off)
                hdr = ksmbd_req_buf_next(work);
 
        if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
-               len = be32_to_cpu(hdr_org->smb2_buf_length);
+               len = get_rfc1002_len(work->request_buf);
        else if (hdr->NextCommand)
                len = le32_to_cpu(hdr->NextCommand);
        else
-               len = be32_to_cpu(hdr_org->smb2_buf_length) -
+               len = get_rfc1002_len(work->request_buf) -
                        work->next_smb2_rcv_hdr_off;
 
        memcpy(signature_req, hdr->Signature, SMB2_SIGNATURE_SIZE);
@@ -8207,25 +8214,26 @@ int smb2_check_sign_req(struct ksmbd_work *work)
  */
 void smb2_set_sign_rsp(struct ksmbd_work *work)
 {
-       struct smb2_hdr *hdr, *hdr_org;
+       struct smb2_hdr *hdr;
        struct smb2_hdr *req_hdr;
        char signature[SMB2_HMACSHA256_SIZE];
        struct kvec iov[2];
        size_t len;
        int n_vec = 1;
 
-       hdr_org = hdr = work->response_buf;
+       hdr = smb2_get_msg(work->response_buf);
        if (work->next_smb2_rsp_hdr_off)
                hdr = ksmbd_resp_buf_next(work);
 
        req_hdr = ksmbd_req_buf_next(work);
 
        if (!work->next_smb2_rsp_hdr_off) {
-               len = get_rfc1002_len(hdr_org);
+               len = get_rfc1002_len(work->response_buf);
                if (req_hdr->NextCommand)
                        len = ALIGN(len, 8);
        } else {
-               len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
+               len = get_rfc1002_len(work->response_buf) -
+                       work->next_smb2_rsp_hdr_off;
                len = ALIGN(len, 8);
        }
 
@@ -8261,23 +8269,23 @@ int smb3_check_sign_req(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
        char *signing_key;
-       struct smb2_hdr *hdr, *hdr_org;
+       struct smb2_hdr *hdr;
        struct channel *chann;
        char signature_req[SMB2_SIGNATURE_SIZE];
        char signature[SMB2_CMACAES_SIZE];
        struct kvec iov[1];
        size_t len;
 
-       hdr_org = hdr = work->request_buf;
+       hdr = smb2_get_msg(work->request_buf);
        if (work->next_smb2_rcv_hdr_off)
                hdr = ksmbd_req_buf_next(work);
 
        if (!hdr->NextCommand && !work->next_smb2_rcv_hdr_off)
-               len = be32_to_cpu(hdr_org->smb2_buf_length);
+               len = get_rfc1002_len(work->request_buf);
        else if (hdr->NextCommand)
                len = le32_to_cpu(hdr->NextCommand);
        else
-               len = be32_to_cpu(hdr_org->smb2_buf_length) -
+               len = get_rfc1002_len(work->request_buf) -
                        work->next_smb2_rcv_hdr_off;
 
        if (le16_to_cpu(hdr->Command) == SMB2_SESSION_SETUP_HE) {
@@ -8318,8 +8326,7 @@ int smb3_check_sign_req(struct ksmbd_work *work)
 void smb3_set_sign_rsp(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_hdr *req_hdr;
-       struct smb2_hdr *hdr, *hdr_org;
+       struct smb2_hdr *req_hdr, *hdr;
        struct channel *chann;
        char signature[SMB2_CMACAES_SIZE];
        struct kvec iov[2];
@@ -8327,18 +8334,19 @@ void smb3_set_sign_rsp(struct ksmbd_work *work)
        size_t len;
        char *signing_key;
 
-       hdr_org = hdr = work->response_buf;
+       hdr = smb2_get_msg(work->response_buf);
        if (work->next_smb2_rsp_hdr_off)
                hdr = ksmbd_resp_buf_next(work);
 
        req_hdr = ksmbd_req_buf_next(work);
 
        if (!work->next_smb2_rsp_hdr_off) {
-               len = get_rfc1002_len(hdr_org);
+               len = get_rfc1002_len(work->response_buf);
                if (req_hdr->NextCommand)
                        len = ALIGN(len, 8);
        } else {
-               len = get_rfc1002_len(hdr_org) - work->next_smb2_rsp_hdr_off;
+               len = get_rfc1002_len(work->response_buf) -
+                       work->next_smb2_rsp_hdr_off;
                len = ALIGN(len, 8);
        }
 
@@ -8391,7 +8399,7 @@ void smb3_preauth_hash_rsp(struct ksmbd_work *work)
 
        if (le16_to_cpu(req->Command) == SMB2_NEGOTIATE_HE &&
            conn->preauth_info)
-               ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+               ksmbd_gen_preauth_integrity_hash(conn, work->response_buf,
                                                 conn->preauth_info->Preauth_HashValue);
 
        if (le16_to_cpu(rsp->Command) == SMB2_SESSION_SETUP_HE && sess) {
@@ -8409,35 +8417,34 @@ void smb3_preauth_hash_rsp(struct ksmbd_work *work)
                        if (!hash_value)
                                return;
                }
-               ksmbd_gen_preauth_integrity_hash(conn, (char *)rsp,
+               ksmbd_gen_preauth_integrity_hash(conn, work->response_buf,
                                                 hash_value);
        }
 }
 
-static void fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, char *old_buf,
-                              __le16 cipher_type)
+static void fill_transform_hdr(void *tr_buf, char *old_buf, __le16 cipher_type)
 {
-       struct smb2_hdr *hdr = (struct smb2_hdr *)old_buf;
+       struct smb2_transform_hdr *tr_hdr = tr_buf + 4;
+       struct smb2_hdr *hdr = smb2_get_msg(old_buf);
        unsigned int orig_len = get_rfc1002_len(old_buf);
 
-       memset(tr_hdr, 0, sizeof(struct smb2_transform_hdr));
+       memset(tr_buf, 0, sizeof(struct smb2_transform_hdr) + 4);
        tr_hdr->ProtocolId = SMB2_TRANSFORM_PROTO_NUM;
        tr_hdr->OriginalMessageSize = cpu_to_le32(orig_len);
-       tr_hdr->Flags = cpu_to_le16(0x01);
+       tr_hdr->Flags = cpu_to_le16(TRANSFORM_FLAG_ENCRYPTED);
        if (cipher_type == SMB2_ENCRYPTION_AES128_GCM ||
            cipher_type == SMB2_ENCRYPTION_AES256_GCM)
                get_random_bytes(&tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
        else
                get_random_bytes(&tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
        memcpy(&tr_hdr->SessionId, &hdr->SessionId, 8);
-       inc_rfc1001_len(tr_hdr, sizeof(struct smb2_transform_hdr) - 4);
-       inc_rfc1001_len(tr_hdr, orig_len);
+       inc_rfc1001_len(tr_buf, sizeof(struct smb2_transform_hdr));
+       inc_rfc1001_len(tr_buf, orig_len);
 }
 
 int smb3_encrypt_resp(struct ksmbd_work *work)
 {
        char *buf = work->response_buf;
-       struct smb2_transform_hdr *tr_hdr;
        struct kvec iov[3];
        int rc = -ENOMEM;
        int buf_size = 0, rq_nvec = 2 + (work->aux_payload_sz ? 1 : 0);
@@ -8445,15 +8452,15 @@ int smb3_encrypt_resp(struct ksmbd_work *work)
        if (ARRAY_SIZE(iov) < rq_nvec)
                return -ENOMEM;
 
-       tr_hdr = kzalloc(sizeof(struct smb2_transform_hdr), GFP_KERNEL);
-       if (!tr_hdr)
+       work->tr_buf = kzalloc(sizeof(struct smb2_transform_hdr) + 4, GFP_KERNEL);
+       if (!work->tr_buf)
                return rc;
 
        /* fill transform header */
-       fill_transform_hdr(tr_hdr, buf, work->conn->cipher_type);
+       fill_transform_hdr(work->tr_buf, buf, work->conn->cipher_type);
 
-       iov[0].iov_base = tr_hdr;
-       iov[0].iov_len = sizeof(struct smb2_transform_hdr);
+       iov[0].iov_base = work->tr_buf;
+       iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
        buf_size += iov[0].iov_len - 4;
 
        iov[1].iov_base = buf + 4;
@@ -8473,15 +8480,14 @@ int smb3_encrypt_resp(struct ksmbd_work *work)
                return rc;
 
        memmove(buf, iov[1].iov_base, iov[1].iov_len);
-       tr_hdr->smb2_buf_length = cpu_to_be32(buf_size);
-       work->tr_buf = tr_hdr;
+       *(__be32 *)work->tr_buf = cpu_to_be32(buf_size);
 
        return rc;
 }
 
 bool smb3_is_transform_hdr(void *buf)
 {
-       struct smb2_transform_hdr *trhdr = buf;
+       struct smb2_transform_hdr *trhdr = smb2_get_msg(buf);
 
        return trhdr->ProtocolId == SMB2_TRANSFORM_PROTO_NUM;
 }
@@ -8491,12 +8497,10 @@ int smb3_decrypt_req(struct ksmbd_work *work)
        struct ksmbd_conn *conn = work->conn;
        struct ksmbd_session *sess;
        char *buf = work->request_buf;
-       struct smb2_hdr *hdr;
        unsigned int pdu_length = get_rfc1002_len(buf);
        struct kvec iov[2];
-       int buf_data_size = pdu_length + 4 -
-               sizeof(struct smb2_transform_hdr);
-       struct smb2_transform_hdr *tr_hdr = (struct smb2_transform_hdr *)buf;
+       int buf_data_size = pdu_length - sizeof(struct smb2_transform_hdr);
+       struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf);
        int rc = 0;
 
        if (buf_data_size < sizeof(struct smb2_hdr)) {
@@ -8518,16 +8522,15 @@ int smb3_decrypt_req(struct ksmbd_work *work)
        }
 
        iov[0].iov_base = buf;
-       iov[0].iov_len = sizeof(struct smb2_transform_hdr);
-       iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr);
+       iov[0].iov_len = sizeof(struct smb2_transform_hdr) + 4;
+       iov[1].iov_base = buf + sizeof(struct smb2_transform_hdr) + 4;
        iov[1].iov_len = buf_data_size;
        rc = ksmbd_crypt_message(conn, iov, 2, 0);
        if (rc)
                return rc;
 
        memmove(buf + 4, iov[1].iov_base, buf_data_size);
-       hdr = (struct smb2_hdr *)buf;
-       hdr->smb2_buf_length = cpu_to_be32(buf_data_size);
+       *(__be32 *)buf = cpu_to_be32(buf_data_size);
 
        return rc;
 }
@@ -8535,7 +8538,7 @@ int smb3_decrypt_req(struct ksmbd_work *work)
 bool smb3_11_final_sess_setup_resp(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
-       struct smb2_hdr *rsp = work->response_buf;
+       struct smb2_hdr *rsp = smb2_get_msg(work->response_buf);
 
        if (conn->dialect < SMB30_PROT_ID)
                return false;
index ff5a2f0..4a3e433 100644 (file)
 #include "ntlmssp.h"
 #include "smbacl.h"
 
-/*
- * Note that, due to trying to use names similar to the protocol specifications,
- * there are many mixed case field names in the structures below.  Although
- * this does not match typical Linux kernel style, it is necessary to be
- * able to match against the protocol specfication.
- *
- * SMB2 commands
- * Some commands have minimal (wct=0,bcc=0), or uninteresting, responses
- * (ie no useful data other than the SMB error code itself) and are marked such.
- * Knowing this helps avoid response buffer allocations and copy in some cases.
- */
-
-/* List of commands in host endian */
-#define SMB2_NEGOTIATE_HE      0x0000
-#define SMB2_SESSION_SETUP_HE  0x0001
-#define SMB2_LOGOFF_HE         0x0002 /* trivial request/resp */
-#define SMB2_TREE_CONNECT_HE   0x0003
-#define SMB2_TREE_DISCONNECT_HE        0x0004 /* trivial req/resp */
-#define SMB2_CREATE_HE         0x0005
-#define SMB2_CLOSE_HE          0x0006
-#define SMB2_FLUSH_HE          0x0007 /* trivial resp */
-#define SMB2_READ_HE           0x0008
-#define SMB2_WRITE_HE          0x0009
-#define SMB2_LOCK_HE           0x000A
-#define SMB2_IOCTL_HE          0x000B
-#define SMB2_CANCEL_HE         0x000C
-#define SMB2_ECHO_HE           0x000D
-#define SMB2_QUERY_DIRECTORY_HE        0x000E
-#define SMB2_CHANGE_NOTIFY_HE  0x000F
-#define SMB2_QUERY_INFO_HE     0x0010
-#define SMB2_SET_INFO_HE       0x0011
-#define SMB2_OPLOCK_BREAK_HE   0x0012
-
-/* The same list in little endian */
-#define SMB2_NEGOTIATE         cpu_to_le16(SMB2_NEGOTIATE_HE)
-#define SMB2_SESSION_SETUP     cpu_to_le16(SMB2_SESSION_SETUP_HE)
-#define SMB2_LOGOFF            cpu_to_le16(SMB2_LOGOFF_HE)
-#define SMB2_TREE_CONNECT      cpu_to_le16(SMB2_TREE_CONNECT_HE)
-#define SMB2_TREE_DISCONNECT   cpu_to_le16(SMB2_TREE_DISCONNECT_HE)
-#define SMB2_CREATE            cpu_to_le16(SMB2_CREATE_HE)
-#define SMB2_CLOSE             cpu_to_le16(SMB2_CLOSE_HE)
-#define SMB2_FLUSH             cpu_to_le16(SMB2_FLUSH_HE)
-#define SMB2_READ              cpu_to_le16(SMB2_READ_HE)
-#define SMB2_WRITE             cpu_to_le16(SMB2_WRITE_HE)
-#define SMB2_LOCK              cpu_to_le16(SMB2_LOCK_HE)
-#define SMB2_IOCTL             cpu_to_le16(SMB2_IOCTL_HE)
-#define SMB2_CANCEL            cpu_to_le16(SMB2_CANCEL_HE)
-#define SMB2_ECHO              cpu_to_le16(SMB2_ECHO_HE)
-#define SMB2_QUERY_DIRECTORY   cpu_to_le16(SMB2_QUERY_DIRECTORY_HE)
-#define SMB2_CHANGE_NOTIFY     cpu_to_le16(SMB2_CHANGE_NOTIFY_HE)
-#define SMB2_QUERY_INFO                cpu_to_le16(SMB2_QUERY_INFO_HE)
-#define SMB2_SET_INFO          cpu_to_le16(SMB2_SET_INFO_HE)
-#define SMB2_OPLOCK_BREAK      cpu_to_le16(SMB2_OPLOCK_BREAK_HE)
-
 /*Create Action Flags*/
 #define FILE_SUPERSEDED                0x00000000
 #define FILE_OPENED            0x00000001
@@ -96,9 +42,6 @@
 /* SMB2 Max Credits */
 #define SMB2_MAX_CREDITS               8192
 
-#define SMB2_CLIENT_GUID_SIZE          16
-#define SMB2_CREATE_GUID_SIZE          16
-
 /* Maximum buffer size value we can send with 1 credit */
 #define SMB2_MAX_BUFFER_SIZE 65536
 
 /* BB FIXME - analyze following length BB */
 #define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
 
-#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe) /* 'B''M''S' */
-#define SMB2_TRANSFORM_PROTO_NUM cpu_to_le32(0x424d53fd)
-
 #define SMB21_DEFAULT_IOSIZE   (1024 * 1024)
 #define SMB3_DEFAULT_IOSIZE    (4 * 1024 * 1024)
 #define SMB3_DEFAULT_TRANS_SIZE        (1024 * 1024)
 #define SMB3_MAX_IOSIZE        (8 * 1024 * 1024)
 
 /*
- * SMB2 Header Definition
- *
- * "MBZ" :  Must be Zero
- * "BB"  :  BugBug, Something to check/review/analyze later
- * "PDU" :  "Protocol Data Unit" (ie a network "frame")
- *
- */
-
-#define __SMB2_HEADER_STRUCTURE_SIZE   64
-#define SMB2_HEADER_STRUCTURE_SIZE                             \
-       cpu_to_le16(__SMB2_HEADER_STRUCTURE_SIZE)
-
-struct smb2_hdr {
-       __be32 smb2_buf_length; /* big endian on wire */
-                               /*
-                                * length is only two or three bytes - with
-                                * one or two byte type preceding it that MBZ
-                                */
-       __le32 ProtocolId;      /* 0xFE 'S' 'M' 'B' */
-       __le16 StructureSize;   /* 64 */
-       __le16 CreditCharge;    /* MBZ */
-       __le32 Status;          /* Error from server */
-       __le16 Command;
-       __le16 CreditRequest;   /* CreditResponse */
-       __le32 Flags;
-       __le32 NextCommand;
-       __le64 MessageId;
-       union {
-               struct {
-                       __le32 ProcessId;
-                       __le32  TreeId;
-               } __packed SyncId;
-               __le64  AsyncId;
-       } __packed Id;
-       __le64  SessionId;
-       __u8   Signature[16];
-} __packed;
-
-struct smb2_pdu {
-       struct smb2_hdr hdr;
-       __le16 StructureSize2; /* size of wct area (varies, request specific) */
-} __packed;
-
-#define SMB3_AES_CCM_NONCE 11
-#define SMB3_AES_GCM_NONCE 12
-
-struct smb2_transform_hdr {
-       __be32 smb2_buf_length; /* big endian on wire */
-       /*
-        * length is only two or three bytes - with
-        * one or two byte type preceding it that MBZ
-        */
-       __le32 ProtocolId;      /* 0xFD 'S' 'M' 'B' */
-       __u8   Signature[16];
-       __u8   Nonce[16];
-       __le32 OriginalMessageSize;
-       __u16  Reserved1;
-       __le16 Flags; /* EncryptionAlgorithm */
-       __le64  SessionId;
-} __packed;
-
-/*
- *     SMB2 flag definitions
- */
-#define SMB2_FLAGS_SERVER_TO_REDIR     cpu_to_le32(0x00000001)
-#define SMB2_FLAGS_ASYNC_COMMAND       cpu_to_le32(0x00000002)
-#define SMB2_FLAGS_RELATED_OPERATIONS  cpu_to_le32(0x00000004)
-#define SMB2_FLAGS_SIGNED              cpu_to_le32(0x00000008)
-#define SMB2_FLAGS_DFS_OPERATIONS      cpu_to_le32(0x10000000)
-#define SMB2_FLAGS_REPLAY_OPERATIONS   cpu_to_le32(0x20000000)
-
-/*
  *     Definitions for SMB2 Protocol Data Units (network frames)
  *
  *  See MS-SMB2.PDF specification for protocol details.
@@ -209,425 +77,30 @@ struct smb2_err_rsp {
        __u8   ErrorData[1];  /* variable length */
 } __packed;
 
-struct smb2_negotiate_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 36 */
-       __le16 DialectCount;
-       __le16 SecurityMode;
-       __le16 Reserved;        /* MBZ */
-       __le32 Capabilities;
-       __u8   ClientGUID[SMB2_CLIENT_GUID_SIZE];
-       /* In SMB3.02 and earlier next three were MBZ le64 ClientStartTime */
-       __le32 NegotiateContextOffset; /* SMB3.1.1 only. MBZ earlier */
-       __le16 NegotiateContextCount;  /* SMB3.1.1 only. MBZ earlier */
-       __le16 Reserved2;
-       __le16 Dialects[1]; /* One dialect (vers=) at a time for now */
-} __packed;
-
-/* SecurityMode flags */
-#define SMB2_NEGOTIATE_SIGNING_ENABLED_LE      cpu_to_le16(0x0001)
-#define SMB2_NEGOTIATE_SIGNING_REQUIRED                0x0002
-#define SMB2_NEGOTIATE_SIGNING_REQUIRED_LE     cpu_to_le16(0x0002)
-/* Capabilities flags */
-#define SMB2_GLOBAL_CAP_DFS            0x00000001
-#define SMB2_GLOBAL_CAP_LEASING                0x00000002 /* Resp only New to SMB2.1 */
-#define SMB2_GLOBAL_CAP_LARGE_MTU      0X00000004 /* Resp only New to SMB2.1 */
-#define SMB2_GLOBAL_CAP_MULTI_CHANNEL  0x00000008 /* New to SMB3 */
-#define SMB2_GLOBAL_CAP_PERSISTENT_HANDLES 0x00000010 /* New to SMB3 */
-#define SMB2_GLOBAL_CAP_DIRECTORY_LEASING  0x00000020 /* New to SMB3 */
-#define SMB2_GLOBAL_CAP_ENCRYPTION     0x00000040 /* New to SMB3 */
-/* Internal types */
-#define SMB2_NT_FIND                   0x00100000
-#define SMB2_LARGE_FILES               0x00200000
-
-#define SMB311_SALT_SIZE                       32
-/* Hash Algorithm Types */
-#define SMB2_PREAUTH_INTEGRITY_SHA512  cpu_to_le16(0x0001)
-
-#define PREAUTH_HASHVALUE_SIZE         64
-
 struct preauth_integrity_info {
        /* PreAuth integrity Hash ID */
        __le16                  Preauth_HashId;
        /* PreAuth integrity Hash Value */
-       __u8                    Preauth_HashValue[PREAUTH_HASHVALUE_SIZE];
+       __u8                    Preauth_HashValue[SMB2_PREAUTH_HASH_SIZE];
 };
 
-/* offset is sizeof smb2_negotiate_rsp - 4 but rounded up to 8 bytes. */
+/* offset is sizeof smb2_negotiate_rsp but rounded up to 8 bytes. */
 #ifdef CONFIG_SMB_SERVER_KERBEROS5
-/* sizeof(struct smb2_negotiate_rsp) - 4 =
+/* sizeof(struct smb2_negotiate_rsp) =
  * header(64) + response(64) + GSS_LENGTH(96) + GSS_PADDING(0)
  */
 #define OFFSET_OF_NEG_CONTEXT  0xe0
 #else
-/* sizeof(struct smb2_negotiate_rsp) - 4 =
+/* sizeof(struct smb2_negotiate_rsp) =
  * header(64) + response(64) + GSS_LENGTH(74) + GSS_PADDING(6)
  */
 #define OFFSET_OF_NEG_CONTEXT  0xd0
 #endif
 
-#define SMB2_PREAUTH_INTEGRITY_CAPABILITIES    cpu_to_le16(1)
-#define SMB2_ENCRYPTION_CAPABILITIES           cpu_to_le16(2)
-#define SMB2_COMPRESSION_CAPABILITIES          cpu_to_le16(3)
-#define SMB2_NETNAME_NEGOTIATE_CONTEXT_ID      cpu_to_le16(5)
-#define SMB2_SIGNING_CAPABILITIES              cpu_to_le16(8)
-#define SMB2_POSIX_EXTENSIONS_AVAILABLE                cpu_to_le16(0x100)
-
-struct smb2_neg_context {
-       __le16  ContextType;
-       __le16  DataLength;
-       __le32  Reserved;
-       /* Followed by array of data */
-} __packed;
-
-struct smb2_preauth_neg_context {
-       __le16  ContextType; /* 1 */
-       __le16  DataLength;
-       __le32  Reserved;
-       __le16  HashAlgorithmCount; /* 1 */
-       __le16  SaltLength;
-       __le16  HashAlgorithms; /* HashAlgorithms[0] since only one defined */
-       __u8    Salt[SMB311_SALT_SIZE];
-} __packed;
-
-/* Encryption Algorithms Ciphers */
-#define SMB2_ENCRYPTION_AES128_CCM     cpu_to_le16(0x0001)
-#define SMB2_ENCRYPTION_AES128_GCM     cpu_to_le16(0x0002)
-#define SMB2_ENCRYPTION_AES256_CCM     cpu_to_le16(0x0003)
-#define SMB2_ENCRYPTION_AES256_GCM     cpu_to_le16(0x0004)
-
-struct smb2_encryption_neg_context {
-       __le16  ContextType; /* 2 */
-       __le16  DataLength;
-       __le32  Reserved;
-       /* CipherCount usally 2, but can be 3 when AES256-GCM enabled */
-       __le16  CipherCount; /* AES-128-GCM and AES-128-CCM by default */
-       __le16  Ciphers[];
-} __packed;
-
-#define SMB3_COMPRESS_NONE     cpu_to_le16(0x0000)
-#define SMB3_COMPRESS_LZNT1    cpu_to_le16(0x0001)
-#define SMB3_COMPRESS_LZ77     cpu_to_le16(0x0002)
-#define SMB3_COMPRESS_LZ77_HUFF        cpu_to_le16(0x0003)
-
-struct smb2_compression_ctx {
-       __le16  ContextType; /* 3 */
-       __le16  DataLength;
-       __le32  Reserved;
-       __le16  CompressionAlgorithmCount;
-       __u16   Padding;
-       __le32  Reserved1;
-       __le16  CompressionAlgorithms[];
-} __packed;
-
-#define POSIX_CTXT_DATA_LEN     16
-struct smb2_posix_neg_context {
-       __le16  ContextType; /* 0x100 */
-       __le16  DataLength;
-       __le32  Reserved;
-       __u8    Name[16]; /* POSIX ctxt GUID 93AD25509CB411E7B42383DE968BCD7C */
-} __packed;
-
-struct smb2_netname_neg_context {
-       __le16  ContextType; /* 0x100 */
-       __le16  DataLength;
-       __le32  Reserved;
-       __le16  NetName[]; /* hostname of target converted to UCS-2 */
-} __packed;
-
-/* Signing algorithms */
-#define SIGNING_ALG_HMAC_SHA256                cpu_to_le16(0)
-#define SIGNING_ALG_AES_CMAC           cpu_to_le16(1)
-#define SIGNING_ALG_AES_GMAC           cpu_to_le16(2)
-
-struct smb2_signing_capabilities {
-       __le16  ContextType; /* 8 */
-       __le16  DataLength;
-       __le32  Reserved;
-       __le16  SigningAlgorithmCount;
-       __le16  SigningAlgorithms[];
-} __packed;
-
-struct smb2_negotiate_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 65 */
-       __le16 SecurityMode;
-       __le16 DialectRevision;
-       __le16 NegotiateContextCount; /* Prior to SMB3.1.1 was Reserved & MBZ */
-       __u8   ServerGUID[16];
-       __le32 Capabilities;
-       __le32 MaxTransactSize;
-       __le32 MaxReadSize;
-       __le32 MaxWriteSize;
-       __le64 SystemTime;      /* MBZ */
-       __le64 ServerStartTime;
-       __le16 SecurityBufferOffset;
-       __le16 SecurityBufferLength;
-       __le32 NegotiateContextOffset;  /* Pre:SMB3.1.1 was reserved/ignored */
-       __u8   Buffer[1];       /* variable length GSS security buffer */
-} __packed;
-
-/* Flags */
-#define SMB2_SESSION_REQ_FLAG_BINDING          0x01
-#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA     0x04
-
 #define SMB2_SESSION_EXPIRED           (0)
 #define SMB2_SESSION_IN_PROGRESS       BIT(0)
 #define SMB2_SESSION_VALID             BIT(1)
 
-/* Flags */
-#define SMB2_SESSION_REQ_FLAG_BINDING          0x01
-#define SMB2_SESSION_REQ_FLAG_ENCRYPT_DATA     0x04
-
-struct smb2_sess_setup_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 25 */
-       __u8   Flags;
-       __u8   SecurityMode;
-       __le32 Capabilities;
-       __le32 Channel;
-       __le16 SecurityBufferOffset;
-       __le16 SecurityBufferLength;
-       __le64 PreviousSessionId;
-       __u8   Buffer[1];       /* variable length GSS security buffer */
-} __packed;
-
-/* Flags/Reserved for SMB3.1.1 */
-#define SMB2_SHAREFLAG_CLUSTER_RECONNECT       0x0001
-
-/* Currently defined SessionFlags */
-#define SMB2_SESSION_FLAG_IS_GUEST_LE          cpu_to_le16(0x0001)
-#define SMB2_SESSION_FLAG_IS_NULL_LE           cpu_to_le16(0x0002)
-#define SMB2_SESSION_FLAG_ENCRYPT_DATA_LE      cpu_to_le16(0x0004)
-struct smb2_sess_setup_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 9 */
-       __le16 SessionFlags;
-       __le16 SecurityBufferOffset;
-       __le16 SecurityBufferLength;
-       __u8   Buffer[1];       /* variable length GSS security buffer */
-} __packed;
-
-struct smb2_logoff_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 4 */
-       __le16 Reserved;
-} __packed;
-
-struct smb2_logoff_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 4 */
-       __le16 Reserved;
-} __packed;
-
-struct smb2_tree_connect_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 9 */
-       __le16 Reserved;        /* Flags in SMB3.1.1 */
-       __le16 PathOffset;
-       __le16 PathLength;
-       __u8   Buffer[1];       /* variable length */
-} __packed;
-
-struct smb2_tree_connect_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 16 */
-       __u8   ShareType;  /* see below */
-       __u8   Reserved;
-       __le32 ShareFlags; /* see below */
-       __le32 Capabilities; /* see below */
-       __le32 MaximalAccess;
-} __packed;
-
-/* Possible ShareType values */
-#define SMB2_SHARE_TYPE_DISK   0x01
-#define SMB2_SHARE_TYPE_PIPE   0x02
-#define        SMB2_SHARE_TYPE_PRINT   0x03
-
-/*
- * Possible ShareFlags - exactly one and only one of the first 4 caching flags
- * must be set (any of the remaining, SHI1005, flags may be set individually
- * or in combination.
- */
-#define SMB2_SHAREFLAG_MANUAL_CACHING                  0x00000000
-#define SMB2_SHAREFLAG_AUTO_CACHING                    0x00000010
-#define SMB2_SHAREFLAG_VDO_CACHING                     0x00000020
-#define SMB2_SHAREFLAG_NO_CACHING                      0x00000030
-#define SHI1005_FLAGS_DFS                              0x00000001
-#define SHI1005_FLAGS_DFS_ROOT                         0x00000002
-#define SHI1005_FLAGS_RESTRICT_EXCLUSIVE_OPENS         0x00000100
-#define SHI1005_FLAGS_FORCE_SHARED_DELETE              0x00000200
-#define SHI1005_FLAGS_ALLOW_NAMESPACE_CACHING          0x00000400
-#define SHI1005_FLAGS_ACCESS_BASED_DIRECTORY_ENUM      0x00000800
-#define SHI1005_FLAGS_FORCE_LEVELII_OPLOCK             0x00001000
-#define SHI1005_FLAGS_ENABLE_HASH                      0x00002000
-
-/* Possible share capabilities */
-#define SMB2_SHARE_CAP_DFS     cpu_to_le32(0x00000008)
-
-struct smb2_tree_disconnect_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 4 */
-       __le16 Reserved;
-} __packed;
-
-struct smb2_tree_disconnect_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 4 */
-       __le16 Reserved;
-} __packed;
-
-#define ATTR_READONLY_LE       cpu_to_le32(ATTR_READONLY)
-#define ATTR_HIDDEN_LE         cpu_to_le32(ATTR_HIDDEN)
-#define ATTR_SYSTEM_LE         cpu_to_le32(ATTR_SYSTEM)
-#define ATTR_DIRECTORY_LE      cpu_to_le32(ATTR_DIRECTORY)
-#define ATTR_ARCHIVE_LE                cpu_to_le32(ATTR_ARCHIVE)
-#define ATTR_NORMAL_LE         cpu_to_le32(ATTR_NORMAL)
-#define ATTR_TEMPORARY_LE      cpu_to_le32(ATTR_TEMPORARY)
-#define ATTR_SPARSE_FILE_LE    cpu_to_le32(ATTR_SPARSE)
-#define ATTR_REPARSE_POINT_LE  cpu_to_le32(ATTR_REPARSE)
-#define ATTR_COMPRESSED_LE     cpu_to_le32(ATTR_COMPRESSED)
-#define ATTR_OFFLINE_LE                cpu_to_le32(ATTR_OFFLINE)
-#define ATTR_NOT_CONTENT_INDEXED_LE    cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
-#define ATTR_ENCRYPTED_LE      cpu_to_le32(ATTR_ENCRYPTED)
-#define ATTR_INTEGRITY_STREAML_LE      cpu_to_le32(0x00008000)
-#define ATTR_NO_SCRUB_DATA_LE  cpu_to_le32(0x00020000)
-#define ATTR_MASK_LE           cpu_to_le32(0x00007FB7)
-
-/* Oplock levels */
-#define SMB2_OPLOCK_LEVEL_NONE         0x00
-#define SMB2_OPLOCK_LEVEL_II           0x01
-#define SMB2_OPLOCK_LEVEL_EXCLUSIVE    0x08
-#define SMB2_OPLOCK_LEVEL_BATCH                0x09
-#define SMB2_OPLOCK_LEVEL_LEASE                0xFF
-/* Non-spec internal type */
-#define SMB2_OPLOCK_LEVEL_NOCHANGE     0x99
-
-/* Desired Access Flags */
-#define FILE_READ_DATA_LE              cpu_to_le32(0x00000001)
-#define FILE_LIST_DIRECTORY_LE         cpu_to_le32(0x00000001)
-#define FILE_WRITE_DATA_LE             cpu_to_le32(0x00000002)
-#define FILE_ADD_FILE_LE               cpu_to_le32(0x00000002)
-#define FILE_APPEND_DATA_LE            cpu_to_le32(0x00000004)
-#define FILE_ADD_SUBDIRECTORY_LE       cpu_to_le32(0x00000004)
-#define FILE_READ_EA_LE                        cpu_to_le32(0x00000008)
-#define FILE_WRITE_EA_LE               cpu_to_le32(0x00000010)
-#define FILE_EXECUTE_LE                        cpu_to_le32(0x00000020)
-#define FILE_TRAVERSE_LE               cpu_to_le32(0x00000020)
-#define FILE_DELETE_CHILD_LE           cpu_to_le32(0x00000040)
-#define FILE_READ_ATTRIBUTES_LE                cpu_to_le32(0x00000080)
-#define FILE_WRITE_ATTRIBUTES_LE       cpu_to_le32(0x00000100)
-#define FILE_DELETE_LE                 cpu_to_le32(0x00010000)
-#define FILE_READ_CONTROL_LE           cpu_to_le32(0x00020000)
-#define FILE_WRITE_DAC_LE              cpu_to_le32(0x00040000)
-#define FILE_WRITE_OWNER_LE            cpu_to_le32(0x00080000)
-#define FILE_SYNCHRONIZE_LE            cpu_to_le32(0x00100000)
-#define FILE_ACCESS_SYSTEM_SECURITY_LE cpu_to_le32(0x01000000)
-#define FILE_MAXIMAL_ACCESS_LE         cpu_to_le32(0x02000000)
-#define FILE_GENERIC_ALL_LE            cpu_to_le32(0x10000000)
-#define FILE_GENERIC_EXECUTE_LE                cpu_to_le32(0x20000000)
-#define FILE_GENERIC_WRITE_LE          cpu_to_le32(0x40000000)
-#define FILE_GENERIC_READ_LE           cpu_to_le32(0x80000000)
-#define DESIRED_ACCESS_MASK            cpu_to_le32(0xF21F01FF)
-
-/* ShareAccess Flags */
-#define FILE_SHARE_READ_LE             cpu_to_le32(0x00000001)
-#define FILE_SHARE_WRITE_LE            cpu_to_le32(0x00000002)
-#define FILE_SHARE_DELETE_LE           cpu_to_le32(0x00000004)
-#define FILE_SHARE_ALL_LE              cpu_to_le32(0x00000007)
-
-/* CreateDisposition Flags */
-#define FILE_SUPERSEDE_LE              cpu_to_le32(0x00000000)
-#define FILE_OPEN_LE                   cpu_to_le32(0x00000001)
-#define FILE_CREATE_LE                 cpu_to_le32(0x00000002)
-#define        FILE_OPEN_IF_LE                 cpu_to_le32(0x00000003)
-#define FILE_OVERWRITE_LE              cpu_to_le32(0x00000004)
-#define FILE_OVERWRITE_IF_LE           cpu_to_le32(0x00000005)
-#define FILE_CREATE_MASK_LE            cpu_to_le32(0x00000007)
-
-#define FILE_READ_DESIRED_ACCESS_LE    (FILE_READ_DATA_LE |            \
-                                       FILE_READ_EA_LE |               \
-                                       FILE_GENERIC_READ_LE)
-#define FILE_WRITE_DESIRE_ACCESS_LE    (FILE_WRITE_DATA_LE |           \
-                                       FILE_APPEND_DATA_LE |           \
-                                       FILE_WRITE_EA_LE |              \
-                                       FILE_WRITE_ATTRIBUTES_LE |      \
-                                       FILE_GENERIC_WRITE_LE)
-
-/* Impersonation Levels */
-#define IL_ANONYMOUS_LE                cpu_to_le32(0x00000000)
-#define IL_IDENTIFICATION_LE   cpu_to_le32(0x00000001)
-#define IL_IMPERSONATION_LE    cpu_to_le32(0x00000002)
-#define IL_DELEGATE_LE         cpu_to_le32(0x00000003)
-
-/* Create Context Values */
-#define SMB2_CREATE_EA_BUFFER                  "ExtA" /* extended attributes */
-#define SMB2_CREATE_SD_BUFFER                  "SecD" /* security descriptor */
-#define SMB2_CREATE_DURABLE_HANDLE_REQUEST     "DHnQ"
-#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT   "DHnC"
-#define SMB2_CREATE_ALLOCATION_SIZE            "AlSi"
-#define SMB2_CREATE_QUERY_MAXIMAL_ACCESS_REQUEST "MxAc"
-#define SMB2_CREATE_TIMEWARP_REQUEST           "TWrp"
-#define SMB2_CREATE_QUERY_ON_DISK_ID           "QFid"
-#define SMB2_CREATE_REQUEST_LEASE              "RqLs"
-#define SMB2_CREATE_DURABLE_HANDLE_REQUEST_V2   "DH2Q"
-#define SMB2_CREATE_DURABLE_HANDLE_RECONNECT_V2 "DH2C"
-#define SMB2_CREATE_APP_INSTANCE_ID     "\x45\xBC\xA6\x6A\xEF\xA7\xF7\x4A\x90\x08\xFA\x46\x2E\x14\x4D\x74"
- #define SMB2_CREATE_APP_INSTANCE_VERSION      "\xB9\x82\xD0\xB7\x3B\x56\x07\x4F\xA0\x7B\x52\x4A\x81\x16\xA0\x10"
-#define SVHDX_OPEN_DEVICE_CONTEXT       0x83CE6F1AD851E0986E34401CC9BCFCE9
-#define SMB2_CREATE_TAG_POSIX          "\x93\xAD\x25\x50\x9C\xB4\x11\xE7\xB4\x23\x83\xDE\x96\x8B\xCD\x7C"
-
-struct smb2_create_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 57 */
-       __u8   SecurityFlags;
-       __u8   RequestedOplockLevel;
-       __le32 ImpersonationLevel;
-       __le64 SmbCreateFlags;
-       __le64 Reserved;
-       __le32 DesiredAccess;
-       __le32 FileAttributes;
-       __le32 ShareAccess;
-       __le32 CreateDisposition;
-       __le32 CreateOptions;
-       __le16 NameOffset;
-       __le16 NameLength;
-       __le32 CreateContextsOffset;
-       __le32 CreateContextsLength;
-       __u8   Buffer[0];
-} __packed;
-
-struct smb2_create_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 89 */
-       __u8   OplockLevel;
-       __u8   Reserved;
-       __le32 CreateAction;
-       __le64 CreationTime;
-       __le64 LastAccessTime;
-       __le64 LastWriteTime;
-       __le64 ChangeTime;
-       __le64 AllocationSize;
-       __le64 EndofFile;
-       __le32 FileAttributes;
-       __le32 Reserved2;
-       __le64  PersistentFileId;
-       __le64  VolatileFileId;
-       __le32 CreateContextsOffset;
-       __le32 CreateContextsLength;
-       __u8   Buffer[1];
-} __packed;
-
-struct create_context {
-       __le32 Next;
-       __le16 NameOffset;
-       __le16 NameLength;
-       __le16 Reserved;
-       __le16 DataOffset;
-       __le32 DataLength;
-       __u8 Buffer[0];
-} __packed;
-
 struct create_durable_req_v2 {
        struct create_context ccontext;
        __u8   Name[8];
@@ -743,22 +216,21 @@ struct create_posix_rsp {
 
 #define SMB2_LEASE_FLAG_BREAK_IN_PROGRESS_LE   cpu_to_le32(0x02)
 
+#define SMB2_LEASE_KEY_SIZE                    16
+
 struct lease_context {
-       __le64 LeaseKeyLow;
-       __le64 LeaseKeyHigh;
+       __u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
        __le32 LeaseState;
        __le32 LeaseFlags;
        __le64 LeaseDuration;
 } __packed;
 
 struct lease_context_v2 {
-       __le64 LeaseKeyLow;
-       __le64 LeaseKeyHigh;
+       __u8 LeaseKey[SMB2_LEASE_KEY_SIZE];
        __le32 LeaseState;
        __le32 LeaseFlags;
        __le64 LeaseDuration;
-       __le64 ParentLeaseKeyLow;
-       __le64 ParentLeaseKeyHigh;
+       __u8 ParentLeaseKey[SMB2_LEASE_KEY_SIZE];
        __le16 Epoch;
        __le16 Reserved;
 } __packed;
@@ -776,114 +248,12 @@ struct create_lease_v2 {
        __u8   Pad[4];
 } __packed;
 
-/* Currently defined values for close flags */
-#define SMB2_CLOSE_FLAG_POSTQUERY_ATTRIB       cpu_to_le16(0x0001)
-struct smb2_close_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 24 */
-       __le16 Flags;
-       __le32 Reserved;
-       __le64  PersistentFileId;
-       __le64  VolatileFileId;
-} __packed;
-
-struct smb2_close_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* 60 */
-       __le16 Flags;
-       __le32 Reserved;
-       __le64 CreationTime;
-       __le64 LastAccessTime;
-       __le64 LastWriteTime;
-       __le64 ChangeTime;
-       __le64 AllocationSize;  /* Beginning of FILE_STANDARD_INFO equivalent */
-       __le64 EndOfFile;
-       __le32 Attributes;
-} __packed;
-
-struct smb2_flush_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;   /* Must be 24 */
-       __le16 Reserved1;
-       __le32 Reserved2;
-       __le64  PersistentFileId;
-       __le64  VolatileFileId;
-} __packed;
-
-struct smb2_flush_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize;
-       __le16 Reserved;
-} __packed;
-
 struct smb2_buffer_desc_v1 {
        __le64 offset;
        __le32 token;
        __le32 length;
 } __packed;
 
-#define SMB2_CHANNEL_NONE              cpu_to_le32(0x00000000)
-#define SMB2_CHANNEL_RDMA_V1           cpu_to_le32(0x00000001)
-#define SMB2_CHANNEL_RDMA_V1_INVALIDATE cpu_to_le32(0x00000002)
-
-struct smb2_read_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 49 */
-       __u8   Padding; /* offset from start of SMB2 header to place read */
-       __u8   Reserved;
-       __le32 Length;
-       __le64 Offset;
-       __le64  PersistentFileId;
-       __le64  VolatileFileId;
-       __le32 MinimumCount;
-       __le32 Channel; /* Reserved MBZ */
-       __le32 RemainingBytes;
-       __le16 ReadChannelInfoOffset; /* Reserved MBZ */
-       __le16 ReadChannelInfoLength; /* Reserved MBZ */
-       __u8   Buffer[1];
-} __packed;
-
-struct smb2_read_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 17 */
-       __u8   DataOffset;
-       __u8   Reserved;
-       __le32 DataLength;
-       __le32 DataRemaining;
-       __u32  Reserved2;
-       __u8   Buffer[1];
-} __packed;
-
-/* For write request Flags field below the following flag is defined: */
-#define SMB2_WRITEFLAG_WRITE_THROUGH 0x00000001
-
-struct smb2_write_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 49 */
-       __le16 DataOffset; /* offset from start of SMB2 header to write data */
-       __le32 Length;
-       __le64 Offset;
-       __le64  PersistentFileId;
-       __le64  VolatileFileId;
-       __le32 Channel; /* Reserved MBZ */
-       __le32 RemainingBytes;
-       __le16 WriteChannelInfoOffset; /* Reserved MBZ */
-       __le16 WriteChannelInfoLength; /* Reserved MBZ */
-       __le32 Flags;
-       __u8   Buffer[1];
-} __packed;
-
-struct smb2_write_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 17 */
-       __u8   DataOffset;
-       __u8   Reserved;
-       __le32 DataLength;
-       __le32 DataRemaining;
-       __u32  Reserved2;
-       __u8   Buffer[1];
-} __packed;
-
 #define SMB2_0_IOCTL_IS_FSCTL 0x00000001
 
 struct duplicate_extents_to_file {
@@ -1033,43 +403,6 @@ struct reparse_data_buffer {
        __u8    DataBuffer[]; /* Variable Length */
 } __packed;
 
-/* Completion Filter flags for Notify */
-#define FILE_NOTIFY_CHANGE_FILE_NAME   0x00000001
-#define FILE_NOTIFY_CHANGE_DIR_NAME    0x00000002
-#define FILE_NOTIFY_CHANGE_NAME                0x00000003
-#define FILE_NOTIFY_CHANGE_ATTRIBUTES  0x00000004
-#define FILE_NOTIFY_CHANGE_SIZE                0x00000008
-#define FILE_NOTIFY_CHANGE_LAST_WRITE  0x00000010
-#define FILE_NOTIFY_CHANGE_LAST_ACCESS 0x00000020
-#define FILE_NOTIFY_CHANGE_CREATION    0x00000040
-#define FILE_NOTIFY_CHANGE_EA          0x00000080
-#define FILE_NOTIFY_CHANGE_SECURITY    0x00000100
-#define FILE_NOTIFY_CHANGE_STREAM_NAME 0x00000200
-#define FILE_NOTIFY_CHANGE_STREAM_SIZE 0x00000400
-#define FILE_NOTIFY_CHANGE_STREAM_WRITE        0x00000800
-
-/* Flags */
-#define SMB2_WATCH_TREE        0x0001
-
-struct smb2_notify_req {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 32 */
-       __le16 Flags;
-       __le32 OutputBufferLength;
-       __le64 PersistentFileId;
-       __le64 VolatileFileId;
-       __u32 CompletionFileter;
-       __u32 Reserved;
-} __packed;
-
-struct smb2_notify_rsp {
-       struct smb2_hdr hdr;
-       __le16 StructureSize; /* Must be 9 */
-       __le16 OutputBufferOffset;
-       __le32 OutputBufferLength;
-       __u8 Buffer[1];
-} __packed;
-
 /* SMB2 Notify Action Flags */
 #define FILE_ACTION_ADDED              0x00000001
 #define FILE_ACTION_REMOVED            0x00000002
@@ -1528,7 +861,7 @@ struct smb2_file_pos_info {
        __le64 CurrentByteOffset;
 } __packed;
 
-#define FILE_MODE_INFO_MASK cpu_to_le32(0x0000103e)
+#define FILE_MODE_INFO_MASK cpu_to_le32(0x0000100e)
 
 struct smb2_file_mode_info {
        __le32 Mode;
@@ -1705,4 +1038,13 @@ int smb2_ioctl(struct ksmbd_work *work);
 int smb2_oplock_break(struct ksmbd_work *work);
 int smb2_notify(struct ksmbd_work *ksmbd_work);
 
+/*
+ * Get the body of the smb2 message excluding the 4 byte rfc1002 headers
+ * from request/response buffer.
+ */
+static inline void *smb2_get_msg(void *buf)
+{
+       return buf + 4;
+}
+
 #endif /* _SMB2PDU_H */
index 707490a..ef7f42b 100644 (file)
@@ -132,7 +132,7 @@ int ksmbd_lookup_protocol_idx(char *str)
  */
 int ksmbd_verify_smb_message(struct ksmbd_work *work)
 {
-       struct smb2_hdr *smb2_hdr = work->request_buf + work->next_smb2_rcv_hdr_off;
+       struct smb2_hdr *smb2_hdr = ksmbd_req_buf_next(work);
        struct smb_hdr *hdr;
 
        if (smb2_hdr->ProtocolId == SMB2_PROTO_NUMBER)
@@ -239,14 +239,14 @@ int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count)
 static int ksmbd_negotiate_smb_dialect(void *buf)
 {
        int smb_buf_length = get_rfc1002_len(buf);
-       __le32 proto = ((struct smb2_hdr *)buf)->ProtocolId;
+       __le32 proto = ((struct smb2_hdr *)smb2_get_msg(buf))->ProtocolId;
 
        if (proto == SMB2_PROTO_NUMBER) {
                struct smb2_negotiate_req *req;
                int smb2_neg_size =
-                       offsetof(struct smb2_negotiate_req, Dialects) - 4;
+                       offsetof(struct smb2_negotiate_req, Dialects);
 
-               req = (struct smb2_negotiate_req *)buf;
+               req = (struct smb2_negotiate_req *)smb2_get_msg(buf);
                if (smb2_neg_size > smb_buf_length)
                        goto err_out;
 
@@ -445,11 +445,12 @@ int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command)
        struct ksmbd_conn *conn = work->conn;
        int ret;
 
-       conn->dialect = ksmbd_negotiate_smb_dialect(work->request_buf);
+       conn->dialect =
+               ksmbd_negotiate_smb_dialect(work->request_buf);
        ksmbd_debug(SMB, "conn->dialect 0x%x\n", conn->dialect);
 
        if (command == SMB2_NEGOTIATE_HE) {
-               struct smb2_hdr *smb2_hdr = work->request_buf;
+               struct smb2_hdr *smb2_hdr = smb2_get_msg(work->request_buf);
 
                if (smb2_hdr->ProtocolId != SMB2_PROTO_NUMBER) {
                        ksmbd_debug(SMB, "Downgrade to SMB1 negotiation\n");
index 6e79e75..5059084 100644 (file)
@@ -10,6 +10,7 @@
 
 #include "glob.h"
 #include "nterr.h"
+#include "../smbfs_common/smb2pdu.h"
 #include "smb2pdu.h"
 
 /* ksmbd's Specific ERRNO */
 #define SMB302_VERSION_STRING  "3.02"
 #define SMB311_VERSION_STRING  "3.1.1"
 
-/* Dialects */
-#define SMB10_PROT_ID          0x00
-#define SMB20_PROT_ID          0x0202
-#define SMB21_PROT_ID          0x0210
-/* multi-protocol negotiate request */
-#define SMB2X_PROT_ID          0x02FF
-#define SMB30_PROT_ID          0x0300
-#define SMB302_PROT_ID         0x0302
-#define SMB311_PROT_ID         0x0311
-#define BAD_PROT_ID            0xFFFF
-
 #define SMB_ECHO_INTERVAL      (60 * HZ)
 
 #define CIFS_DEFAULT_IOSIZE    (64 * 1024)
 /*
  * File Attribute flags
  */
-#define ATTR_READONLY                  0x0001
-#define ATTR_HIDDEN                    0x0002
-#define ATTR_SYSTEM                    0x0004
-#define ATTR_VOLUME                    0x0008
-#define ATTR_DIRECTORY                 0x0010
-#define ATTR_ARCHIVE                   0x0020
-#define ATTR_DEVICE                    0x0040
-#define ATTR_NORMAL                    0x0080
-#define ATTR_TEMPORARY                 0x0100
-#define ATTR_SPARSE                    0x0200
-#define ATTR_REPARSE                   0x0400
-#define ATTR_COMPRESSED                        0x0800
-#define ATTR_OFFLINE                   0x1000
-#define ATTR_NOT_CONTENT_INDEXED       0x2000
-#define ATTR_ENCRYPTED                 0x4000
 #define ATTR_POSIX_SEMANTICS           0x01000000
 #define ATTR_BACKUP_SEMANTICS          0x02000000
 #define ATTR_DELETE_ON_CLOSE           0x04000000
 #define ATTR_NO_BUFFERING              0x20000000
 #define ATTR_WRITE_THROUGH             0x80000000
 
-#define ATTR_READONLY_LE               cpu_to_le32(ATTR_READONLY)
-#define ATTR_HIDDEN_LE                 cpu_to_le32(ATTR_HIDDEN)
-#define ATTR_SYSTEM_LE                 cpu_to_le32(ATTR_SYSTEM)
-#define ATTR_DIRECTORY_LE              cpu_to_le32(ATTR_DIRECTORY)
-#define ATTR_ARCHIVE_LE                        cpu_to_le32(ATTR_ARCHIVE)
-#define ATTR_NORMAL_LE                 cpu_to_le32(ATTR_NORMAL)
-#define ATTR_TEMPORARY_LE              cpu_to_le32(ATTR_TEMPORARY)
-#define ATTR_SPARSE_FILE_LE            cpu_to_le32(ATTR_SPARSE)
-#define ATTR_REPARSE_POINT_LE          cpu_to_le32(ATTR_REPARSE)
-#define ATTR_COMPRESSED_LE             cpu_to_le32(ATTR_COMPRESSED)
-#define ATTR_OFFLINE_LE                        cpu_to_le32(ATTR_OFFLINE)
-#define ATTR_NOT_CONTENT_INDEXED_LE    cpu_to_le32(ATTR_NOT_CONTENT_INDEXED)
-#define ATTR_ENCRYPTED_LE              cpu_to_le32(ATTR_ENCRYPTED)
-#define ATTR_INTEGRITY_STREAML_LE      cpu_to_le32(0x00008000)
-#define ATTR_NO_SCRUB_DATA_LE          cpu_to_le32(0x00020000)
-#define ATTR_MASK_LE                   cpu_to_le32(0x00007FB7)
-
 /* List of FileSystemAttributes - see 2.5.1 of MS-FSCC */
 #define FILE_SUPPORTS_SPARSE_VDL       0x10000000 /* faster nonsparse extend */
 #define FILE_SUPPORTS_BLOCK_REFCOUNTING        0x08000000 /* allow ioctl dup extents */
 /* file_execute, file_read_attributes*/
 /* write_dac, and delete.           */
 
-#define FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA | FILE_READ_ATTRIBUTES)
-#define FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
-               | FILE_WRITE_EA | FILE_WRITE_ATTRIBUTES)
-#define FILE_EXEC_RIGHTS (FILE_EXECUTE)
-
 #define SET_FILE_READ_RIGHTS (FILE_READ_DATA | FILE_READ_EA \
                | FILE_READ_ATTRIBUTES \
                | DELETE | READ_CONTROL | WRITE_DAC \
@@ -477,12 +430,6 @@ struct smb_version_cmds {
        int (*proc)(struct ksmbd_work *swork);
 };
 
-static inline size_t
-smb2_hdr_size_no_buflen(struct smb_version_values *vals)
-{
-       return vals->header_size - 4;
-}
-
 int ksmbd_min_protocol(void);
 int ksmbd_max_protocol(void);
 
index a2fd5a4..7e57cbb 100644 (file)
@@ -484,7 +484,7 @@ static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
                struct smb_direct_data_transfer *req =
                        (struct smb_direct_data_transfer *)recvmsg->packet;
                struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
-                               + le32_to_cpu(req->data_offset) - 4);
+                               + le32_to_cpu(req->data_offset));
                ksmbd_debug(RDMA,
                            "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
                            le16_to_cpu(req->credits_granted),
@@ -2043,7 +2043,6 @@ int ksmbd_rdma_destroy(void)
        smb_direct_listener.cm_id = NULL;
 
        if (smb_direct_wq) {
-               flush_workqueue(smb_direct_wq);
                destroy_workqueue(smb_direct_wq);
                smb_direct_wq = NULL;
        }
index 835b384..19d3639 100644 (file)
@@ -1013,7 +1013,7 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp,
                        loff_t off, loff_t len)
 {
        smb_break_all_levII_oplock(work, fp, 1);
-       if (fp->f_ci->m_fattr & ATTR_SPARSE_FILE_LE)
+       if (fp->f_ci->m_fattr & FILE_ATTRIBUTE_SPARSE_FILE_LE)
                return vfs_fallocate(fp->filp,
                                     FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                                     off, len);
@@ -1624,7 +1624,7 @@ void *ksmbd_vfs_init_kstat(char **p, struct ksmbd_kstat *ksmbd_kstat)
        time = ksmbd_UnixTimeToNT(kstat->ctime);
        info->ChangeTime = cpu_to_le64(time);
 
-       if (ksmbd_kstat->file_attributes & ATTR_DIRECTORY_LE) {
+       if (ksmbd_kstat->file_attributes & FILE_ATTRIBUTE_DIRECTORY_LE) {
                info->EndOfFile = 0;
                info->AllocationSize = 0;
        } else {
@@ -1654,9 +1654,9 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
         * or that acl is disable in server's filesystem and the config is yes.
         */
        if (S_ISDIR(ksmbd_kstat->kstat->mode))
-               ksmbd_kstat->file_attributes = ATTR_DIRECTORY_LE;
+               ksmbd_kstat->file_attributes = FILE_ATTRIBUTE_DIRECTORY_LE;
        else
-               ksmbd_kstat->file_attributes = ATTR_ARCHIVE_LE;
+               ksmbd_kstat->file_attributes = FILE_ATTRIBUTE_ARCHIVE_LE;
 
        if (test_share_config_flag(work->tcon->share_conf,
                                   KSMBD_SHARE_FLAG_STORE_DOS_ATTRS)) {
index b0d5b8f..adf94a4 100644 (file)
@@ -25,48 +25,9 @@ enum {
 };
 
 /* CreateOptions */
-/* Flag is set, it must not be a file , valid for directory only */
-#define FILE_DIRECTORY_FILE_LE                 cpu_to_le32(0x00000001)
-#define FILE_WRITE_THROUGH_LE                  cpu_to_le32(0x00000002)
-#define FILE_SEQUENTIAL_ONLY_LE                        cpu_to_le32(0x00000004)
-
-/* Should not buffer on server*/
-#define FILE_NO_INTERMEDIATE_BUFFERING_LE      cpu_to_le32(0x00000008)
-/* MBZ */
-#define FILE_SYNCHRONOUS_IO_ALERT_LE           cpu_to_le32(0x00000010)
-/* MBZ */
-#define FILE_SYNCHRONOUS_IO_NONALERT_LE                cpu_to_le32(0x00000020)
-
-/* Flaf must not be set for directory */
-#define FILE_NON_DIRECTORY_FILE_LE             cpu_to_le32(0x00000040)
-
-/* Should be zero */
 #define CREATE_TREE_CONNECTION                 cpu_to_le32(0x00000080)
-#define FILE_COMPLETE_IF_OPLOCKED_LE           cpu_to_le32(0x00000100)
-#define FILE_NO_EA_KNOWLEDGE_LE                        cpu_to_le32(0x00000200)
-#define FILE_OPEN_REMOTE_INSTANCE              cpu_to_le32(0x00000400)
-
-/**
- * Doc says this is obsolete "open for recovery" flag should be zero
- * in any case.
- */
-#define CREATE_OPEN_FOR_RECOVERY               cpu_to_le32(0x00000400)
-#define FILE_RANDOM_ACCESS_LE                  cpu_to_le32(0x00000800)
-#define FILE_DELETE_ON_CLOSE_LE                        cpu_to_le32(0x00001000)
-#define FILE_OPEN_BY_FILE_ID_LE                        cpu_to_le32(0x00002000)
-#define FILE_OPEN_FOR_BACKUP_INTENT_LE         cpu_to_le32(0x00004000)
-#define FILE_NO_COMPRESSION_LE                 cpu_to_le32(0x00008000)
-
-/* Should be zero*/
-#define FILE_OPEN_REQUIRING_OPLOCK             cpu_to_le32(0x00010000)
-#define FILE_DISALLOW_EXCLUSIVE                        cpu_to_le32(0x00020000)
 #define FILE_RESERVE_OPFILTER_LE               cpu_to_le32(0x00100000)
-#define FILE_OPEN_REPARSE_POINT_LE             cpu_to_le32(0x00200000)
-#define FILE_OPEN_NO_RECALL_LE                 cpu_to_le32(0x00400000)
 
-/* Should be zero */
-#define FILE_OPEN_FOR_FREE_SPACE_QUERY_LE      cpu_to_le32(0x00800000)
-#define CREATE_OPTIONS_MASK                    cpu_to_le32(0x00FFFFFF)
 #define CREATE_OPTION_READONLY                 0x10000000
 /* system. NB not sent over wire */
 #define CREATE_OPTION_SPECIAL                  0x20000000
index 994ec22..9320a42 100644 (file)
@@ -230,7 +230,7 @@ static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async
 
 /*
  * Deal with the completion of writing the data to the cache.  We have to clear
- * the PG_fscache bits on the pages involved and release the caller's ref.
+ * the PG_fscache bits on the folios involved and release the caller's ref.
  *
  * May be called in softirq mode and we inherit a ref from the caller.
  */
@@ -238,7 +238,7 @@ static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
                                          bool was_async)
 {
        struct netfs_read_subrequest *subreq;
-       struct page *page;
+       struct folio *folio;
        pgoff_t unlocked = 0;
        bool have_unlocked = false;
 
@@ -247,14 +247,14 @@ static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
        list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
                XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
 
-               xas_for_each(&xas, page, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+               xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
                        /* We might have multiple writes from the same huge
-                        * page, but we mustn't unlock a page more than once.
+                        * folio, but we mustn't unlock a folio more than once.
                         */
-                       if (have_unlocked && page->index <= unlocked)
+                       if (have_unlocked && folio_index(folio) <= unlocked)
                                continue;
-                       unlocked = page->index;
-                       end_page_fscache(page);
+                       unlocked = folio_index(folio);
+                       folio_end_fscache(folio);
                        have_unlocked = true;
                }
        }
@@ -367,18 +367,17 @@ static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
 }
 
 /*
- * Unlock the pages in a read operation.  We need to set PG_fscache on any
- * pages we're going to write back before we unlock them.
+ * Unlock the folios in a read operation.  We need to set PG_fscache on any
+ * folios we're going to write back before we unlock them.
  */
 static void netfs_rreq_unlock(struct netfs_read_request *rreq)
 {
        struct netfs_read_subrequest *subreq;
-       struct page *page;
+       struct folio *folio;
        unsigned int iopos, account = 0;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
        bool subreq_failed = false;
-       int i;
 
        XA_STATE(xas, &rreq->mapping->i_pages, start_page);
 
@@ -403,9 +402,9 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
        trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
 
        rcu_read_lock();
-       xas_for_each(&xas, page, last_page) {
-               unsigned int pgpos = (page->index - start_page) * PAGE_SIZE;
-               unsigned int pgend = pgpos + thp_size(page);
+       xas_for_each(&xas, folio, last_page) {
+               unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
+               unsigned int pgend = pgpos + folio_size(folio);
                bool pg_failed = false;
 
                for (;;) {
@@ -414,7 +413,7 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
                                break;
                        }
                        if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
-                               set_page_fscache(page);
+                               folio_start_fscache(folio);
                        pg_failed |= subreq_failed;
                        if (pgend < iopos + subreq->len)
                                break;
@@ -433,17 +432,16 @@ static void netfs_rreq_unlock(struct netfs_read_request *rreq)
                }
 
                if (!pg_failed) {
-                       for (i = 0; i < thp_nr_pages(page); i++)
-                               flush_dcache_page(page);
-                       SetPageUptodate(page);
+                       flush_dcache_folio(folio);
+                       folio_mark_uptodate(folio);
                }
 
-               if (!test_bit(NETFS_RREQ_DONT_UNLOCK_PAGES, &rreq->flags)) {
-                       if (page->index == rreq->no_unlock_page &&
-                           test_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags))
+               if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
+                       if (folio_index(folio) == rreq->no_unlock_folio &&
+                           test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
                                _debug("no unlock");
                        else
-                               unlock_page(page);
+                               folio_unlock(folio);
                }
        }
        rcu_read_unlock();
@@ -876,7 +874,6 @@ void netfs_readahead(struct readahead_control *ractl,
                     void *netfs_priv)
 {
        struct netfs_read_request *rreq;
-       struct page *page;
        unsigned int debug_index = 0;
        int ret;
 
@@ -911,11 +908,11 @@ void netfs_readahead(struct readahead_control *ractl,
 
        } while (rreq->submitted < rreq->len);
 
-       /* Drop the refs on the pages here rather than in the cache or
+       /* Drop the refs on the folios here rather than in the cache or
         * filesystem.  The locks will be dropped in netfs_rreq_unlock().
         */
-       while ((page = readahead_page(ractl)))
-               put_page(page);
+       while (readahead_folio(ractl))
+               ;
 
        /* If we decrement nr_rd_ops to 0, the ref belongs to us. */
        if (atomic_dec_and_test(&rreq->nr_rd_ops))
@@ -935,7 +932,7 @@ EXPORT_SYMBOL(netfs_readahead);
 /**
  * netfs_readpage - Helper to manage a readpage request
  * @file: The file to read from
- * @page: The page to read
+ * @folio: The folio to read
  * @ops: The network filesystem's operations for the helper to use
  * @netfs_priv: Private netfs data to be retained in the request
  *
@@ -950,7 +947,7 @@ EXPORT_SYMBOL(netfs_readahead);
  * This is usable whether or not caching is enabled.
  */
 int netfs_readpage(struct file *file,
-                  struct page *page,
+                  struct folio *folio,
                   const struct netfs_read_request_ops *ops,
                   void *netfs_priv)
 {
@@ -958,23 +955,23 @@ int netfs_readpage(struct file *file,
        unsigned int debug_index = 0;
        int ret;
 
-       _enter("%lx", page_index(page));
+       _enter("%lx", folio_index(folio));
 
        rreq = netfs_alloc_read_request(ops, netfs_priv, file);
        if (!rreq) {
                if (netfs_priv)
-                       ops->cleanup(netfs_priv, page_file_mapping(page));
-               unlock_page(page);
+                       ops->cleanup(netfs_priv, folio_file_mapping(folio));
+               folio_unlock(folio);
                return -ENOMEM;
        }
-       rreq->mapping   = page_file_mapping(page);
-       rreq->start     = page_file_offset(page);
-       rreq->len       = thp_size(page);
+       rreq->mapping   = folio_file_mapping(folio);
+       rreq->start     = folio_file_pos(folio);
+       rreq->len       = folio_size(folio);
 
        if (ops->begin_cache_operation) {
                ret = ops->begin_cache_operation(rreq);
                if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
-                       unlock_page(page);
+                       folio_unlock(folio);
                        goto out;
                }
        }
@@ -1012,40 +1009,40 @@ out:
 EXPORT_SYMBOL(netfs_readpage);
 
 /**
- * netfs_skip_page_read - prep a page for writing without reading first
- * @page: page being prepared
+ * netfs_skip_folio_read - prep a folio for writing without reading first
+ * @folio: The folio being prepared
  * @pos: starting position for the write
  * @len: length of write
  *
  * In some cases, write_begin doesn't need to read at all:
- * - full page write
- * - write that lies in a page that is completely beyond EOF
- * - write that covers the the page from start to EOF or beyond it
+ * - full folio write
+ * - write that lies in a folio that is completely beyond EOF
+ * - write that covers the folio from start to EOF or beyond it
  *
  * If any of these criteria are met, then zero out the unwritten parts
- * of the page and return true. Otherwise, return false.
+ * of the folio and return true. Otherwise, return false.
  */
-static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
+static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
 {
-       struct inode *inode = page->mapping->host;
+       struct inode *inode = folio_inode(folio);
        loff_t i_size = i_size_read(inode);
-       size_t offset = offset_in_thp(page, pos);
+       size_t offset = offset_in_folio(folio, pos);
 
-       /* Full page write */
-       if (offset == 0 && len >= thp_size(page))
+       /* Full folio write */
+       if (offset == 0 && len >= folio_size(folio))
                return true;
 
-       /* pos beyond last page in the file */
+       /* pos beyond last folio in the file */
        if (pos - offset >= i_size)
                goto zero_out;
 
-       /* Write that covers from the start of the page to EOF or beyond */
+       /* Write that covers from the start of the folio to EOF or beyond */
        if (offset == 0 && (pos + len) >= i_size)
                goto zero_out;
 
        return false;
 zero_out:
-       zero_user_segments(page, 0, offset, offset + len, thp_size(page));
+       zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
        return true;
 }
 
@@ -1054,9 +1051,9 @@ zero_out:
  * @file: The file to read from
  * @mapping: The mapping to read from
  * @pos: File position at which the write will begin
- * @len: The length of the write (may extend beyond the end of the page chosen)
- * @flags: AOP_* flags
- * @_page: Where to put the resultant page
+ * @len: The length of the write (may extend beyond the end of the folio chosen)
+ * @aop_flags: AOP_* flags
+ * @_folio: Where to put the resultant folio
  * @_fsdata: Place for the netfs to store a cookie
  * @ops: The network filesystem's operations for the helper to use
  * @netfs_priv: Private netfs data to be retained in the request
@@ -1072,37 +1069,41 @@ zero_out:
  * issue_op, is mandatory.
  *
  * The check_write_begin() operation can be provided to check for and flush
- * conflicting writes once the page is grabbed and locked.  It is passed a
+ * conflicting writes once the folio is grabbed and locked.  It is passed a
  * pointer to the fsdata cookie that gets returned to the VM to be passed to
  * write_end.  It is permitted to sleep.  It should return 0 if the request
- * should go ahead; unlock the page and return -EAGAIN to cause the page to be
- * regot; or return an error.
+ * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
+ * be regot; or return an error.
  *
  * This is usable whether or not caching is enabled.
  */
 int netfs_write_begin(struct file *file, struct address_space *mapping,
-                     loff_t pos, unsigned int len, unsigned int flags,
-                     struct page **_page, void **_fsdata,
+                     loff_t pos, unsigned int len, unsigned int aop_flags,
+                     struct folio **_folio, void **_fsdata,
                      const struct netfs_read_request_ops *ops,
                      void *netfs_priv)
 {
        struct netfs_read_request *rreq;
-       struct page *page, *xpage;
+       struct folio *folio;
        struct inode *inode = file_inode(file);
-       unsigned int debug_index = 0;
+       unsigned int debug_index = 0, fgp_flags;
        pgoff_t index = pos >> PAGE_SHIFT;
        int ret;
 
        DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
 
 retry:
-       page = grab_cache_page_write_begin(mapping, index, flags);
-       if (!page)
+       fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
+       if (aop_flags & AOP_FLAG_NOFS)
+               fgp_flags |= FGP_NOFS;
+       folio = __filemap_get_folio(mapping, index, fgp_flags,
+                                   mapping_gfp_mask(mapping));
+       if (!folio)
                return -ENOMEM;
 
        if (ops->check_write_begin) {
                /* Allow the netfs (eg. ceph) to flush conflicts. */
-               ret = ops->check_write_begin(file, pos, len, page, _fsdata);
+               ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
                if (ret < 0) {
                        trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
                        if (ret == -EAGAIN)
@@ -1111,28 +1112,28 @@ retry:
                }
        }
 
-       if (PageUptodate(page))
-               goto have_page;
+       if (folio_test_uptodate(folio))
+               goto have_folio;
 
        /* If the page is beyond the EOF, we want to clear it - unless it's
         * within the cache granule containing the EOF, in which case we need
         * to preload the granule.
         */
        if (!ops->is_cache_enabled(inode) &&
-           netfs_skip_page_read(page, pos, len)) {
+           netfs_skip_folio_read(folio, pos, len)) {
                netfs_stat(&netfs_n_rh_write_zskip);
-               goto have_page_no_wait;
+               goto have_folio_no_wait;
        }
 
        ret = -ENOMEM;
        rreq = netfs_alloc_read_request(ops, netfs_priv, file);
        if (!rreq)
                goto error;
-       rreq->mapping           = page->mapping;
-       rreq->start             = page_offset(page);
-       rreq->len               = thp_size(page);
-       rreq->no_unlock_page    = page->index;
-       __set_bit(NETFS_RREQ_NO_UNLOCK_PAGE, &rreq->flags);
+       rreq->mapping           = folio_file_mapping(folio);
+       rreq->start             = folio_file_pos(folio);
+       rreq->len               = folio_size(folio);
+       rreq->no_unlock_folio   = folio_index(folio);
+       __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
        netfs_priv = NULL;
 
        if (ops->begin_cache_operation) {
@@ -1147,14 +1148,14 @@ retry:
        /* Expand the request to meet caching requirements and download
         * preferences.
         */
-       ractl._nr_pages = thp_nr_pages(page);
+       ractl._nr_pages = folio_nr_pages(folio);
        netfs_rreq_expand(rreq, &ractl);
        netfs_get_read_request(rreq);
 
-       /* We hold the page locks, so we can drop the references */
-       while ((xpage = readahead_page(&ractl)))
-               if (xpage != page)
-                       put_page(xpage);
+       /* We hold the folio locks, so we can drop the references */
+       folio_get(folio);
+       while (readahead_folio(&ractl))
+               ;
 
        atomic_set(&rreq->nr_rd_ops, 1);
        do {
@@ -1184,22 +1185,22 @@ retry:
        if (ret < 0)
                goto error;
 
-have_page:
-       ret = wait_on_page_fscache_killable(page);
+have_folio:
+       ret = folio_wait_fscache_killable(folio);
        if (ret < 0)
                goto error;
-have_page_no_wait:
+have_folio_no_wait:
        if (netfs_priv)
                ops->cleanup(netfs_priv, mapping);
-       *_page = page;
+       *_folio = folio;
        _leave(" = 0");
        return 0;
 
 error_put:
        netfs_put_read_request(rreq, false);
 error:
-       unlock_page(page);
-       put_page(page);
+       folio_unlock(folio);
+       folio_put(folio);
        if (netfs_priv)
                ops->cleanup(netfs_priv, mapping);
        _leave(" = %d", ret);
index b2a1d96..5a93a5d 100644 (file)
@@ -288,11 +288,8 @@ nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
        p = xdr_inline_decode(argp->xdr, count << 2);
        if (!p)
                return nfserr_bad_xdr;
-       i = 0;
-       while (i < count)
-               bmval[i++] = be32_to_cpup(p++);
-       while (i < bmlen)
-               bmval[i++] = 0;
+       for (i = 0; i < bmlen; i++)
+               bmval[i] = (i < count) ? be32_to_cpup(p++) : 0;
 
        return nfs_ok;
 }
index b9614db..f243cb5 100644 (file)
@@ -218,7 +218,7 @@ static int zbufsize_842(size_t size)
 #if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
 static int zbufsize_zstd(size_t size)
 {
-       return ZSTD_compressBound(size);
+       return zstd_compress_bound(size);
 }
 #endif
 
index 0015cf8..c40445d 100644 (file)
@@ -34,7 +34,7 @@ static void *zstd_init(struct squashfs_sb_info *msblk, void *buff)
                goto failed;
        wksp->window_size = max_t(size_t,
                        msblk->block_size, SQUASHFS_METADATA_SIZE);
-       wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size);
+       wksp->mem_size = zstd_dstream_workspace_bound(wksp->window_size);
        wksp->mem = vmalloc(wksp->mem_size);
        if (wksp->mem == NULL)
                goto failed;
@@ -63,15 +63,15 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
        struct squashfs_page_actor *output)
 {
        struct workspace *wksp = strm;
-       ZSTD_DStream *stream;
+       zstd_dstream *stream;
        size_t total_out = 0;
        int error = 0;
-       ZSTD_inBuffer in_buf = { NULL, 0, 0 };
-       ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+       zstd_in_buffer in_buf = { NULL, 0, 0 };
+       zstd_out_buffer out_buf = { NULL, 0, 0 };
        struct bvec_iter_all iter_all = {};
        struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
 
-       stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
+       stream = zstd_init_dstream(wksp->window_size, wksp->mem, wksp->mem_size);
 
        if (!stream) {
                ERROR("Failed to initialize zstd decompressor\n");
@@ -116,14 +116,14 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
                }
 
                total_out -= out_buf.pos;
-               zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+               zstd_err = zstd_decompress_stream(stream, &out_buf, &in_buf);
                total_out += out_buf.pos; /* add the additional data produced */
                if (zstd_err == 0)
                        break;
 
-               if (ZSTD_isError(zstd_err)) {
+               if (zstd_is_error(zstd_err)) {
                        ERROR("zstd decompression error: %d\n",
-                                       (int)ZSTD_getErrorCode(zstd_err));
+                                       (int)zstd_get_error_code(zstd_err));
                        error = -EIO;
                        break;
                }
index 70abdfa..42e3e55 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/bio.h>
+#include <linux/iversion.h>
 
 #include "udf_i.h"
 #include "udf_sb.h"
@@ -43,7 +44,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
        struct fileIdentDesc *fi = NULL;
        struct fileIdentDesc cfi;
        udf_pblk_t block, iblock;
-       loff_t nf_pos;
+       loff_t nf_pos, emit_pos = 0;
        int flen;
        unsigned char *fname = NULL, *copy_name = NULL;
        unsigned char *nameptr;
@@ -57,6 +58,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
        int i, num, ret = 0;
        struct extent_position epos = { NULL, 0, {0, 0} };
        struct super_block *sb = dir->i_sb;
+       bool pos_valid = false;
 
        if (ctx->pos == 0) {
                if (!dir_emit_dot(file, ctx))
@@ -67,6 +69,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
        if (nf_pos >= size)
                goto out;
 
+       /*
+        * Something changed since last readdir (either lseek was called or dir
+        * changed)?  We need to verify the position correctly points at the
+        * beginning of some dir entry so that the directory parsing code does
+        * not get confused. Since UDF does not have any reliable way of
+        * identifying beginning of dir entry (names are under user control),
+        * we need to scan the directory from the beginning.
+        */
+       if (!inode_eq_iversion(dir, file->f_version)) {
+               emit_pos = nf_pos;
+               nf_pos = 0;
+       } else {
+               pos_valid = true;
+       }
+
        fname = kmalloc(UDF_NAME_LEN, GFP_NOFS);
        if (!fname) {
                ret = -ENOMEM;
@@ -122,13 +139,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
 
        while (nf_pos < size) {
                struct kernel_lb_addr tloc;
+               loff_t cur_pos = nf_pos;
 
-               ctx->pos = (nf_pos >> 2) + 1;
+               /* Update file position only if we got past the current one */
+               if (nf_pos >= emit_pos) {
+                       ctx->pos = (nf_pos >> 2) + 1;
+                       pos_valid = true;
+               }
 
                fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc,
                                        &elen, &offset);
                if (!fi)
                        goto out;
+               /* Still not at offset where user asked us to read from? */
+               if (cur_pos < emit_pos)
+                       continue;
 
                liu = le16_to_cpu(cfi.lengthOfImpUse);
                lfi = cfi.lengthFileIdent;
@@ -186,8 +211,11 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
        } /* end while */
 
        ctx->pos = (nf_pos >> 2) + 1;
+       pos_valid = true;
 
 out:
+       if (pos_valid)
+               file->f_version = inode_query_iversion(dir);
        if (fibh.sbh != fibh.ebh)
                brelse(fibh.ebh);
        brelse(fibh.sbh);
index caeef08..0ed4861 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/sched.h>
 #include <linux/crc-itu-t.h>
 #include <linux/exportfs.h>
+#include <linux/iversion.h>
 
 static inline int udf_match(int len1, const unsigned char *name1, int len2,
                            const unsigned char *name2)
@@ -134,6 +135,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
                        mark_buffer_dirty_inode(fibh->ebh, inode);
                mark_buffer_dirty_inode(fibh->sbh, inode);
        }
+       inode_inc_iversion(inode);
+
        return 0;
 }
 
index 34247fb..f26b5e0 100644 (file)
@@ -57,6 +57,7 @@
 #include <linux/crc-itu-t.h>
 #include <linux/log2.h>
 #include <asm/byteorder.h>
+#include <linux/iversion.h>
 
 #include "udf_sb.h"
 #include "udf_i.h"
@@ -149,6 +150,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
        init_rwsem(&ei->i_data_sem);
        ei->cached_extent.lstart = -1;
        spin_lock_init(&ei->i_extent_cache_lock);
+       inode_set_iversion(&ei->vfs_inode, 1);
 
        return &ei->vfs_inode;
 }
index d7d875c..1e4ee04 100644 (file)
@@ -248,6 +248,7 @@ xfs_initialize_perag(
                spin_unlock(&mp->m_perag_lock);
                radix_tree_preload_end();
 
+#ifdef __KERNEL__
                /* Place kernel structure only init below this point. */
                spin_lock_init(&pag->pag_ici_lock);
                spin_lock_init(&pag->pagb_lock);
@@ -257,6 +258,7 @@ xfs_initialize_perag(
                init_waitqueue_head(&pag->pagb_wait);
                pag->pagb_count = 0;
                pag->pagb_tree = RB_ROOT;
+#endif /* __KERNEL__ */
 
                error = xfs_buf_hash_init(pag);
                if (error)
index 3f597ca..e411d51 100644 (file)
@@ -64,6 +64,10 @@ struct xfs_perag {
        /* Blocks reserved for the reverse mapping btree. */
        struct xfs_ag_resv      pag_rmapbt_resv;
 
+       /* for rcu-safe freeing */
+       struct rcu_head rcu_head;
+
+#ifdef __KERNEL__
        /* -- kernel only structures below this line -- */
 
        /*
@@ -90,9 +94,6 @@ struct xfs_perag {
        spinlock_t      pag_buf_lock;   /* lock for pag_buf_hash */
        struct rhashtable pag_buf_hash;
 
-       /* for rcu-safe freeing */
-       struct rcu_head rcu_head;
-
        /* background prealloc block trimming */
        struct delayed_work     pag_blockgc_work;
 
@@ -102,6 +103,7 @@ struct xfs_perag {
         * or have some other means to control concurrency.
         */
        struct rhashtable       pagi_unlinked_hash;
+#endif /* __KERNEL__ */
 };
 
 int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
index b4e19aa..f18a875 100644 (file)
@@ -2785,6 +2785,7 @@ error0:
        return error;
 }
 
+#ifdef __KERNEL__
 struct xfs_btree_split_args {
        struct xfs_btree_cur    *cur;
        int                     level;
@@ -2870,6 +2871,9 @@ xfs_btree_split(
        destroy_work_on_stack(&args.work);
        return args.result;
 }
+#else
+#define xfs_btree_split        __xfs_btree_split
+#endif /* __KERNEL__ */
 
 
 /*
index dd7a2db..9dc1ecb 100644 (file)
@@ -864,7 +864,6 @@ xfs_da3_node_rebalance(
 {
        struct xfs_da_intnode   *node1;
        struct xfs_da_intnode   *node2;
-       struct xfs_da_intnode   *tmpnode;
        struct xfs_da_node_entry *btree1;
        struct xfs_da_node_entry *btree2;
        struct xfs_da_node_entry *btree_s;
@@ -894,9 +893,7 @@ xfs_da3_node_rebalance(
            ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
             (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
                        be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
-               tmpnode = node1;
-               node1 = node2;
-               node2 = tmpnode;
+               swap(node1, node2);
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
                xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
                btree1 = nodehdr1.btree;
index f715e88..e7a163a 100644 (file)
@@ -193,7 +193,7 @@ struct bpf_map {
        atomic64_t usercnt;
        struct work_struct work;
        struct mutex freeze_mutex;
-       u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
+       atomic64_t writecnt;
 };
 
 static inline bool map_value_has_spin_lock(const struct bpf_map *map)
@@ -1419,6 +1419,7 @@ void bpf_map_put(struct bpf_map *map);
 void *bpf_map_area_alloc(u64 size, int numa_node);
 void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
 void bpf_map_area_free(void *base);
+bool bpf_map_write_active(const struct bpf_map *map);
 void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
 int  generic_map_lookup_batch(struct bpf_map *map,
                              const union bpf_attr *attr,
index bc2699f..7ad6c3d 100644 (file)
@@ -302,6 +302,8 @@ enum {
        CEPH_SESSION_REQUEST_FLUSH_MDLOG,
 };
 
+#define CEPH_SESSION_BLOCKLISTED       (1 << 0)  /* session blocklisted */
+
 extern const char *ceph_session_op_name(int op);
 
 struct ceph_mds_session_head {
index 83fa08a..3431011 100644 (file)
@@ -475,6 +475,14 @@ extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
                                       u64 expected_object_size,
                                       u64 expected_write_size,
                                       u32 flags);
+extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+                                    u64 src_snapid, u64 src_version,
+                                    struct ceph_object_id *src_oid,
+                                    struct ceph_object_locator *src_oloc,
+                                    u32 src_fadvise_flags,
+                                    u32 dst_fadvise_flags,
+                                    u32 truncate_seq, u64 truncate_size,
+                                    u8 copy_from_flags);
 
 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
                                               struct ceph_snap_context *snapc,
@@ -515,17 +523,6 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
                   struct page *req_page, size_t req_len,
                   struct page **resp_pages, size_t *resp_len);
 
-int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
-                       u64 src_snapid, u64 src_version,
-                       struct ceph_object_id *src_oid,
-                       struct ceph_object_locator *src_oloc,
-                       u32 src_fadvise_flags,
-                       struct ceph_object_id *dst_oid,
-                       struct ceph_object_locator *dst_oloc,
-                       u32 dst_fadvise_flags,
-                       u32 truncate_seq, u64 truncate_size,
-                       u8 copy_from_flags);
-
 /* watch/notify */
 struct ceph_osd_linger_request *
 ceph_osdc_watch(struct ceph_osd_client *osdc,
index 6b5d36b..dbd39b2 100644 (file)
@@ -362,6 +362,7 @@ void efi_native_runtime_setup(void);
 
 /* OEM GUIDs */
 #define DELLEMC_EFI_RCI2_TABLE_GUID            EFI_GUID(0x2d9f28a2, 0xa886, 0x456a,  0x97, 0xa8, 0xf1, 0x1e, 0xf2, 0x4f, 0xf4, 0x55)
+#define AMD_SEV_MEM_ENCRYPT_GUID               EFI_GUID(0x0cf29b71, 0x9e51, 0x433a,  0xa3, 0xb7, 0x81, 0xf3, 0xab, 0x16, 0xb8, 0x75)
 
 typedef struct {
        efi_guid_t guid;
index 462634b..74c4102 100644 (file)
@@ -205,9 +205,9 @@ static inline dev_t disk_devt(struct gendisk *disk)
 void disk_uevent(struct gendisk *disk, enum kobject_action action);
 
 /* block/genhd.c */
-int device_add_disk(struct device *parent, struct gendisk *disk,
-               const struct attribute_group **groups);
-static inline int add_disk(struct gendisk *disk)
+int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
+                                const struct attribute_group **groups);
+static inline int __must_check add_disk(struct gendisk *disk)
 {
        return device_add_disk(NULL, disk, NULL);
 }
index 968b4c4..77755ac 100644 (file)
@@ -85,7 +85,7 @@
 struct completion;
 struct user;
 
-#ifdef CONFIG_PREEMPT_VOLUNTARY
+#ifdef CONFIG_PREEMPT_VOLUNTARY_BUILD
 
 extern int __cond_resched(void);
 # define might_resched() __cond_resched()
index 60a35d9..c310648 100644 (file)
@@ -150,7 +150,7 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_MMU_RELOAD        (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UNBLOCK           2
 #define KVM_REQ_UNHALT            3
-#define KVM_REQ_VM_BUGGED         (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VM_DEAD           (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQUEST_ARCH_BASE     8
 
 #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -617,6 +617,7 @@ struct kvm {
        unsigned int max_halt_poll_ns;
        u32 dirty_ring_size;
        bool vm_bugged;
+       bool vm_dead;
 
 #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER
        struct notifier_block pm_notifier;
@@ -650,12 +651,19 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)                                       \
        kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline void kvm_vm_dead(struct kvm *kvm)
+{
+       kvm->vm_dead = true;
+       kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD);
+}
+
 static inline void kvm_vm_bugged(struct kvm *kvm)
 {
        kvm->vm_bugged = true;
-       kvm_make_all_cpus_request(kvm, KVM_REQ_VM_BUGGED);
+       kvm_vm_dead(kvm);
 }
 
+
 #define KVM_BUG(cond, kvm, fmt...)                             \
 ({                                                             \
        int __ret = (cond);                                     \
@@ -866,7 +874,7 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_dirty(kvm_pfn_t pfn);
 void kvm_set_pfn_accessed(kvm_pfn_t pfn);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache);
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
                        int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
@@ -942,12 +950,8 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn
 kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn);
 kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn);
 int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map);
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-               struct gfn_to_pfn_cache *cache, bool atomic);
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn);
 void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty);
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
-                 struct gfn_to_pfn_cache *cache, bool dirty, bool atomic);
 unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn);
 unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable);
 int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset,
index 2237abb..234eab0 100644 (file)
@@ -53,13 +53,6 @@ struct gfn_to_hva_cache {
        struct kvm_memory_slot *memslot;
 };
 
-struct gfn_to_pfn_cache {
-       u64 generation;
-       gfn_t gfn;
-       kvm_pfn_t pfn;
-       bool dirty;
-};
-
 #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
 /*
  * Memory caches are used to preallocate memory ahead of various MMU flows,
index 97afcea..8b18fe9 100644 (file)
@@ -145,13 +145,13 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw,
        GENMASK(31 - ESW_TUN_ID_BITS - ESW_RESERVED_BITS, \
                ESW_TUN_OPTS_OFFSET + 1)
 
-u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev);
+u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev);
 u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev);
 struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw);
 
 #else  /* CONFIG_MLX5_ESWITCH */
 
-static inline u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev)
+static inline u8 mlx5_eswitch_mode(const struct mlx5_core_dev *dev)
 {
        return MLX5_ESWITCH_NONE;
 }
index bb8c6f5..c3a6e62 100644 (file)
@@ -105,7 +105,18 @@ struct page {
                        struct page_pool *pp;
                        unsigned long _pp_mapping_pad;
                        unsigned long dma_addr;
-                       atomic_long_t pp_frag_count;
+                       union {
+                               /**
+                                * dma_addr_upper: might require a 64-bit
+                                * value on 32-bit architectures.
+                                */
+                               unsigned long dma_addr_upper;
+                               /**
+                                * For frag page support, not supported in
+                                * 32-bit architectures with 64-bit DMA.
+                                */
+                               atomic_long_t pp_frag_count;
+                       };
                };
                struct {        /* slab, slob and slub */
                        union {
index 49cf6eb..e616f94 100644 (file)
@@ -148,7 +148,7 @@ struct msi_desc {
                                u8      is_msix         : 1;
                                u8      multiple        : 3;
                                u8      multi_cap       : 3;
-                               u8      maskbit         : 1;
+                               u8      can_mask        : 1;
                                u8      is_64           : 1;
                                u8      is_virtual      : 1;
                                u16     entry_nr;
index 12c4177..ca0683b 100644 (file)
@@ -166,13 +166,13 @@ struct netfs_read_request {
        short                   error;          /* 0 or error that occurred */
        loff_t                  i_size;         /* Size of the file */
        loff_t                  start;          /* Start position */
-       pgoff_t                 no_unlock_page; /* Don't unlock this page after read */
+       pgoff_t                 no_unlock_folio; /* Don't unlock this folio after read */
        refcount_t              usage;
        unsigned long           flags;
 #define NETFS_RREQ_INCOMPLETE_IO       0       /* Some ioreqs terminated short or with error */
 #define NETFS_RREQ_WRITE_TO_CACHE      1       /* Need to write to the cache */
-#define NETFS_RREQ_NO_UNLOCK_PAGE      2       /* Don't unlock no_unlock_page on completion */
-#define NETFS_RREQ_DONT_UNLOCK_PAGES   3       /* Don't unlock the pages on completion */
+#define NETFS_RREQ_NO_UNLOCK_FOLIO     2       /* Don't unlock no_unlock_folio on completion */
+#define NETFS_RREQ_DONT_UNLOCK_FOLIOS  3       /* Don't unlock the folios on completion */
 #define NETFS_RREQ_FAILED              4       /* The request failed */
 #define NETFS_RREQ_IN_PROGRESS         5       /* Unlocked when the request completes */
        const struct netfs_read_request_ops *netfs_ops;
@@ -190,7 +190,7 @@ struct netfs_read_request_ops {
        void (*issue_op)(struct netfs_read_subrequest *subreq);
        bool (*is_still_valid)(struct netfs_read_request *rreq);
        int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
-                                struct page *page, void **_fsdata);
+                                struct folio *folio, void **_fsdata);
        void (*done)(struct netfs_read_request *rreq);
        void (*cleanup)(struct address_space *mapping, void *netfs_priv);
 };
@@ -240,11 +240,11 @@ extern void netfs_readahead(struct readahead_control *,
                            const struct netfs_read_request_ops *,
                            void *);
 extern int netfs_readpage(struct file *,
-                         struct page *,
+                         struct folio *,
                          const struct netfs_read_request_ops *,
                          void *);
 extern int netfs_write_begin(struct file *, struct address_space *,
-                            loff_t, unsigned int, unsigned int, struct page **,
+                            loff_t, unsigned int, unsigned int, struct folio **,
                             void **,
                             const struct netfs_read_request_ops *,
                             void *);
index 6a30916..1a0c646 100644 (file)
@@ -253,6 +253,20 @@ static inline struct address_space *page_mapping_file(struct page *page)
        return folio_mapping(folio);
 }
 
+/**
+ * folio_inode - Get the host inode for this folio.
+ * @folio: The folio.
+ *
+ * For folios which are in the page cache, return the inode that this folio
+ * belongs to.
+ *
+ * Do not call this for folios which aren't in the page cache.
+ */
+static inline struct inode *folio_inode(struct folio *folio)
+{
+       return folio->mapping->host;
+}
+
 static inline bool page_cache_add_speculative(struct page *page, int count)
 {
        VM_BUG_ON_PAGE(PageTail(page), page);
@@ -280,6 +294,25 @@ static inline void folio_attach_private(struct folio *folio, void *data)
 }
 
 /**
+ * folio_change_private - Change private data on a folio.
+ * @folio: Folio to change the data on.
+ * @data: Data to set on the folio.
+ *
+ * Change the private data attached to a folio and return the old
+ * data.  The page must previously have had data attached and the data
+ * must be detached before the folio will be freed.
+ *
+ * Return: Data that was previously attached to the folio.
+ */
+static inline void *folio_change_private(struct folio *folio, void *data)
+{
+       void *old = folio_get_private(folio);
+
+       folio->private = data;
+       return old;
+}
+
+/**
  * folio_detach_private - Detach private data from a folio.
  * @folio: Folio to detach data from.
  *
index 138d764..18a75c8 100644 (file)
@@ -233,6 +233,8 @@ enum pci_dev_flags {
        PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
        /* Don't use Relaxed Ordering for TLPs directed at this device */
        PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11),
+       /* Device does honor MSI masking despite saying otherwise */
+       PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12),
 };
 
 enum pci_irq_reroute_variant {
@@ -1666,6 +1668,7 @@ void pci_cfg_access_lock(struct pci_dev *dev);
 bool pci_cfg_access_trylock(struct pci_dev *dev);
 void pci_cfg_access_unlock(struct pci_dev *dev);
 
+void pci_dev_lock(struct pci_dev *dev);
 int pci_dev_trylock(struct pci_dev *dev);
 void pci_dev_unlock(struct pci_dev *dev);
 
index 98a9371..ae4004e 100644 (file)
@@ -6,7 +6,6 @@
 #include <linux/preempt.h>
 #include <linux/smp.h>
 #include <linux/cpumask.h>
-#include <linux/printk.h>
 #include <linux/pfn.h>
 #include <linux/init.h>
 
index 00fef00..5bbcd28 100644 (file)
@@ -184,8 +184,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
 #endif
 
 #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
+void clear_posix_cputimers_work(struct task_struct *p);
 void posix_cputimers_init_work(void);
 #else
+static inline void clear_posix_cputimers_work(struct task_struct *p) { }
 static inline void posix_cputimers_init_work(void) { }
 #endif
 
index 85b656f..9497f6b 100644 (file)
@@ -198,6 +198,7 @@ void dump_stack_print_info(const char *log_lvl);
 void show_regs_print_info(const char *log_lvl);
 extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
 extern asmlinkage void dump_stack(void) __cold;
+void printk_trigger_flush(void);
 #else
 static inline __printf(1, 0)
 int vprintk(const char *s, va_list args)
@@ -274,6 +275,9 @@ static inline void dump_stack_lvl(const char *log_lvl)
 static inline void dump_stack(void)
 {
 }
+static inline void printk_trigger_flush(void)
+{
+}
 #endif
 
 #ifdef CONFIG_SMP
index 50453b2..2d167ac 100644 (file)
@@ -673,7 +673,7 @@ struct trace_event_file {
 
 #define PERF_MAX_TRACE_SIZE    8192
 
-#define MAX_FILTER_STR_VAL     256     /* Should handle KSYM_SYMBOL_LEN */
+#define MAX_FILTER_STR_VAL     256U    /* Should handle KSYM_SYMBOL_LEN */
 
 enum event_trigger_type {
        ETT_NONE                = (0),
index 1eaaa93..329d63b 100644 (file)
@@ -15,7 +15,7 @@
 #else
 #define MODULE_VERMAGIC_SMP ""
 #endif
-#ifdef CONFIG_PREEMPT
+#ifdef CONFIG_PREEMPT_BUILD
 #define MODULE_VERMAGIC_PREEMPT "preempt "
 #elif defined(CONFIG_PREEMPT_RT)
 #define MODULE_VERMAGIC_PREEMPT "preempt_rt "
index b465f8f..04e87f4 100644 (file)
@@ -120,10 +120,15 @@ retry:
 
        if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
                u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size);
+               unsigned int nh_off = p_off;
                struct skb_shared_info *shinfo = skb_shinfo(skb);
 
+               /* UFO may not include transport header in gso_size. */
+               if (gso_type & SKB_GSO_UDP)
+                       nh_off -= thlen;
+
                /* Too small packets are not really GSO ones. */
-               if (skb->len - p_off > gso_size) {
+               if (skb->len - nh_off > gso_size) {
                        shinfo->gso_size = gso_size;
                        shinfo->gso_type = gso_type;
 
index e87f78c..113408e 100644 (file)
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of https://github.com/facebook/zstd) and
+ * the GPLv2 (found in the COPYING file in the root directory of
+ * https://github.com/facebook/zstd). You may select, at your option, one of the
+ * above-listed licenses.
  */
 
-#ifndef ZSTD_H
-#define ZSTD_H
+#ifndef LINUX_ZSTD_H
+#define LINUX_ZSTD_H
 
-/* ======   Dependency   ======*/
-#include <linux/types.h>   /* size_t */
+/**
+ * This is a kernel-style API that wraps the upstream zstd API, which cannot be
+ * used directly because the symbols aren't exported. It exposes the minimal
+ * functionality which is currently required by users of zstd in the kernel.
+ * Expose extra functions from lib/zstd/zstd.h as needed.
+ */
 
+/* ======   Dependency   ====== */
+#include <linux/types.h>
+#include <linux/zstd_errors.h>
+#include <linux/zstd_lib.h>
 
-/*-*****************************************************************************
- * Introduction
+/* ======   Helper Functions   ====== */
+/**
+ * zstd_compress_bound() - maximum compressed size in worst case scenario
+ * @src_size: The size of the data to compress.
  *
- * zstd, short for Zstandard, is a fast lossless compression algorithm,
- * targeting real-time compression scenarios at zlib-level and better
- * compression ratios. The zstd compression library provides in-memory
- * compression and decompression functions. The library supports compression
- * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled
- * ultra, should be used with caution, as they require more memory.
- * Compression can be done in:
- *  - a single step, reusing a context (described as Explicit memory management)
- *  - unbounded multiple steps (described as Streaming compression)
- * The compression ratio achievable on small data can be highly improved using
- * compression with a dictionary in:
- *  - a single step (described as Simple dictionary API)
- *  - a single step, reusing a dictionary (described as Fast dictionary API)
- ******************************************************************************/
-
-/*======  Helper functions  ======*/
+ * Return:    The maximum compressed size in the worst case scenario.
+ */
+size_t zstd_compress_bound(size_t src_size);
 
 /**
- * enum ZSTD_ErrorCode - zstd error codes
+ * zstd_is_error() - tells if a size_t function result is an error code
+ * @code:  The function result to check for error.
  *
- * Functions that return size_t can be checked for errors using ZSTD_isError()
- * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode().
+ * Return: Non-zero iff the code is an error.
+ */
+unsigned int zstd_is_error(size_t code);
+
+/**
+ * enum zstd_error_code - zstd error codes
  */
-typedef enum {
-       ZSTD_error_no_error,
-       ZSTD_error_GENERIC,
-       ZSTD_error_prefix_unknown,
-       ZSTD_error_version_unsupported,
-       ZSTD_error_parameter_unknown,
-       ZSTD_error_frameParameter_unsupported,
-       ZSTD_error_frameParameter_unsupportedBy32bits,
-       ZSTD_error_frameParameter_windowTooLarge,
-       ZSTD_error_compressionParameter_unsupported,
-       ZSTD_error_init_missing,
-       ZSTD_error_memory_allocation,
-       ZSTD_error_stage_wrong,
-       ZSTD_error_dstSize_tooSmall,
-       ZSTD_error_srcSize_wrong,
-       ZSTD_error_corruption_detected,
-       ZSTD_error_checksum_wrong,
-       ZSTD_error_tableLog_tooLarge,
-       ZSTD_error_maxSymbolValue_tooLarge,
-       ZSTD_error_maxSymbolValue_tooSmall,
-       ZSTD_error_dictionary_corrupted,
-       ZSTD_error_dictionary_wrong,
-       ZSTD_error_dictionaryCreation_failed,
-       ZSTD_error_maxCode
-} ZSTD_ErrorCode;
+typedef ZSTD_ErrorCode zstd_error_code;
 
 /**
- * ZSTD_maxCLevel() - maximum compression level available
+ * zstd_get_error_code() - translates an error function result to an error code
+ * @code:  The function result for which zstd_is_error(code) is true.
  *
- * Return: Maximum compression level available.
+ * Return: A unique error code for this error.
  */
-int ZSTD_maxCLevel(void);
+zstd_error_code zstd_get_error_code(size_t code);
+
 /**
- * ZSTD_compressBound() - maximum compressed size in worst case scenario
- * @srcSize: The size of the data to compress.
+ * zstd_get_error_name() - translates an error function result to a string
+ * @code:  The function result for which zstd_is_error(code) is true.
  *
- * Return:   The maximum compressed size in the worst case scenario.
+ * Return: An error string corresponding to the error code.
  */
-size_t ZSTD_compressBound(size_t srcSize);
+const char *zstd_get_error_name(size_t code);
+
 /**
- * ZSTD_isError() - tells if a size_t function result is an error code
- * @code:  The function result to check for error.
+ * zstd_min_clevel() - minimum allowed compression level
  *
- * Return: Non-zero iff the code is an error.
+ * Return: The minimum allowed compression level.
  */
-static __attribute__((unused)) unsigned int ZSTD_isError(size_t code)
-{
-       return code > (size_t)-ZSTD_error_maxCode;
-}
+int zstd_min_clevel(void);
+
 /**
- * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode
- * @functionResult: The result of a function for which ZSTD_isError() is true.
+ * zstd_max_clevel() - maximum allowed compression level
  *
- * Return:          The ZSTD_ErrorCode corresponding to the functionResult or 0
- *                  if the functionResult isn't an error.
+ * Return: The maximum allowed compression level.
  */
-static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode(
-       size_t functionResult)
-{
-       if (!ZSTD_isError(functionResult))
-               return (ZSTD_ErrorCode)0;
-       return (ZSTD_ErrorCode)(0 - functionResult);
-}
+int zstd_max_clevel(void);
+
+/* ======   Parameter Selection   ====== */
 
 /**
- * enum ZSTD_strategy - zstd compression search strategy
+ * enum zstd_strategy - zstd compression search strategy
  *
- * From faster to stronger.
+ * From faster to stronger. See zstd_lib.h.
  */
-typedef enum {
-       ZSTD_fast,
-       ZSTD_dfast,
-       ZSTD_greedy,
-       ZSTD_lazy,
-       ZSTD_lazy2,
-       ZSTD_btlazy2,
-       ZSTD_btopt,
-       ZSTD_btopt2
-} ZSTD_strategy;
+typedef ZSTD_strategy zstd_strategy;
 
 /**
- * struct ZSTD_compressionParameters - zstd compression parameters
+ * struct zstd_compression_parameters - zstd compression parameters
  * @windowLog:    Log of the largest match distance. Larger means more
  *                compression, and more memory needed during decompression.
- * @chainLog:     Fully searched segment. Larger means more compression, slower,
- *                and more memory (useless for fast).
+ * @chainLog:     Fully searched segment. Larger means more compression,
+ *                slower, and more memory (useless for fast).
  * @hashLog:      Dispatch table. Larger means more compression,
  *                slower, and more memory.
  * @searchLog:    Number of searches. Larger means more compression and slower.
@@ -141,1017 +100,348 @@ typedef enum {
  * @targetLength: Acceptable match size for optimal parser (only). Larger means
  *                more compression, and slower.
  * @strategy:     The zstd compression strategy.
+ *
+ * See zstd_lib.h.
  */
-typedef struct {
-       unsigned int windowLog;
-       unsigned int chainLog;
-       unsigned int hashLog;
-       unsigned int searchLog;
-       unsigned int searchLength;
-       unsigned int targetLength;
-       ZSTD_strategy strategy;
-} ZSTD_compressionParameters;
+typedef ZSTD_compressionParameters zstd_compression_parameters;
 
 /**
- * struct ZSTD_frameParameters - zstd frame parameters
- * @contentSizeFlag: Controls whether content size will be present in the frame
- *                   header (when known).
- * @checksumFlag:    Controls whether a 32-bit checksum is generated at the end
- *                   of the frame for error detection.
- * @noDictIDFlag:    Controls whether dictID will be saved into the frame header
- *                   when using dictionary compression.
+ * struct zstd_frame_parameters - zstd frame parameters
+ * @contentSizeFlag: Controls whether content size will be present in the
+ *                   frame header (when known).
+ * @checksumFlag:    Controls whether a 32-bit checksum is generated at the
+ *                   end of the frame for error detection.
+ * @noDictIDFlag:    Controls whether dictID will be saved into the frame
+ *                   header when using dictionary compression.
  *
- * The default value is all fields set to 0.
+ * The default value is all fields set to 0. See zstd_lib.h.
  */
-typedef struct {
-       unsigned int contentSizeFlag;
-       unsigned int checksumFlag;
-       unsigned int noDictIDFlag;
-} ZSTD_frameParameters;
+typedef ZSTD_frameParameters zstd_frame_parameters;
 
 /**
- * struct ZSTD_parameters - zstd parameters
+ * struct zstd_parameters - zstd parameters
  * @cParams: The compression parameters.
  * @fParams: The frame parameters.
  */
-typedef struct {
-       ZSTD_compressionParameters cParams;
-       ZSTD_frameParameters fParams;
-} ZSTD_parameters;
+typedef ZSTD_parameters zstd_parameters;
 
 /**
- * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
+ * zstd_get_params() - returns zstd_parameters for selected level
+ * @level:              The compression level
+ * @estimated_src_size: The estimated source size to compress or 0
+ *                      if unknown.
  *
- * Return:            The selected ZSTD_compressionParameters.
+ * Return:              The selected zstd_parameters.
  */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel,
-       unsigned long long estimatedSrcSize, size_t dictSize);
+zstd_parameters zstd_get_params(int level,
+       unsigned long long estimated_src_size);
 
-/**
- * ZSTD_getParams() - returns ZSTD_parameters for selected level
- * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel().
- * @estimatedSrcSize: The estimated source size to compress or 0 if unknown.
- * @dictSize:         The dictionary size or 0 if a dictionary isn't being used.
- *
- * The same as ZSTD_getCParams() except also selects the default frame
- * parameters (all zero).
- *
- * Return:            The selected ZSTD_parameters.
- */
-ZSTD_parameters ZSTD_getParams(int compressionLevel,
-       unsigned long long estimatedSrcSize, size_t dictSize);
+/* ======   Single-pass Compression   ====== */
 
-/*-*************************************
- * Explicit memory management
- **************************************/
+typedef ZSTD_CCtx zstd_cctx;
 
 /**
- * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx
- * @cParams: The compression parameters to be used for compression.
+ * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx
+ * @parameters: The compression parameters to be used.
  *
  * If multiple compression parameters might be used, the caller must call
- * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum
+ * zstd_cctx_workspace_bound() for each set of parameters and use the maximum
  * size.
  *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCCtx().
+ * Return:      A lower bound on the size of the workspace that is passed to
+ *              zstd_init_cctx().
  */
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams);
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters);
 
 /**
- * struct ZSTD_CCtx - the zstd compression context
- *
- * When compressing many times it is recommended to allocate a context just once
- * and reuse it for each successive compression operation.
- */
-typedef struct ZSTD_CCtx_s ZSTD_CCtx;
-/**
- * ZSTD_initCCtx() - initialize a zstd compression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A compression context emplaced into workspace.
- */
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_compressCCtx() - compress src into dst
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize, ZSTD_parameters params);
-
-/**
- * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx
- *
- * Return: A lower bound on the size of the workspace that is passed to
- *         ZSTD_initDCtx().
- */
-size_t ZSTD_DCtxWorkspaceBound(void);
-
-/**
- * struct ZSTD_DCtx - the zstd decompression context
- *
- * When decompressing many times it is recommended to allocate a context just
- * once and reuse it for each successive decompression operation.
- */
-typedef struct ZSTD_DCtx_s ZSTD_DCtx;
-/**
- * ZSTD_initDCtx() - initialize a zstd decompression context
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to
- *                 determine how large the workspace must be.
- *
- * Return:         A decompression context emplaced into workspace.
- */
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize);
-
-/**
- * ZSTD_decompressDCtx() - decompress zstd compressed src into dst
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize);
-
-/*-************************
- * Simple dictionary API
- **************************/
-
-/**
- * ZSTD_compress_usingDict() - compress src into dst using a dictionary
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(params.cParams).
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @dict:        The dictionary to use for compression.
- * @dictSize:    The size of the dictionary.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a predefined dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize, const void *dict, size_t dictSize,
-       ZSTD_parameters params);
-
-/**
- * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @dict:        The dictionary to use for decompression. The same dictionary
- *               must've been used to compress the data.
- * @dictSize:    The size of the dictionary.
- *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize, const void *dict, size_t dictSize);
-
-/*-**************************
- * Fast dictionary API
- ***************************/
-
-/**
- * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict
- * @cParams: The compression parameters to be used for compression.
+ * zstd_init_cctx() - initialize a zstd compression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to
+ *                  determine how large the workspace must be.
  *
- * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCDict().
- */
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams);
-
-/**
- * struct ZSTD_CDict - a digested dictionary to be used for compression
+ * Return:          A zstd compression context or NULL on error.
  */
-typedef struct ZSTD_CDict_s ZSTD_CDict;
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_initCDict() - initialize a digested dictionary for compression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_CDict so it must outlive the returned ZSTD_CDict.
- * @dictSize:      The size of the dictionary.
- * @params:        The parameters to use for compression. See ZSTD_getParams().
- * @workspace:     The workspace. It must outlive the returned ZSTD_CDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_CDictWorkspaceBound(params.cParams).
+ * zstd_compress_cctx() - compress src into dst with the initialized parameters
+ * @cctx:         The context. Must have been initialized with zstd_init_cctx().
+ * @dst:          The buffer to compress src into.
+ * @dst_capacity: The size of the destination buffer. May be any size, but
+ *                ZSTD_compressBound(srcSize) is guaranteed to be large enough.
+ * @src:          The data to compress.
+ * @src_size:     The size of the data to compress.
+ * @parameters:   The compression parameters to be used.
  *
- * When compressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_CDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_CDict.
- *
- * Return:         The digested dictionary emplaced into workspace.
+ * Return:        The compressed size or an error, which can be checked using
+ *                zstd_is_error().
  */
-ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize,
-       ZSTD_parameters params, void *workspace, size_t workspaceSize);
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size, const zstd_parameters *parameters);
 
-/**
- * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict
- * @ctx:         The context. Must have been initialized with a workspace at
- *               least as large as ZSTD_CCtxWorkspaceBound(cParams) where
- *               cParams are the compression parameters used to initialize the
- *               cdict.
- * @dst:         The buffer to compress src into.
- * @dstCapacity: The size of the destination buffer. May be any size, but
- *               ZSTD_compressBound(srcSize) is guaranteed to be large enough.
- * @src:         The data to compress.
- * @srcSize:     The size of the data to compress.
- * @cdict:       The digested dictionary to use for compression.
- * @params:      The parameters to use for compression. See ZSTD_getParams().
- *
- * Compression using a digested dictionary. The same dictionary must be used
- * during decompression.
- *
- * Return:       The compressed size or an error, which can be checked using
- *               ZSTD_isError().
- */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize, const ZSTD_CDict *cdict);
+/* ======   Single-pass Decompression   ====== */
 
+typedef ZSTD_DCtx zstd_dctx;
 
 /**
- * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict
+ * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx
  *
- * Return:  A lower bound on the size of the workspace that is passed to
- *          ZSTD_initDDict().
- */
-size_t ZSTD_DDictWorkspaceBound(void);
-
-/**
- * struct ZSTD_DDict - a digested dictionary to be used for decompression
+ * Return: A lower bound on the size of the workspace that is passed to
+ *         zstd_init_dctx().
  */
-typedef struct ZSTD_DDict_s ZSTD_DDict;
+size_t zstd_dctx_workspace_bound(void);
 
 /**
- * ZSTD_initDDict() - initialize a digested dictionary for decompression
- * @dictBuffer:    The dictionary to digest. The buffer is referenced by the
- *                 ZSTD_DDict so it must outlive the returned ZSTD_DDict.
- * @dictSize:      The size of the dictionary.
- * @workspace:     The workspace. It must outlive the returned ZSTD_DDict.
- * @workspaceSize: The workspace size. Must be at least
- *                 ZSTD_DDictWorkspaceBound().
- *
- * When decompressing multiple messages / blocks with the same dictionary it is
- * recommended to load it just once. The ZSTD_DDict merely references the
- * dictBuffer, so it must outlive the returned ZSTD_DDict.
+ * zstd_init_dctx() - initialize a zstd decompression context
+ * @workspace:      The workspace to emplace the context into. It must outlive
+ *                  the returned context.
+ * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to
+ *                  determine how large the workspace must be.
  *
- * Return:         The digested dictionary emplaced into workspace.
+ * Return:          A zstd decompression context or NULL on error.
  */
-ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize,
-       void *workspace, size_t workspaceSize);
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size);
 
 /**
- * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict
- * @ctx:         The decompression context.
- * @dst:         The buffer to decompress src into.
- * @dstCapacity: The size of the destination buffer. Must be at least as large
- *               as the decompressed size. If the caller cannot upper bound the
- *               decompressed size, then it's better to use the streaming API.
- * @src:         The zstd compressed data to decompress. Multiple concatenated
- *               frames and skippable frames are allowed.
- * @srcSize:     The exact size of the data to decompress.
- * @ddict:       The digested dictionary to use for decompression. The same
- *               dictionary must've been used to compress the data.
+ * zstd_decompress_dctx() - decompress zstd compressed src into dst
+ * @dctx:         The decompression context.
+ * @dst:          The buffer to decompress src into.
+ * @dst_capacity: The size of the destination buffer. Must be at least as large
+ *                as the decompressed size. If the caller cannot upper bound the
+ *                decompressed size, then it's better to use the streaming API.
+ * @src:          The zstd compressed data to decompress. Multiple concatenated
+ *                frames and skippable frames are allowed.
+ * @src_size:     The exact size of the data to decompress.
  *
- * Return:       The decompressed size or an error, which can be checked using
- *               ZSTD_isError().
+ * Return:        The decompressed size or an error, which can be checked using
+ *                zstd_is_error().
  */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst,
-       size_t dstCapacity, const void *src, size_t srcSize,
-       const ZSTD_DDict *ddict);
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size);
 
-
-/*-**************************
- * Streaming
- ***************************/
+/* ======   Streaming Buffers   ====== */
 
 /**
- * struct ZSTD_inBuffer - input buffer for streaming
+ * struct zstd_in_buffer - input buffer for streaming
  * @src:  Start of the input buffer.
  * @size: Size of the input buffer.
  * @pos:  Position where reading stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-typedef struct ZSTD_inBuffer_s {
-       const void *src;
-       size_t size;
-       size_t pos;
-} ZSTD_inBuffer;
+typedef ZSTD_inBuffer zstd_in_buffer;
 
 /**
- * struct ZSTD_outBuffer - output buffer for streaming
+ * struct zstd_out_buffer - output buffer for streaming
  * @dst:  Start of the output buffer.
  * @size: Size of the output buffer.
  * @pos:  Position where writing stopped. Will be updated.
  *        Necessarily 0 <= pos <= size.
+ *
+ * See zstd_lib.h.
  */
-typedef struct ZSTD_outBuffer_s {
-       void *dst;
-       size_t size;
-       size_t pos;
-} ZSTD_outBuffer;
+typedef ZSTD_outBuffer zstd_out_buffer;
 
+/* ======   Streaming Compression   ====== */
 
-
-/*-*****************************************************************************
- * Streaming compression - HowTo
- *
- * A ZSTD_CStream object is required to track streaming operation.
- * Use ZSTD_initCStream() to initialize a ZSTD_CStream object.
- * ZSTD_CStream objects can be reused multiple times on consecutive compression
- * operations. It is recommended to re-use ZSTD_CStream in situations where many
- * streaming operations will be achieved consecutively. Use one separate
- * ZSTD_CStream per thread for parallel execution.
- *
- * Use ZSTD_compressStream() repetitively to consume input stream.
- * The function will automatically update both `pos` fields.
- * Note that it may not consume the entire input, in which case `pos < size`,
- * and it's up to the caller to present again remaining data.
- * It returns a hint for the preferred number of bytes to use as an input for
- * the next function call.
- *
- * At any moment, it's possible to flush whatever data remains within internal
- * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might
- * still be some content left within the internal buffer if `output->size` is
- * too small. It returns the number of bytes left in the internal buffer and
- * must be called until it returns 0.
- *
- * ZSTD_endStream() instructs to finish a frame. It will perform a flush and
- * write frame epilogue. The epilogue is required for decoders to consider a
- * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush
- * the full content if `output->size` is too small. In which case, call again
- * ZSTD_endStream() to complete the flush. It returns the number of bytes left
- * in the internal buffer and must be called until it returns 0.
- ******************************************************************************/
+typedef ZSTD_CStream zstd_cstream;
 
 /**
- * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream
- * @cParams: The compression parameters to be used for compression.
+ * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream
+ * @cparams: The compression parameters to be used for compression.
  *
  * Return:   A lower bound on the size of the workspace that is passed to
- *           ZSTD_initCStream() and ZSTD_initCStream_usingCDict().
- */
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams);
-
-/**
- * struct ZSTD_CStream - the zstd streaming compression context
- */
-typedef struct ZSTD_CStream_s ZSTD_CStream;
-
-/*===== ZSTD_CStream management functions =====*/
-/**
- * ZSTD_initCStream() - initialize a zstd streaming compression context
- * @params:         The zstd compression parameters.
- * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must
- *                  pass the source size (zero means empty source). Otherwise,
- *                  the caller may optionally pass the source size, or zero if
- *                  unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace.
- *                  Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine
- *                  how large the workspace must be.
- *
- * Return:          The zstd streaming compression context.
+ *           zstd_init_cstream().
  */
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params,
-       unsigned long long pledgedSrcSize, void *workspace,
-       size_t workspaceSize);
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams);
 
 /**
- * ZSTD_initCStream_usingCDict() - initialize a streaming compression context
- * @cdict:          The digested dictionary to use for compression.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
- * @workspace:      The workspace to emplace the context into. It must outlive
- *                  the returned context.
- * @workspaceSize:  The size of workspace. Call ZSTD_CStreamWorkspaceBound()
- *                  with the cParams used to initialize the cdict to determine
- *                  how large the workspace must be.
+ * zstd_init_cstream() - initialize a zstd streaming compression context
+ * @parameters        The zstd parameters to use for compression.
+ * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller
+ *                    must pass the source size (zero means empty source).
+ *                    Otherwise, the caller may optionally pass the source
+ *                    size, or zero if unknown.
+ * @workspace:        The workspace to emplace the context into. It must outlive
+ *                    the returned context.
+ * @workspace_size:   The size of workspace.
+ *                    Use zstd_cstream_workspace_bound(params->cparams) to
+ *                    determine how large the workspace must be.
  *
- * Return:          The zstd streaming compression context.
+ * Return:            The zstd streaming compression context or NULL on error.
  */
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict,
-       unsigned long long pledgedSrcSize, void *workspace,
-       size_t workspaceSize);
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+       unsigned long long pledged_src_size, void *workspace, size_t workspace_size);
 
-/*===== Streaming compression functions =====*/
 /**
- * ZSTD_resetCStream() - reset the context using parameters from creation
- * @zcs:            The zstd streaming compression context to reset.
- * @pledgedSrcSize: Optionally the source size, or zero if unknown.
+ * zstd_reset_cstream() - reset the context using parameters from creation
+ * @cstream:          The zstd streaming compression context to reset.
+ * @pledged_src_size: Optionally the source size, or zero if unknown.
  *
  * Resets the context using the parameters from creation. Skips dictionary
- * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame
+ * loading, since it can be reused. If `pledged_src_size` is non-zero the frame
  * content size is always written into the frame header.
  *
- * Return:          Zero or an error, which can be checked using ZSTD_isError().
+ * Return:            Zero or an error, which can be checked using
+ *                    zstd_is_error().
  */
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize);
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+       unsigned long long pledged_src_size);
+
 /**
- * ZSTD_compressStream() - streaming compress some of input into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- * @input:  Source buffer. `input->pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input->pos < input->size`, and it's up to the caller to present
- *          remaining data again.
+ * zstd_compress_stream() - streaming compress some of input into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
+ * @input:   Source buffer. `input->pos` is updated to indicate how much data
+ *           was read. Note that it may not consume the entire input, in which
+ *           case `input->pos < input->size`, and it's up to the caller to
+ *           present remaining data again.
  *
  * The `input` and `output` buffers may be any size. Guaranteed to make some
  * forward progress if `input` and `output` are not empty.
  *
- * Return:  A hint for the number of bytes to use as the input for the next
- *          function call or an error, which can be checked using
- *          ZSTD_isError().
+ * Return:   A hint for the number of bytes to use as the input for the next
+ *           function call or an error, which can be checked using
+ *           zstd_is_error().
  */
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output,
-       ZSTD_inBuffer *input);
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+       zstd_in_buffer *input);
+
 /**
- * ZSTD_flushStream() - flush internal buffers into output
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
+ * zstd_flush_stream() - flush internal buffers into output
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
  *
- * ZSTD_flushStream() must be called until it returns 0, meaning all the data
- * has been flushed. Since ZSTD_flushStream() causes a block to be ended,
+ * zstd_flush_stream() must be called until it returns 0, meaning all the data
+ * has been flushed. Since zstd_flush_stream() causes a block to be ended,
  * calling it too often will degrade the compression ratio.
  *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
-/**
- * ZSTD_endStream() - flush internal buffers into output and end the frame
- * @zcs:    The zstd streaming compression context.
- * @output: Destination buffer. `output->pos` is updated to indicate how much
- *          compressed data was written.
- *
- * ZSTD_endStream() must be called until it returns 0, meaning all the data has
- * been flushed and the frame epilogue has been written.
- *
- * Return:  The number of bytes still present within internal buffers or an
- *          error, which can be checked using ZSTD_isError().
- */
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output);
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
 /**
- * ZSTD_CStreamInSize() - recommended size for the input buffer
- *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_CStreamInSize(void);
-/**
- * ZSTD_CStreamOutSize() - recommended size for the output buffer
+ * zstd_end_stream() - flush internal buffers into output and end the frame
+ * @cstream: The zstd streaming compression context.
+ * @output:  Destination buffer. `output->pos` is updated to indicate how much
+ *           compressed data was written.
  *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete compressed block.
+ * zstd_end_stream() must be called until it returns 0, meaning all the data has
+ * been flushed and the frame epilogue has been written.
  *
- * Return: The recommended size for the output buffer.
+ * Return:   The number of bytes still present within internal buffers or an
+ *           error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_CStreamOutSize(void);
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output);
 
+/* ======   Streaming Decompression   ====== */
 
-
-/*-*****************************************************************************
- * Streaming decompression - HowTo
- *
- * A ZSTD_DStream object is required to track streaming operations.
- * Use ZSTD_initDStream() to initialize a ZSTD_DStream object.
- * ZSTD_DStream objects can be re-used multiple times.
- *
- * Use ZSTD_decompressStream() repetitively to consume your input.
- * The function will update both `pos` fields.
- * If `input->pos < input->size`, some input has not been consumed.
- * It's up to the caller to present again remaining data.
- * If `output->pos < output->size`, decoder has flushed everything it could.
- * Returns 0 iff a frame is completely decoded and fully flushed.
- * Otherwise it returns a suggested next input size that will never load more
- * than the current frame.
- ******************************************************************************/
+typedef ZSTD_DStream zstd_dstream;
 
 /**
- * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream
- * @maxWindowSize: The maximum window size allowed for compressed frames.
+ * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream
+ * @max_window_size: The maximum window size allowed for compressed frames.
  *
- * Return:         A lower bound on the size of the workspace that is passed to
- *                 ZSTD_initDStream() and ZSTD_initDStream_usingDDict().
+ * Return:           A lower bound on the size of the workspace that is passed
+ *                   to zstd_init_dstream().
  */
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize);
+size_t zstd_dstream_workspace_bound(size_t max_window_size);
 
 /**
- * struct ZSTD_DStream - the zstd streaming decompression context
- */
-typedef struct ZSTD_DStream_s ZSTD_DStream;
-/*===== ZSTD_DStream management functions =====*/
-/**
- * ZSTD_initDStream() - initialize a zstd streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
- *
- * Return:         The zstd streaming decompression context.
- */
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace,
-       size_t workspaceSize);
-/**
- * ZSTD_initDStream_usingDDict() - initialize streaming decompression context
- * @maxWindowSize: The maximum window size allowed for compressed frames.
- * @ddict:         The digested dictionary to use for decompression.
- * @workspace:     The workspace to emplace the context into. It must outlive
- *                 the returned context.
- * @workspaceSize: The size of workspace.
- *                 Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine
- *                 how large the workspace must be.
+ * zstd_init_dstream() - initialize a zstd streaming decompression context
+ * @max_window_size: The maximum window size allowed for compressed frames.
+ * @workspace:       The workspace to emplace the context into. It must outlive
+ *                   the returned context.
+ * @workspaceSize:   The size of workspace.
+ *                   Use zstd_dstream_workspace_bound(max_window_size) to
+ *                   determine how large the workspace must be.
  *
- * Return:         The zstd streaming decompression context.
+ * Return:           The zstd streaming decompression context.
  */
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize,
-       const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize);
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+       size_t workspace_size);
 
-/*===== Streaming decompression functions =====*/
 /**
- * ZSTD_resetDStream() - reset the context using parameters from creation
- * @zds:   The zstd streaming decompression context to reset.
+ * zstd_reset_dstream() - reset the context using parameters from creation
+ * @dstream: The zstd streaming decompression context to reset.
  *
  * Resets the context using the parameters from creation. Skips dictionary
  * loading, since it can be reused.
  *
- * Return: Zero or an error, which can be checked using ZSTD_isError().
+ * Return:   Zero or an error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_resetDStream(ZSTD_DStream *zds);
+size_t zstd_reset_dstream(zstd_dstream *dstream);
+
 /**
- * ZSTD_decompressStream() - streaming decompress some of input into output
- * @zds:    The zstd streaming decompression context.
- * @output: Destination buffer. `output.pos` is updated to indicate how much
- *          decompressed data was written.
- * @input:  Source buffer. `input.pos` is updated to indicate how much data was
- *          read. Note that it may not consume the entire input, in which case
- *          `input.pos < input.size`, and it's up to the caller to present
- *          remaining data again.
+ * zstd_decompress_stream() - streaming decompress some of input into output
+ * @dstream: The zstd streaming decompression context.
+ * @output:  Destination buffer. `output.pos` is updated to indicate how much
+ *           decompressed data was written.
+ * @input:   Source buffer. `input.pos` is updated to indicate how much data was
+ *           read. Note that it may not consume the entire input, in which case
+ *           `input.pos < input.size`, and it's up to the caller to present
+ *           remaining data again.
  *
  * The `input` and `output` buffers may be any size. Guaranteed to make some
  * forward progress if `input` and `output` are not empty.
- * ZSTD_decompressStream() will not consume the last byte of the frame until
+ * zstd_decompress_stream() will not consume the last byte of the frame until
  * the entire frame is flushed.
  *
- * Return:  Returns 0 iff a frame is completely decoded and fully flushed.
- *          Otherwise returns a hint for the number of bytes to use as the input
- *          for the next function call or an error, which can be checked using
- *          ZSTD_isError(). The size hint will never load more than the frame.
+ * Return:   Returns 0 iff a frame is completely decoded and fully flushed.
+ *           Otherwise returns a hint for the number of bytes to use as the
+ *           input for the next function call or an error, which can be checked
+ *           using zstd_is_error(). The size hint will never load more than the
+ *           frame.
  */
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output,
-       ZSTD_inBuffer *input);
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+       zstd_in_buffer *input);
 
-/**
- * ZSTD_DStreamInSize() - recommended size for the input buffer
- *
- * Return: The recommended size for the input buffer.
- */
-size_t ZSTD_DStreamInSize(void);
-/**
- * ZSTD_DStreamOutSize() - recommended size for the output buffer
- *
- * When the output buffer is at least this large, it is guaranteed to be large
- * enough to flush at least one complete decompressed block.
- *
- * Return: The recommended size for the output buffer.
- */
-size_t ZSTD_DStreamOutSize(void);
-
-
-/* --- Constants ---*/
-#define ZSTD_MAGICNUMBER            0xFD2FB528   /* >= v0.8.0 */
-#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50U
-
-#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
-#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
-
-#define ZSTD_WINDOWLOG_MAX_32  27
-#define ZSTD_WINDOWLOG_MAX_64  27
-#define ZSTD_WINDOWLOG_MAX \
-       ((unsigned int)(sizeof(size_t) == 4 \
-               ? ZSTD_WINDOWLOG_MAX_32 \
-               : ZSTD_WINDOWLOG_MAX_64))
-#define ZSTD_WINDOWLOG_MIN 10
-#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX
-#define ZSTD_HASHLOG_MIN        6
-#define ZSTD_CHAINLOG_MAX     (ZSTD_WINDOWLOG_MAX+1)
-#define ZSTD_CHAINLOG_MIN      ZSTD_HASHLOG_MIN
-#define ZSTD_HASHLOG3_MAX      17
-#define ZSTD_SEARCHLOG_MAX    (ZSTD_WINDOWLOG_MAX-1)
-#define ZSTD_SEARCHLOG_MIN      1
-/* only for ZSTD_fast, other strategies are limited to 6 */
-#define ZSTD_SEARCHLENGTH_MAX   7
-/* only for ZSTD_btopt, other strategies are limited to 4 */
-#define ZSTD_SEARCHLENGTH_MIN   3
-#define ZSTD_TARGETLENGTH_MIN   4
-#define ZSTD_TARGETLENGTH_MAX 999
-
-/* for static allocation */
-#define ZSTD_FRAMEHEADERSIZE_MAX 18
-#define ZSTD_FRAMEHEADERSIZE_MIN  6
-#define ZSTD_frameHeaderSize_prefix 5
-#define ZSTD_frameHeaderSize_min ZSTD_FRAMEHEADERSIZE_MIN
-#define ZSTD_frameHeaderSize_max ZSTD_FRAMEHEADERSIZE_MAX
-/* magic number + skippable frame length */
-#define ZSTD_skippableHeaderSize 8
-
-
-/*-*************************************
- * Compressed size functions
- **************************************/
-
-/**
- * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame
- * @src:     Source buffer. It should point to the start of a zstd encoded frame
- *           or a skippable frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           size of the frame.
- *
- * Return:   The compressed size of the frame pointed to by `src` or an error,
- *           which can be check with ZSTD_isError().
- *           Suitable to pass to ZSTD_decompress() or similar functions.
- */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Decompressed size functions
- **************************************/
-/**
- * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header
- * @src:     It should point to the start of a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The frame content size stored in the frame header if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the
- *           frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input.
- */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+/* ======   Frame Inspection Functions ====== */
 
 /**
- * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames
- * @src:     It should point to the start of a series of zstd encoded and/or
- *           skippable frames.
- * @srcSize: The exact size of the series of frames.
+ * zstd_find_frame_compressed_size() - returns the size of a compressed frame
+ * @src:      Source buffer. It should point to the start of a zstd encoded
+ *            frame or a skippable frame.
+ * @src_size: The size of the source buffer. It must be at least as large as the
+ *            size of the frame.
  *
- * If any zstd encoded frame in the series doesn't have the frame content size
- * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always
- * set when using ZSTD_compress(). The decompressed size can be very large.
- * If the source is untrusted, the decompressed size could be wrong or
- * intentionally modified. Always ensure the result fits within the
- * application's authorized limits. ZSTD_findDecompressedSize() handles multiple
- * frames, and so it must traverse the input to read each frame header. This is
- * efficient as most of the data is skipped, however it does mean that all frame
- * data must be present and valid.
- *
- * Return:   Decompressed size of all the data contained in the frames if known.
- *           `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown.
- *           `ZSTD_CONTENTSIZE_ERROR` if an error occurred.
- */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize);
-
-/*-*************************************
- * Advanced compression functions
- **************************************/
-/**
- * ZSTD_checkCParams() - ensure parameter values remain within authorized range
- * @cParams: The zstd compression parameters.
- *
- * Return:   Zero or an error, which can be checked using ZSTD_isError().
+ * Return:    The compressed size of the frame pointed to by `src` or an error,
+ *            which can be check with zstd_is_error().
+ *            Suitable to pass to ZSTD_decompress() or similar functions.
  */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams);
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size);
 
 /**
- * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize
- * @srcSize:  Optionally the estimated source size, or zero if unknown.
- * @dictSize: Optionally the estimated dictionary size, or zero if unknown.
- *
- * Return:    The optimized parameters.
- */
-ZSTD_compressionParameters ZSTD_adjustCParams(
-       ZSTD_compressionParameters cParams, unsigned long long srcSize,
-       size_t dictSize);
-
-/*--- Advanced decompression functions ---*/
-
-/**
- * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame
- * @buffer: The source buffer to check.
- * @size:   The size of the source buffer, must be at least 4 bytes.
- *
- * Return: True iff the buffer starts with a zstd or skippable frame identifier.
- */
-unsigned int ZSTD_isFrame(const void *buffer, size_t size);
-
-/**
- * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary
- * @dict:     The dictionary buffer.
- * @dictSize: The size of the dictionary buffer.
- *
- * Return:    The dictionary id stored within the dictionary or 0 if the
- *            dictionary is not a zstd dictionary. If it returns 0 the
- *            dictionary can still be loaded as a content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize);
-
-/**
- * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict
- * @ddict: The ddict to find the id of.
- *
- * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not
- *         a zstd dictionary. If it returns 0 `ddict` will be loaded as a
- *         content-only dictionary.
- */
-unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict);
-
-/**
- * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame
- * @src:     Source buffer. It must be a zstd encoded frame.
- * @srcSize: The size of the source buffer. It must be at least as large as the
- *           frame header. `ZSTD_frameHeaderSize_max` is always large enough.
- *
- * Return:   The dictionary id required to decompress the frame stored within
- *           `src` or 0 if the dictionary id could not be decoded. It can return
- *           0 if the frame does not require a dictionary, the dictionary id
- *           wasn't stored in the frame, `src` is not a zstd frame, or `srcSize`
- *           is too small.
- */
-unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize);
-
-/**
- * struct ZSTD_frameParams - zstd frame parameters stored in the frame header
- * @frameContentSize: The frame content size, or 0 if not present.
+ * struct zstd_frame_params - zstd frame parameters stored in the frame header
+ * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not
+ *                    present.
  * @windowSize:       The window size, or 0 if the frame is a skippable frame.
+ * @blockSizeMax:     The maximum block size.
+ * @frameType:        The frame type (zstd or skippable)
+ * @headerSize:       The size of the frame header.
  * @dictID:           The dictionary id, or 0 if not present.
  * @checksumFlag:     Whether a checksum was used.
+ *
+ * See zstd_lib.h.
  */
-typedef struct {
-       unsigned long long frameContentSize;
-       unsigned int windowSize;
-       unsigned int dictID;
-       unsigned int checksumFlag;
-} ZSTD_frameParams;
+typedef ZSTD_frameHeader zstd_frame_header;
 
 /**
- * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame
- * @fparamsPtr: On success the frame parameters are written here.
- * @src:        The source buffer. It must point to a zstd or skippable frame.
- * @srcSize:    The size of the source buffer. `ZSTD_frameHeaderSize_max` is
- *              always large enough to succeed.
+ * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame
+ * @params:   On success the frame parameters are written here.
+ * @src:      The source buffer. It must point to a zstd or skippable frame.
+ * @src_size: The size of the source buffer.
  *
- * Return:      0 on success. If more data is required it returns how many bytes
- *              must be provided to make forward progress. Otherwise it returns
- *              an error, which can be checked using ZSTD_isError().
+ * Return:    0 on success. If more data is required it returns how many bytes
+ *            must be provided to make forward progress. Otherwise it returns
+ *            an error, which can be checked using zstd_is_error().
  */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src,
-       size_t srcSize);
-
-/*-*****************************************************************************
- * Buffer-less and synchronous inner streaming functions
- *
- * This is an advanced API, giving full control over buffer management, for
- * users which need direct control over memory.
- * But it's also a complex one, with many restrictions (documented below).
- * Prefer using normal streaming API for an easier experience
- ******************************************************************************/
-
-/*-*****************************************************************************
- * Buffer-less streaming compression (synchronous mode)
- *
- * A ZSTD_CCtx object is required to track streaming operations.
- * Use ZSTD_initCCtx() to initialize a context.
- * ZSTD_CCtx object can be re-used multiple times within successive compression
- * operations.
- *
- * Start by initializing a context.
- * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary
- * compression,
- * or ZSTD_compressBegin_advanced(), for finer parameter control.
- * It's also possible to duplicate a reference context which has already been
- * initialized, using ZSTD_copyCCtx()
- *
- * Then, consume your input using ZSTD_compressContinue().
- * There are some important considerations to keep in mind when using this
- * advanced function :
- * - ZSTD_compressContinue() has no internal buffer. It uses externally provided
- *   buffer only.
- * - Interface is synchronous : input is consumed entirely and produce 1+
- *   (or more) compressed blocks.
- * - Caller must ensure there is enough space in `dst` to store compressed data
- *   under worst case scenario. Worst case evaluation is provided by
- *   ZSTD_compressBound().
- *   ZSTD_compressContinue() doesn't guarantee recover after a failed
- *   compression.
- * - ZSTD_compressContinue() presumes prior input ***is still accessible and
- *   unmodified*** (up to maximum distance size, see WindowLog).
- *   It remembers all previous contiguous blocks, plus one separated memory
- *   segment (which can itself consists of multiple contiguous blocks)
- * - ZSTD_compressContinue() detects that prior input has been overwritten when
- *   `src` buffer overlaps. In which case, it will "discard" the relevant memory
- *   section from its history.
- *
- * Finish a frame with ZSTD_compressEnd(), which will write the last block(s)
- * and optional checksum. It's possible to use srcSize==0, in which case, it
- * will write a final empty block to end the frame. Without last block mark,
- * frames will be considered unfinished (corrupted) by decoders.
- *
- * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new
- * frame.
- ******************************************************************************/
-
-/*=====   Buffer-less streaming compression functions  =====*/
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel);
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict,
-       size_t dictSize, int compressionLevel);
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict,
-       size_t dictSize, ZSTD_parameters params,
-       unsigned long long pledgedSrcSize);
-size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx,
-       unsigned long long pledgedSrcSize);
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict,
-       unsigned long long pledgedSrcSize);
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize);
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize);
-
-
-
-/*-*****************************************************************************
- * Buffer-less streaming decompression (synchronous mode)
- *
- * A ZSTD_DCtx object is required to track streaming operations.
- * Use ZSTD_initDCtx() to initialize a context.
- * A ZSTD_DCtx object can be re-used multiple times.
- *
- * First typical operation is to retrieve frame parameters, using
- * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide
- * important information to correctly decode the frame, such as the minimum
- * rolling buffer size to allocate to decompress data (`windowSize`), and the
- * dictionary ID used.
- * Note: content size is optional, it may not be present. 0 means unknown.
- * Note that these values could be wrong, either because of data malformation,
- * or because an attacker is spoofing deliberate false information. As a
- * consequence, check that values remain within valid application range,
- * especially `windowSize`, before allocation. Each application can set its own
- * limit, depending on local restrictions. For extended interoperability, it is
- * recommended to support at least 8 MB.
- * Frame parameters are extracted from the beginning of the compressed frame.
- * Data fragment must be large enough to ensure successful decoding, typically
- * `ZSTD_frameHeaderSize_max` bytes.
- * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled.
- *        >0: `srcSize` is too small, provide at least this many bytes.
- *        errorCode, which can be tested using ZSTD_isError().
- *
- * Start decompression, with ZSTD_decompressBegin() or
- * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared
- * context, using ZSTD_copyDCtx().
- *
- * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue()
- * alternatively.
- * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize'
- * to ZSTD_decompressContinue().
- * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will
- * fail.
- *
- * The result of ZSTD_decompressContinue() is the number of bytes regenerated
- * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an
- * error; it just means ZSTD_decompressContinue() has decoded some metadata
- * item. It can also be an error code, which can be tested with ZSTD_isError().
- *
- * ZSTD_decompressContinue() needs previous data blocks during decompression, up
- * to `windowSize`. They should preferably be located contiguously, prior to
- * current block. Alternatively, a round buffer of sufficient size is also
- * possible. Sufficient size is determined by frame parameters.
- * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't
- * follow each other, make sure that either the compressor breaks contiguity at
- * the same place, or that previous contiguous segment is large enough to
- * properly handle maximum back-reference.
- *
- * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
- * Context can then be reset to start a new decompression.
- *
- * Note: it's possible to know if next input to present is a header or a block,
- * using ZSTD_nextInputType(). This information is not required to properly
- * decode a frame.
- *
- * == Special case: skippable frames ==
- *
- * Skippable frames allow integration of user-defined data into a flow of
- * concatenated frames. Skippable frames will be ignored (skipped) by a
- * decompressor. The format of skippable frames is as follows:
- * a) Skippable frame ID - 4 Bytes, Little endian format, any value from
- *    0x184D2A50 to 0x184D2A5F
- * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
- * c) Frame Content - any content (User Data) of length equal to Frame Size
- * For skippable frames ZSTD_decompressContinue() always returns 0.
- * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0
- * what means that a frame is skippable.
- * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might
- *       actually be a zstd encoded frame with no content. For purposes of
- *       decompression, it is valid in both cases to skip the frame using
- *       ZSTD_findFrameCompressedSize() to find its size in bytes.
- * It also returns frame size as fparamsPtr->frameContentSize.
- ******************************************************************************/
-
-/*=====   Buffer-less streaming decompression functions  =====*/
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict,
-       size_t dictSize);
-void   ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx);
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx);
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize);
-typedef enum {
-       ZSTDnit_frameHeader,
-       ZSTDnit_blockHeader,
-       ZSTDnit_block,
-       ZSTDnit_lastBlock,
-       ZSTDnit_checksum,
-       ZSTDnit_skippableFrame
-} ZSTD_nextInputType_e;
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx);
-
-/*-*****************************************************************************
- * Block functions
- *
- * Block functions produce and decode raw zstd blocks, without frame metadata.
- * Frame metadata cost is typically ~18 bytes, which can be non-negligible for
- * very small blocks (< 100 bytes). User will have to take in charge required
- * information to regenerate data, such as compressed and content sizes.
- *
- * A few rules to respect:
- * - Compressing and decompressing require a context structure
- *   + Use ZSTD_initCCtx() and ZSTD_initDCtx()
- * - It is necessary to init context before starting
- *   + compression : ZSTD_compressBegin()
- *   + decompression : ZSTD_decompressBegin()
- *   + variants _usingDict() are also allowed
- *   + copyCCtx() and copyDCtx() work too
- * - Block size is limited, it must be <= ZSTD_getBlockSizeMax()
- *   + If you need to compress more, cut data into multiple blocks
- *   + Consider using the regular ZSTD_compress() instead, as frame metadata
- *     costs become negligible when source size is large.
- * - When a block is considered not compressible enough, ZSTD_compressBlock()
- *   result will be zero. In which case, nothing is produced into `dst`.
- *   + User must test for such outcome and deal directly with uncompressed data
- *   + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!!
- *   + In case of multiple successive blocks, decoder must be informed of
- *     uncompressed block existence to follow proper history. Use
- *     ZSTD_insertBlock() in such a case.
- ******************************************************************************/
-
-/* Define for static allocation */
-#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024)
-/*=====   Raw zstd block functions  =====*/
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx);
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize);
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity,
-       const void *src, size_t srcSize);
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart,
-       size_t blockSize);
+size_t zstd_get_frame_header(zstd_frame_header *params, const void *src,
+       size_t src_size);
 
-#endif  /* ZSTD_H */
+#endif  /* LINUX_ZSTD_H */
diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h
new file mode 100644 (file)
index 0000000..58b6dd4
--- /dev/null
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_ERRORS_H_398273423
+#define ZSTD_ERRORS_H_398273423
+
+
+/*===== dependency =====*/
+#include <linux/types.h>   /* size_t */
+
+
+/* =====   ZSTDERRORLIB_API : control library symbols visibility   ===== */
+#define ZSTDERRORLIB_VISIBILITY 
+#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY
+
+/*-*********************************************
+ *  Error codes list
+ *-*********************************************
+ *  Error codes _values_ are pinned down since v1.3.1 only.
+ *  Therefore, don't rely on values if you may link to any version < v1.3.1.
+ *
+ *  Only values < 100 are considered stable.
+ *
+ *  note 1 : this API shall be used with static linking only.
+ *           dynamic linking is not yet officially supported.
+ *  note 2 : Prefer relying on the enum than on its value whenever possible
+ *           This is the only supported way to use the error list < v1.3.1
+ *  note 3 : ZSTD_isError() is always correct, whatever the library version.
+ **********************************************/
+typedef enum {
+  ZSTD_error_no_error = 0,
+  ZSTD_error_GENERIC  = 1,
+  ZSTD_error_prefix_unknown                = 10,
+  ZSTD_error_version_unsupported           = 12,
+  ZSTD_error_frameParameter_unsupported    = 14,
+  ZSTD_error_frameParameter_windowTooLarge = 16,
+  ZSTD_error_corruption_detected = 20,
+  ZSTD_error_checksum_wrong      = 22,
+  ZSTD_error_dictionary_corrupted      = 30,
+  ZSTD_error_dictionary_wrong          = 32,
+  ZSTD_error_dictionaryCreation_failed = 34,
+  ZSTD_error_parameter_unsupported   = 40,
+  ZSTD_error_parameter_outOfBound    = 42,
+  ZSTD_error_tableLog_tooLarge       = 44,
+  ZSTD_error_maxSymbolValue_tooLarge = 46,
+  ZSTD_error_maxSymbolValue_tooSmall = 48,
+  ZSTD_error_stage_wrong       = 60,
+  ZSTD_error_init_missing      = 62,
+  ZSTD_error_memory_allocation = 64,
+  ZSTD_error_workSpace_tooSmall= 66,
+  ZSTD_error_dstSize_tooSmall = 70,
+  ZSTD_error_srcSize_wrong    = 72,
+  ZSTD_error_dstBuffer_null   = 74,
+  /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */
+  ZSTD_error_frameIndex_tooLarge = 100,
+  ZSTD_error_seekableIO          = 102,
+  ZSTD_error_dstBuffer_wrong     = 104,
+  ZSTD_error_srcBuffer_wrong     = 105,
+  ZSTD_error_maxCode = 120  /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */
+} ZSTD_ErrorCode;
+
+/*! ZSTD_getErrorCode() :
+    convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
+    which can be used to compare with enum list published above */
+ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
+ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code);   /*< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */
+
+
+
+#endif /* ZSTD_ERRORS_H_398273423 */
diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
new file mode 100644 (file)
index 0000000..b8c7dbf
--- /dev/null
@@ -0,0 +1,2432 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_H_235446
+#define ZSTD_H_235446
+
+/* ======   Dependency   ======*/
+#include <linux/limits.h>   /* INT_MAX */
+#include <linux/types.h>   /* size_t */
+
+
+/* =====   ZSTDLIB_API : control library symbols visibility   ===== */
+#define ZSTDLIB_VISIBILITY 
+#define ZSTDLIB_API ZSTDLIB_VISIBILITY
+
+
+/* *****************************************************************************
+  Introduction
+
+  zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
+  real-time compression scenarios at zlib-level and better compression ratios.
+  The zstd compression library provides in-memory compression and decompression
+  functions.
+
+  The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
+  which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
+  caution, as they require more memory. The library also offers negative
+  compression levels, which extend the range of speed vs. ratio preferences.
+  The lower the level, the faster the speed (at the cost of compression).
+
+  Compression can be done in:
+    - a single step (described as Simple API)
+    - a single step, reusing a context (described as Explicit context)
+    - unbounded multiple steps (described as Streaming compression)
+
+  The compression ratio achievable on small data can be highly improved using
+  a dictionary. Dictionary compression can be performed in:
+    - a single step (described as Simple dictionary API)
+    - a single step, reusing a dictionary (described as Bulk-processing
+      dictionary API)
+
+  Advanced experimental functions can be accessed using
+  `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
+
+  Advanced experimental APIs should never be used with a dynamically-linked
+  library. They are not "stable"; their definitions or signatures may change in
+  the future. Only static linking is allowed.
+*******************************************************************************/
+
+/*------   Version   ------*/
+#define ZSTD_VERSION_MAJOR    1
+#define ZSTD_VERSION_MINOR    4
+#define ZSTD_VERSION_RELEASE  10
+#define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+/*! ZSTD_versionNumber() :
+ *  Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */
+ZSTDLIB_API unsigned ZSTD_versionNumber(void);
+
+#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE
+#define ZSTD_QUOTE(str) #str
+#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
+#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
+
+/*! ZSTD_versionString() :
+ *  Return runtime library version, like "1.4.5". Requires v1.3.0+. */
+ZSTDLIB_API const char* ZSTD_versionString(void);
+
+/* *************************************
+ *  Default constant
+ ***************************************/
+#ifndef ZSTD_CLEVEL_DEFAULT
+#  define ZSTD_CLEVEL_DEFAULT 3
+#endif
+
+/* *************************************
+ *  Constants
+ ***************************************/
+
+/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */
+#define ZSTD_MAGICNUMBER            0xFD2FB528    /* valid since v0.8.0 */
+#define ZSTD_MAGIC_DICTIONARY       0xEC30A437    /* valid since v0.7.0 */
+#define ZSTD_MAGIC_SKIPPABLE_START  0x184D2A50    /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */
+#define ZSTD_MAGIC_SKIPPABLE_MASK   0xFFFFFFF0
+
+#define ZSTD_BLOCKSIZELOG_MAX  17
+#define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
+
+
+
+/* *************************************
+*  Simple API
+***************************************/
+/*! ZSTD_compress() :
+ *  Compresses `src` content as a single zstd compressed frame into already allocated `dst`.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ *  @return : compressed size written into `dst` (<= `dstCapacity),
+ *            or an error code if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                                  int compressionLevel);
+
+/*! ZSTD_decompress() :
+ *  `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames.
+ *  `dstCapacity` is an upper bound of originalSize to regenerate.
+ *  If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data.
+ *  @return : the number of bytes decompressed into `dst` (<= `dstCapacity`),
+ *            or an errorCode if it fails (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity,
+                              const void* src, size_t compressedSize);
+
+/*! ZSTD_getFrameContentSize() : requires v1.3.0+
+ *  `src` should point to the start of a ZSTD encoded frame.
+ *  `srcSize` must be at least as large as the frame header.
+ *            hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
+ *  @return : - decompressed size of `src` frame content, if known
+ *            - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *            - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
+ *   note 1 : a 0 return value means the frame is valid but "empty".
+ *   note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *            Optionally, application can rely on some implicit limit,
+ *            as ZSTD_decompress() only needs an upper bound of decompressed size.
+ *            (For example, data could be necessarily cut into blocks <= 16 KB).
+ *   note 3 : decompressed size is always present when compression is completed using single-pass functions,
+ *            such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
+ *   note 4 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure return value fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 6 : This function replaces ZSTD_getDecompressedSize() */
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
+
+/*! ZSTD_getDecompressedSize() :
+ *  NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize().
+ *  Both functions work the same way, but ZSTD_getDecompressedSize() blends
+ *  "empty", "unknown" and "error" results to the same return value (0),
+ *  while ZSTD_getFrameContentSize() gives them separate return values.
+ * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
+ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_findFrameCompressedSize() :
+ * `src` should point to the start of a ZSTD frame or skippable frame.
+ * `srcSize` must be >= first frame size
+ * @return : the compressed size of the first frame starting at `src`,
+ *           suitable to pass as `srcSize` to `ZSTD_decompress` or similar,
+ *        or an error code if input is invalid */
+ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize);
+
+
+/*======  Helper functions  ======*/
+#define ZSTD_COMPRESSBOUND(srcSize)   ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
+ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
+ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
+ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+
+
+/* *************************************
+*  Explicit context
+***************************************/
+/*= Compression context
+ *  When compressing many times,
+ *  it is recommended to allocate a context just once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Note : re-using context is just a speed / resource optimization.
+ *         It doesn't change the compression ratio, which remains identical.
+ *  Note 2 : In multi-threaded environments,
+ *         use one different context per thread for parallel execution.
+ */
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;
+ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer */
+
+/*! ZSTD_compressCCtx() :
+ *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ *  Important : in order to behave similarly to `ZSTD_compress()`,
+ *  this function compresses at requested compression level,
+ *  __ignoring any other parameter__ .
+ *  If any advanced parameter was set using the advanced API,
+ *  they will all be reset. Only `compressionLevel` remains.
+ */
+ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     int compressionLevel);
+
+/*= Decompression context
+ *  When decompressing many times,
+ *  it is recommended to allocate a context only once,
+ *  and re-use it for each successive compression operation.
+ *  This will make workload friendlier for system's memory.
+ *  Use one context per thread for parallel execution. */
+typedef struct ZSTD_DCtx_s ZSTD_DCtx;
+ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void);
+ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer */
+
+/*! ZSTD_decompressDCtx() :
+ *  Same as ZSTD_decompress(),
+ *  requires an allocated ZSTD_DCtx.
+ *  Compatible with sticky parameters.
+ */
+ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize);
+
+
+/* *************************************
+*  Advanced compression API
+***************************************/
+
+/* API design :
+ *   Parameters are pushed one by one into an existing context,
+ *   using ZSTD_CCtx_set*() functions.
+ *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
+ *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
+ *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ *
+ *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
+ *
+ *   This API supercedes all other "advanced" API entry points in the experimental section.
+ *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
+ */
+
+
+/* Compression strategies, listed from fastest to strongest */
+typedef enum { ZSTD_fast=1,
+               ZSTD_dfast=2,
+               ZSTD_greedy=3,
+               ZSTD_lazy=4,
+               ZSTD_lazy2=5,
+               ZSTD_btlazy2=6,
+               ZSTD_btopt=7,
+               ZSTD_btultra=8,
+               ZSTD_btultra2=9
+               /* note : new strategies _might_ be added in the future.
+                         Only the order (from fast to strong) is guaranteed */
+} ZSTD_strategy;
+
+
+typedef enum {
+
+    /* compression parameters
+     * Note: When compressing with a ZSTD_CDict these parameters are superseded
+     * by the parameters used to construct the ZSTD_CDict.
+     * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+    ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table.
+                              * Note that exact compression parameters are dynamically determined,
+                              * depending on both compression level and srcSize (when known).
+                              * Default level is ZSTD_CLEVEL_DEFAULT==3.
+                              * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
+                              * Note 1 : it's possible to pass a negative compression level.
+                              * Note 2 : setting a level does not automatically set all other compression parameters
+                              *   to default. Setting this will however eventually dynamically impact the compression
+                              *   parameters which have not been manually set. The manually set
+                              *   ones will 'stick'. */
+    /* Advanced compression parameters :
+     * It's possible to pin down compression parameters to some specific values.
+     * In which case, these values are no longer dynamically selected by the compressor */
+    ZSTD_c_windowLog=101,    /* Maximum allowed back-reference distance, expressed as power of 2.
+                              * This will set a memory budget for streaming decompression,
+                              * with larger values requiring more memory
+                              * and typically compressing more.
+                              * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
+                              * Special: value 0 means "use default windowLog".
+                              * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
+                              *       requires explicitly allowing such size at streaming decompression stage. */
+    ZSTD_c_hashLog=102,      /* Size of the initial probe table, as a power of 2.
+                              * Resulting memory usage is (1 << (hashLog+2)).
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
+                              * Larger tables improve compression ratio of strategies <= dFast,
+                              * and improve speed of strategies > dFast.
+                              * Special: value 0 means "use default hashLog". */
+    ZSTD_c_chainLog=103,     /* Size of the multi-probe search table, as a power of 2.
+                              * Resulting memory usage is (1 << (chainLog+2)).
+                              * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
+                              * Larger tables result in better and slower compression.
+                              * This parameter is useless for "fast" strategy.
+                              * It's still useful when using "dfast" strategy,
+                              * in which case it defines a secondary probe table.
+                              * Special: value 0 means "use default chainLog". */
+    ZSTD_c_searchLog=104,    /* Number of search attempts, as a power of 2.
+                              * More attempts result in better and slower compression.
+                              * This parameter is useless for "fast" and "dFast" strategies.
+                              * Special: value 0 means "use default searchLog". */
+    ZSTD_c_minMatch=105,     /* Minimum size of searched matches.
+                              * Note that Zstandard can still find matches of smaller size,
+                              * it just tweaks its search algorithm to look for this size and larger.
+                              * Larger values increase compression and decompression speed, but decrease ratio.
+                              * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX.
+                              * Note that currently, for all strategies < btopt, effective minimum is 4.
+                              *                    , for all strategies > fast, effective maximum is 6.
+                              * Special: value 0 means "use default minMatchLength". */
+    ZSTD_c_targetLength=106, /* Impact of this field depends on strategy.
+                              * For strategies btopt, btultra & btultra2:
+                              *     Length of Match considered "good enough" to stop search.
+                              *     Larger values make compression stronger, and slower.
+                              * For strategy fast:
+                              *     Distance between match sampling.
+                              *     Larger values make compression faster, and weaker.
+                              * Special: value 0 means "use default targetLength". */
+    ZSTD_c_strategy=107,     /* See ZSTD_strategy enum definition.
+                              * The higher the value of selected strategy, the more complex it is,
+                              * resulting in stronger and slower compression.
+                              * Special: value 0 means "use default strategy". */
+
+    /* LDM mode parameters */
+    ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
+                                     * This parameter is designed to improve compression ratio
+                                     * for large inputs, by finding large matches at long distance.
+                                     * It increases memory usage and window size.
+                                     * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB
+                                     * except when expressly set to a different value.
+                                     * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and
+                                     * compression strategy >= ZSTD_btopt (== compression level 16+) */
+    ZSTD_c_ldmHashLog=161,   /* Size of the table for long distance matching, as a power of 2.
+                              * Larger values increase memory usage and compression ratio,
+                              * but decrease compression speed.
+                              * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX
+                              * default: windowlog - 7.
+                              * Special: value 0 means "automatically determine hashlog". */
+    ZSTD_c_ldmMinMatch=162,  /* Minimum match size for long distance matcher.
+                              * Larger/too small values usually decrease compression ratio.
+                              * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX.
+                              * Special: value 0 means "use default value" (default: 64). */
+    ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution.
+                              * Larger values improve collision resolution but decrease compression speed.
+                              * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX.
+                              * Special: value 0 means "use default value" (default: 3). */
+    ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table.
+                              * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN).
+                              * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage.
+                              * Larger values improve compression speed.
+                              * Deviating far from default value will likely result in a compression ratio decrease.
+                              * Special: value 0 means "automatically determine hashRateLog". */
+
+    /* frame parameters */
+    ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1)
+                              * Content size must be known at the beginning of compression.
+                              * This is automatically the case when using ZSTD_compress2(),
+                              * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+    ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */
+    ZSTD_c_dictIDFlag=202,   /* When applicable, dictionary's ID is written into frame header (default:1) */
+
+    /* multi-threading parameters */
+    /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD).
+     * Otherwise, trying to set any other value than default (0) will be a no-op and return an error.
+     * In a situation where it's unknown if the linked library supports multi-threading or not,
+     * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property.
+     */
+    ZSTD_c_nbWorkers=400,    /* Select how many threads will be spawned to compress in parallel.
+                              * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() :
+                              * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller,
+                              * while compression is performed in parallel, within worker thread(s).
+                              * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end :
+                              *  in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call).
+                              * More workers improve speed, but also increase memory usage.
+                              * Default value is `0`, aka "single-threaded mode" : no worker is spawned,
+                              * compression is performed inside Caller's thread, and all invocations are blocking */
+    ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
+                              * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
+                              * 0 means default, which is dynamically determined based on compression parameters.
+                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * The minimum size is automatically and transparently enforced. */
+    ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
+                              * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
+                              * It helps preserve compression ratio, while each job is compressed in parallel.
+                              * This value is enforced only when nbWorkers >= 1.
+                              * Larger values increase compression ratio, but decrease speed.
+                              * Possible values range from 0 to 9 :
+                              * - 0 means "default" : value will be determined by the library, depending on strategy
+                              * - 1 means "no overlap"
+                              * - 9 means "full overlap", using a full window size.
+                              * Each intermediate rank increases/decreases load size by a factor 2 :
+                              * 9: full window;  8: w/2;  7: w/4;  6: w/8;  5:w/16;  4: w/32;  3:w/64;  2:w/128;  1:no overlap;  0:default
+                              * default value varies between 6 and 9, depending on strategy */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_c_rsyncable
+     * ZSTD_c_format
+     * ZSTD_c_forceMaxWindow
+     * ZSTD_c_forceAttachDict
+     * ZSTD_c_literalCompressionMode
+     * ZSTD_c_targetCBlockSize
+     * ZSTD_c_srcSizeHint
+     * ZSTD_c_enableDedicatedDictSearch
+     * ZSTD_c_stableInBuffer
+     * ZSTD_c_stableOutBuffer
+     * ZSTD_c_blockDelimiters
+     * ZSTD_c_validateSequences
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly;
+     *        also, the enums values themselves are unstable and can still change.
+     */
+     ZSTD_c_experimentalParam1=500,
+     ZSTD_c_experimentalParam2=10,
+     ZSTD_c_experimentalParam3=1000,
+     ZSTD_c_experimentalParam4=1001,
+     ZSTD_c_experimentalParam5=1002,
+     ZSTD_c_experimentalParam6=1003,
+     ZSTD_c_experimentalParam7=1004,
+     ZSTD_c_experimentalParam8=1005,
+     ZSTD_c_experimentalParam9=1006,
+     ZSTD_c_experimentalParam10=1007,
+     ZSTD_c_experimentalParam11=1008,
+     ZSTD_c_experimentalParam12=1009
+} ZSTD_cParameter;
+
+typedef struct {
+    size_t error;
+    int lowerBound;
+    int upperBound;
+} ZSTD_bounds;
+
+/*! ZSTD_cParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - lower and upper bounds, both inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam);
+
+/*! ZSTD_CCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is generally only possible during frame initialization (before starting compression).
+ *  Exception : when using multi-threading mode (nbWorkers >= 1),
+ *              the following parameters can be updated _during_ compression (within same frame):
+ *              => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
+ *              new parameters will be active for next job only (after a flush()).
+ * @return : an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtx_setPledgedSrcSize() :
+ *  Total input data size to be compressed as a single frame.
+ *  Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag.
+ *  This value will also be controlled at end of frame, and trigger an error if not respected.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame.
+ *           In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN.
+ *           ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame.
+ *  Note 2 : pledgedSrcSize is only valid once, for the next frame.
+ *           It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN.
+ *  Note 3 : Whenever all input data is provided and consumed in a single round,
+ *           for example with ZSTD_compress2(),
+ *           or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end),
+ *           this value is automatically overridden by srcSize instead.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize);
+
+typedef enum {
+    ZSTD_reset_session_only = 1,
+    ZSTD_reset_parameters = 2,
+    ZSTD_reset_session_and_parameters = 3
+} ZSTD_ResetDirective;
+
+/*! ZSTD_CCtx_reset() :
+ *  There are 2 different things that can be reset, independently or jointly :
+ *  - The session : will stop compressing current frame, and make CCtx ready to start a new one.
+ *                  Useful after an error, or to interrupt any ongoing compression.
+ *                  Any internal data not yet flushed is cancelled.
+ *                  Compression parameters and dictionary remain unchanged.
+ *                  They will be used to compress next frame.
+ *                  Resetting session never fails.
+ *  - The parameters : changes all parameters back to "default".
+ *                  This removes any reference to any dictionary too.
+ *                  Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing)
+ *                  otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError())
+ *  - Both : similar to resetting the session, followed by resetting parameters.
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
+
+/*! ZSTD_compress2() :
+ *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ *  ZSTD_compress2() always starts a new frame.
+ *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - The function is always blocking, returns when compression is completed.
+ *  Hint : compression runs faster if `dstCapacity` >=  `ZSTD_compressBound(srcSize)`.
+ * @return : compressed size written into `dst` (<= `dstCapacity),
+ *           or an error code if it fails (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
+                                   void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+
+/* *************************************
+*  Advanced decompression API
+***************************************/
+
+/* The advanced API pushes parameters one by one into an existing DCtx context.
+ * Parameters are sticky, and remain valid for all following frames
+ * using the same DCtx context.
+ * It's possible to reset parameters to default values using ZSTD_DCtx_reset().
+ * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream().
+ *        Therefore, no new decompression function is necessary.
+ */
+
+typedef enum {
+
+    ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which
+                              * the streaming API will refuse to allocate memory buffer
+                              * in order to protect the host from unreasonable memory requirements.
+                              * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+                              * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
+                              * Special: value 0 means "use default maximum windowLog". */
+
+    /* note : additional experimental parameters are also available
+     * within the experimental section of the API.
+     * At the time of this writing, they include :
+     * ZSTD_d_format
+     * ZSTD_d_stableOutBuffer
+     * ZSTD_d_forceIgnoreChecksum
+     * ZSTD_d_refMultipleDDicts
+     * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
+     * note : never ever use experimentalParam? names directly
+     */
+     ZSTD_d_experimentalParam1=1000,
+     ZSTD_d_experimentalParam2=1001,
+     ZSTD_d_experimentalParam3=1002,
+     ZSTD_d_experimentalParam4=1003
+
+} ZSTD_dParameter;
+
+/*! ZSTD_dParam_getBounds() :
+ *  All parameters must belong to an interval with lower and upper bounds,
+ *  otherwise they will either trigger an error or be automatically clamped.
+ * @return : a structure, ZSTD_bounds, which contains
+ *         - an error status field, which must be tested using ZSTD_isError()
+ *         - both lower and upper bounds, inclusive
+ */
+ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam);
+
+/*! ZSTD_DCtx_setParameter() :
+ *  Set one compression parameter, selected by enum ZSTD_dParameter.
+ *  All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds().
+ *  Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter).
+ *  Setting a parameter is only possible during frame initialization (before starting decompression).
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value);
+
+/*! ZSTD_DCtx_reset() :
+ *  Return a DCtx to clean state.
+ *  Session and parameters can be reset jointly or separately.
+ *  Parameters can only be reset when no active frame is being decompressed.
+ * @return : 0, or an error code, which can be tested with ZSTD_isError()
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset);
+
+
+/* **************************
+*  Streaming
+****************************/
+
+typedef struct ZSTD_inBuffer_s {
+  const void* src;    /*< start of input buffer */
+  size_t size;        /*< size of input buffer */
+  size_t pos;         /*< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_inBuffer;
+
+typedef struct ZSTD_outBuffer_s {
+  void*  dst;         /*< start of output buffer */
+  size_t size;        /*< size of output buffer */
+  size_t pos;         /*< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */
+} ZSTD_outBuffer;
+
+
+
+/*-***********************************************************************
+*  Streaming compression - HowTo
+*
+*  A ZSTD_CStream object is required to track streaming operation.
+*  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
+*  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
+*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*
+*  For parallel execution, use one separate ZSTD_CStream per thread.
+*
+*  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
+*
+*  Parameters are sticky : when starting a new compression on the same context,
+*  it will re-use the same sticky parameters as previous compression session.
+*  When in doubt, it's recommended to fully initialize the context before usage.
+*  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
+*  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
+*  set more specific parameters, the pledged source size, or load a dictionary.
+*
+*  Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to
+*  consume input stream. The function will automatically update both `pos`
+*  fields within `input` and `output`.
+*  Note that the function may not consume the entire input, for example, because
+*  the output buffer is already full, in which case `input.pos < input.size`.
+*  The caller must check if input has been entirely consumed.
+*  If not, the caller must make some room to receive more compressed data,
+*  and then present again remaining input data.
+*  note: ZSTD_e_continue is guaranteed to make some forward progress when called,
+*        but doesn't guarantee maximal forward progress. This is especially relevant
+*        when compressing with multiple threads. The call won't block if it can
+*        consume some input, but if it can't it will wait for some, but not all,
+*        output to be flushed.
+* @return : provides a minimum amount of data remaining to be flushed from internal buffers
+*           or an error code, which can be tested using ZSTD_isError().
+*
+*  At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
+*  using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated.
+*  Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0).
+*  In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the
+*  operation.
+*  note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if internal buffers are entirely flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+*  Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame.
+*  It will perform a flush and write frame epilogue.
+*  The epilogue is required for decoders to consider a frame completed.
+*  flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush.
+*  You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to
+*  start a new frame.
+*  note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will
+*        block until the flush is complete or the output buffer is full.
+*  @return : 0 if frame fully completed and fully flushed,
+*            >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
+*            or an error code, which can be tested using ZSTD_isError().
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream;  /*< CCtx and CStream are now effectively same object (>= v1.3.0) */
+                                 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);  /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+    ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+    ZSTD_e_flush=1,    /* flush any data provided so far,
+                        * it creates (at least) one new block, that can be decoded immediately on reception;
+                        * frame will continue: any future data can still reference previously compressed data, improving compression.
+                        * note : multithreaded compression will block to flush as much output as possible. */
+    ZSTD_e_end=2       /* flush any remaining data _and_ close current frame.
+                        * note that frame is only closed after compressed data is fully flushed (return value == 0).
+                        * After that point, any additional data starts a new frame.
+                        * note : each frame is independent (does not reference any content from previous frame).
+                        : note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() :
+ *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ *  - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ *  - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ *  - endOp must be a valid directive
+ *  - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ *  - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available,
+ *                                                  and then immediately returns, just indicating that there is some data remaining to be flushed.
+ *                                                  The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte.
+ *  - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ *  - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ *            or an error code, which can be tested using ZSTD_isError().
+ *            if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ *            This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ *            For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ *  - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ *            Before starting a new compression job, or changing compression parameters,
+ *            it is required to fully flush internal buffers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                                         ZSTD_outBuffer* output,
+                                         ZSTD_inBuffer* input,
+                                         ZSTD_EndDirective endOp);
+
+
+/* These buffer sizes are softly recommended.
+ * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output.
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(),
+ * reducing the amount of memory shuffling and buffering, resulting in minor performance savings.
+ *
+ * However, note that these recommendations are from the perspective of a C caller program.
+ * If the streaming interface is invoked from some other language,
+ * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo,
+ * a major performance rule is to reduce crossing such interface to an absolute minimum.
+ * It's not rare that performance ends being spent more into the interface, rather than compression itself.
+ * In which cases, prefer using large buffers, as large as practical,
+ * for both input and output, to reduce the nb of roundtrips.
+ */
+ZSTDLIB_API size_t ZSTD_CStreamInSize(void);    /*< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */
+
+
+/* *****************************************************************************
+ * This following is a legacy streaming API.
+ * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
+ * It is redundant, but remains fully supported.
+ * Advanced parameters and dictionary compression can only be used through the
+ * new API.
+ ******************************************************************************/
+
+/*!
+ * Equivalent to:
+ *
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ */
+ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel);
+/*!
+ * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue).
+ * NOTE: The return value is different. ZSTD_compressStream() returns a hint for
+ * the next read size (if non-zero and not an error). ZSTD_compressStream2()
+ * returns the minimum nb of bytes left to flush (if non-zero and not an error).
+ */
+ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */
+ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */
+ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
+
+
+/*-***************************************************************************
+*  Streaming decompression - HowTo
+*
+*  A ZSTD_DStream object is required to track streaming operations.
+*  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
+*  ZSTD_DStream objects can be re-used multiple times.
+*
+*  Use ZSTD_initDStream() to start a new decompression operation.
+* @return : recommended first input size
+*  Alternatively, use advanced API to set specific properties.
+*
+*  Use ZSTD_decompressStream() repetitively to consume your input.
+*  The function will update both `pos` fields.
+*  If `input.pos < input.size`, some input has not been consumed.
+*  It's up to the caller to present again remaining data.
+*  The function tries to flush all data decoded immediately, respecting output buffer size.
+*  If `output.pos < output.size`, decoder has flushed everything it could.
+*  But if `output.pos == output.size`, there might be some data left within internal buffers.,
+*  In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer.
+*  Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX.
+* @return : 0 when a frame is completely decoded and fully flushed,
+*        or an error code, which can be tested using ZSTD_isError(),
+*        or any other value > 0, which means there is still some decoding or flushing to do to complete current frame :
+*                                the return value is a suggested next input size (just a hint for better latency)
+*                                that will never request more than the remaining frame size.
+* *******************************************************************************/
+
+typedef ZSTD_DCtx ZSTD_DStream;  /*< DCtx and DStream are now effectively same object (>= v1.3.0) */
+                                 /* For compatibility with versions <= v1.2.0, prefer differentiating them. */
+/*===== ZSTD_DStream management functions =====*/
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void);
+ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds);  /* accept NULL pointer */
+
+/*===== Streaming decompression functions =====*/
+
+/* This function is redundant with the advanced API and equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, NULL);
+ */
+ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
+
+ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
+
+ZSTDLIB_API size_t ZSTD_DStreamInSize(void);    /*!< recommended size for input buffer */
+ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */
+
+
+/* ************************
+*  Simple dictionary API
+***************************/
+/*! ZSTD_compress_usingDict() :
+ *  Compression at an explicit compression level using a Dictionary.
+ *  A dictionary can be any arbitrary data segment (also called a prefix),
+ *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx,
+                                           void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     const void* dict,size_t dictSize,
+                                           int compressionLevel);
+
+/*! ZSTD_decompress_usingDict() :
+ *  Decompression using a known Dictionary.
+ *  Dictionary must be identical to the one used during compression.
+ *  Note : This function loads the dictionary, resulting in significant startup delay.
+ *         It's intended for a dictionary used only once.
+ *  Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                             void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize);
+
+
+/* *********************************
+ *  Bulk processing dictionary API
+ **********************************/
+typedef struct ZSTD_CDict_s ZSTD_CDict;
+
+/*! ZSTD_createCDict() :
+ *  When compressing multiple messages or blocks using the same dictionary,
+ *  it's recommended to digest the dictionary only once, since it's a costly operation.
+ *  ZSTD_createCDict() will create a state from digesting a dictionary.
+ *  The resulting state can be used for future compression operations with very limited startup cost.
+ *  ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
+ * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ *  Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ *  Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ *      in which case the only thing that it transports is the @compressionLevel.
+ *      This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ *      expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
+                                         int compressionLevel);
+
+/*! ZSTD_freeCDict() :
+ *  Function frees memory allocated by ZSTD_createCDict().
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t      ZSTD_freeCDict(ZSTD_CDict* CDict);
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times.
+ *  Note : compression level is _decided at dictionary creation time_,
+ *     and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                            void* dst, size_t dstCapacity,
+                                      const void* src, size_t srcSize,
+                                      const ZSTD_CDict* cdict);
+
+
+typedef struct ZSTD_DDict_s ZSTD_DDict;
+
+/*! ZSTD_createDDict() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_freeDDict() :
+ *  Function frees memory allocated with ZSTD_createDDict()
+ *  If a NULL pointer is passed, no operation is performed. */
+ZSTDLIB_API size_t      ZSTD_freeDDict(ZSTD_DDict* ddict);
+
+/*! ZSTD_decompress_usingDDict() :
+ *  Decompression using a digested Dictionary.
+ *  Recommended when same dictionary is used multiple times. */
+ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_DDict* ddict);
+
+
+/* ******************************
+ *  Dictionary helper functions
+ *******************************/
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompressed the frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary to be decoded (most common case).
+ *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/* *****************************************************************************
+ * Advanced dictionary and prefix API
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset with the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() :
+ *  Create an internal CDict from `dict` buffer.
+ *  Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ *           meaning "return to no-dictionary mode".
+ *  Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ *           To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ *  Note 2 : Loading a dictionary involves building tables.
+ *           It's also a CPU consuming operation, with non-negligible impact on latency.
+ *           Tables are dependent on compression parameters, and for this reason,
+ *           compression parameters can no longer be changed after loading a dictionary.
+ *  Note 3 :`dict` content will be copied internally.
+ *           Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ *           In such a case, dictionary buffer must outlive its users.
+ *  Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ *           to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() :
+ *  Reference a prepared dictionary, to be used for all next compressed frames.
+ *  Note that compression parameters are enforced from within CDict,
+ *  and supersede any compression parameter previously set within CCtx.
+ *  The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ *  The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
+ *  The dictionary will remain valid for future compressed frames using same CCtx.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Referencing a NULL CDict means "return to no-dictionary mode".
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
+ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
+
+/*! ZSTD_CCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) for next compressed frame.
+ *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
+ *  Decompression will need same prefix to properly regenerate data.
+ *  Compressing with a prefix is similar in outcome as performing a diff and compressing it,
+ *  but performs much faster, especially during decompression (compression speed is tunable with compression level).
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
+ *  Note 1 : Prefix buffer is referenced. It **must** outlive compression.
+ *           Its content must remain unmodified during compression.
+ *  Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
+ *           ensure that the window size is large enough to contain the entire source.
+ *           See ZSTD_c_windowLog.
+ *  Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
+ *           It's a CPU consuming operation, with non-negligible impact on latency.
+ *           If there is a need to use the same prefix multiple times, consider loadDictionary instead.
+ *  Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
+ *           Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
+                                 const void* prefix, size_t prefixSize);
+
+/*! ZSTD_DCtx_loadDictionary() :
+ *  Create an internal DDict from dict buffer,
+ *  to be used to decompress next frames.
+ *  The dictionary remains valid for all future frames, until explicitly invalidated.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary,
+ *            meaning "return to no-dictionary mode".
+ *  Note 1 : Loading a dictionary involves building tables,
+ *           which has a non-negligible impact on CPU usage and latency.
+ *           It's recommended to "load once, use many times", to amortize the cost
+ *  Note 2 :`dict` content will be copied internally, so `dict` can be released after loading.
+ *           Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead.
+ *  Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of
+ *           how dictionary content is loaded and interpreted.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_refDDict() :
+ *  Reference a prepared dictionary, to be used to decompress next frames.
+ *  The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ *  If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ *  will store the DDict references in a table, and the DDict used for decompression
+ *  will be determined at decompression time, as per the dict ID in the frame.
+ *  The memory for the table is allocated on the first call to refDDict, and can be
+ *  freed with ZSTD_freeDCtx().
+ *
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Currently, only one dictionary can be managed.
+ *           Referencing a new dictionary effectively "discards" any previous one.
+ *  Special: referencing a NULL DDict means "return to no-dictionary mode".
+ *  Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+/*! ZSTD_DCtx_refPrefix() :
+ *  Reference a prefix (single-usage dictionary) to decompress next frame.
+ *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
+ *  and must use the same prefix as the one used during compression.
+ *  Prefix is **only used once**. Reference is discarded at end of frame.
+ *  End of frame is reached when ZSTD_decompressStream() returns 0.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ *  Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
+ *  Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
+ *           Prefix buffer must remain unmodified up to the end of frame,
+ *           reached when ZSTD_decompressStream() returns 0.
+ *  Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
+ *           Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
+ *  Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
+ *           A full dictionary is more costly, as it requires building tables.
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
+                                 const void* prefix, size_t prefixSize);
+
+/* ===   Memory management   === */
+
+/*! ZSTD_sizeof_*() :
+ *  These functions give the _current_ memory usage of selected object.
+ *  Note that object memory usage can evolve (increase or decrease) over time. */
+ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs);
+ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds);
+ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict);
+ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
+
+#endif  /* ZSTD_H_235446 */
+
+
+/* **************************************************************************************
+ *   ADVANCED AND EXPERIMENTAL FUNCTIONS
+ ****************************************************************************************
+ * The definitions in the following section are considered experimental.
+ * They are provided for advanced scenarios.
+ * They should never be used with a dynamic library, as prototypes may change in the future.
+ * Use them only in association with static linking.
+ * ***************************************************************************************/
+
+#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* **************************************************************************************
+ *   experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1)   /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format)    ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX   18   /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE    8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32    30
+#define ZSTD_WINDOWLOG_MAX_64    31
+#define ZSTD_WINDOWLOG_MAX     ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN       10
+#define ZSTD_HASHLOG_MAX       ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN          6
+#define ZSTD_CHAINLOG_MAX_32     29
+#define ZSTD_CHAINLOG_MAX_64     30
+#define ZSTD_CHAINLOG_MAX      ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN        ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX      (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN        1
+#define ZSTD_MINMATCH_MAX         7   /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN         3   /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX    ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN     0   /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN        ZSTD_fast
+#define ZSTD_STRATEGY_MAX        ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN       0
+#define ZSTD_OVERLAPLOG_MAX       9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27   /* by default, the streaming decoder will refuse any frame
+                                           * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+                                           * to preserve host's memory from unreasonable requirements.
+                                           * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+                                           * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN      ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX      ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN        4
+#define ZSTD_LDM_MINMATCH_MAX     4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN   1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX   8
+#define ZSTD_LDM_HASHRATELOG_MIN     0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN        0
+#define ZSTD_SRCSIZEHINT_MAX        INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX           17
+
+
+/* ---  Advanced types  --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+    unsigned int offset;      /* The offset of the match. (NOT the same as the offset code)
+                               * If offset == 0 and matchLength == 0, this sequence represents the last
+                               * literals in the block of litLength size.
+                               */
+
+    unsigned int litLength;   /* Literal length of the sequence. */
+    unsigned int matchLength; /* Match length of the sequence. */
+
+                              /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
+                               * In this case, we will treat the sequence as a marker for a block boundary.
+                               */
+
+    unsigned int rep;         /* Represents which repeat offset is represented by the field 'offset'.
+                               * Ranges from [0, 3].
+                               *
+                               * Repeat offsets are essentially previous offsets from previous sequences sorted in
+                               * recency order. For more detail, see doc/zstd_compression_format.md
+                               *
+                               * If rep == 0, then 'offset' does not contain a repeat offset.
+                               * If rep > 0:
+                               *  If litLength != 0:
+                               *      rep == 1 --> offset == repeat_offset_1
+                               *      rep == 2 --> offset == repeat_offset_2
+                               *      rep == 3 --> offset == repeat_offset_3
+                               *  If litLength == 0:
+                               *      rep == 1 --> offset == repeat_offset_2
+                               *      rep == 2 --> offset == repeat_offset_3
+                               *      rep == 3 --> offset == repeat_offset_1 - 1
+                               *
+                               * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+                               * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+                               * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+                               * use this 'rep' field at all (as of now).
+                               */
+} ZSTD_Sequence;
+
+typedef struct {
+    unsigned windowLog;       /*< largest match distance : larger == more compression, more memory needed during decompression */
+    unsigned chainLog;        /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+    unsigned hashLog;         /*< dispatch table : larger == faster, more memory */
+    unsigned searchLog;       /*< nb of searches : larger == more compression, slower */
+    unsigned minMatch;        /*< match length searched : larger == faster decompression, sometimes less compression */
+    unsigned targetLength;    /*< acceptable match size for optimal parser (only) : larger == more compression, slower */
+    ZSTD_strategy strategy;   /*< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+    int contentSizeFlag; /*< 1: content size will be in frame header (when known) */
+    int checksumFlag;    /*< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+    int noDictIDFlag;    /*< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+typedef enum {
+    ZSTD_dct_auto = 0,       /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+    ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+    ZSTD_dct_fullDict = 2    /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+    ZSTD_dlm_byCopy = 0,  /*< Copy dictionary content internally */
+    ZSTD_dlm_byRef = 1    /*< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+    ZSTD_f_zstd1 = 0,           /* zstd frame format, specified in zstd_compression_format.md (default) */
+    ZSTD_f_zstd1_magicless = 1  /* Variant of zstd frame format, without initial 4-bytes magic number.
+                                 * Useful to save 4 bytes per generated frame.
+                                 * Decoder cannot recognise automatically this format, requiring this instruction. */
+} ZSTD_format_e;
+
+typedef enum {
+    /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */
+    ZSTD_d_validateChecksum = 0,
+    ZSTD_d_ignoreChecksum = 1
+} ZSTD_forceIgnoreChecksum_e;
+
+typedef enum {
+    /* Note: this enum controls ZSTD_d_refMultipleDDicts */
+    ZSTD_rmd_refSingleDDict = 0,
+    ZSTD_rmd_refMultipleDDicts = 1
+} ZSTD_refMultipleDDicts_e;
+
+typedef enum {
+    /* Note: this enum and the behavior it controls are effectively internal
+     * implementation details of the compressor. They are expected to continue
+     * to evolve and should be considered only in the context of extremely
+     * advanced performance tuning.
+     *
+     * Zstd currently supports the use of a CDict in three ways:
+     *
+     * - The contents of the CDict can be copied into the working context. This
+     *   means that the compression can search both the dictionary and input
+     *   while operating on a single set of internal tables. This makes
+     *   the compression faster per-byte of input. However, the initial copy of
+     *   the CDict's tables incurs a fixed cost at the beginning of the
+     *   compression. For small compressions (< 8 KB), that copy can dominate
+     *   the cost of the compression.
+     *
+     * - The CDict's tables can be used in-place. In this model, compression is
+     *   slower per input byte, because the compressor has to search two sets of
+     *   tables. However, this model incurs no start-up cost (as long as the
+     *   working context's tables can be reused). For small inputs, this can be
+     *   faster than copying the CDict's tables.
+     *
+     * - The CDict's tables are not used at all, and instead we use the working
+     *   context alone to reload the dictionary and use params based on the source
+     *   size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+     *   This method is effective when the dictionary sizes are very small relative
+     *   to the input size, and the input size is fairly large to begin with.
+     *
+     * Zstd has a simple internal heuristic that selects which strategy to use
+     * at the beginning of a compression. However, if experimentation shows that
+     * Zstd is making poor choices, it is possible to override that choice with
+     * this enum.
+     */
+    ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
+    ZSTD_dictForceAttach   = 1, /* Never copy the dictionary. */
+    ZSTD_dictForceCopy     = 2, /* Always copy the dictionary. */
+    ZSTD_dictForceLoad     = 3  /* Always reload the dictionary */
+} ZSTD_dictAttachPref_e;
+
+typedef enum {
+  ZSTD_lcm_auto = 0,          /*< Automatically determine the compression mode based on the compression level.
+                               *   Negative compression levels will be uncompressed, and positive compression
+                               *   levels will be compressed. */
+  ZSTD_lcm_huffman = 1,       /*< Always attempt Huffman compression. Uncompressed literals will still be
+                               *   emitted if Huffman compression is not profitable. */
+  ZSTD_lcm_uncompressed = 2   /*< Always emit uncompressed literals. */
+} ZSTD_literalCompressionMode_e;
+
+
+/* *************************************
+*  Frame size functions
+***************************************/
+
+/*! ZSTD_findDecompressedSize() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - decompressed size of all data in all successive frames
+ *            - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *   note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode.
+ *            When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 2 : decompressed size is always present when compression is done with ZSTD_compress()
+ *   note 3 : decompressed size can be very large (64-bits value),
+ *            potentially larger than what local system can handle as a single memory segment.
+ *            In which case, it's necessary to use streaming mode to decompress data.
+ *   note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified.
+ *            Always ensure result fits within application's authorized limits.
+ *            Each application can set its own limits.
+ *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
+ *            read each contained frame header.  This is fast as most of the data is skipped,
+ *            however it does mean that all frame data must be present and valid. */
+ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+
+/*! ZSTD_decompressBound() :
+ *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
+ *  `srcSize` must be the _exact_ size of this series
+ *       (i.e. there should be a frame boundary at `src + srcSize`)
+ *  @return : - upper-bound for the decompressed size of all data in all successive frames
+ *            - if an error occurred: ZSTD_CONTENTSIZE_ERROR
+ *
+ *  note 1  : an error can occur if `src` contains an invalid or incorrectly formatted frame.
+ *  note 2  : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`.
+ *            in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value.
+ *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
+ *              upper-bound = # blocks * min(128 KB, Window_Size)
+ */
+ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+
+/*! ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+
+typedef enum {
+  ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
+  ZSTD_sf_explicitBlockDelimiters = 1    /* Representation of ZSTD_Sequence contains explicit block delimiters */
+} ZSTD_sequenceFormat_e;
+
+/*! ZSTD_generateSequences() :
+ * Generate sequences using ZSTD_compress2, given a source buffer.
+ *
+ * Each block will end with a dummy sequence
+ * with offset == 0, matchLength == 0, and litLength == length of last literals.
+ * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
+ * simply acts as a block delimiter.
+ *
+ * zc can be used to insert custom compression params.
+ * This function invokes ZSTD_compress2
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
+ */
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                                          size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into into the literals of the next sequence.
+ *
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame.
+ *
+ * The compression behavior changes based on cctx params. In particular:
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ *    the block size derived from the cctx, and sequences may be split. This is the default setting.
+ *
+ *    If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ *    block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ *
+ *    If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ *    behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for
+ *    specifics regarding offset/matchlength requirements) then the function will bail out and return an error.
+ *
+ *    In addition to the two adjustable experimental params, there are other important cctx params.
+ *    - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ *    - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ *    - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ *      is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ *         and cannot emit an RLE block that disagrees with the repcode history
+ * @return : final compressed size or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+                                  const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                  const void* src, size_t srcSize);
+
+
+/*! ZSTD_writeSkippableFrame() :
+ * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number,
+ * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
+ * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
+ * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, if the source size is not representable
+ * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid).
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                            const void* src, size_t srcSize, unsigned magicVariant);
+
+
+/* *************************************
+*  Memory management
+***************************************/
+
+/*! ZSTD_estimate*() :
+ *  These functions make it possible to estimate memory usage
+ *  of a future {D,C}Ctx, before its creation.
+ *
+ *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
+ *  for any compression level up to selected one.
+ *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
+ *         does not include space for a window buffer.
+ *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ *  The estimate will assume the input may be arbitrarily large,
+ *  which is the worst case.
+ *
+ *  When srcSize can be bound by a known and rather "small" value,
+ *  this fact can be used to provide a tighter estimation
+ *  because the CCtx compression context will need less memory.
+ *  This tighter estimation can be provided by more advanced functions
+ *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
+ *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
+ *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
+ *
+ *  Note 2 : only single-threaded compression is supported.
+ *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+
+/*! ZSTD_estimateCStreamSize() :
+ *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
+ *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
+ *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note : CStream size estimation is only correct for single-threaded compression.
+ *  ZSTD_DStream memory budget depends on window Size.
+ *  This information can be passed manually, using ZSTD_estimateDStreamSize,
+ *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
+ *         an internal ?Dict will be created, which additional size is not estimated here.
+ *         In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ *  Initialize an object using a pre-allocated fixed-size buffer.
+ *  workspace: The memory area to emplace the object into.
+ *             Provided pointer *must be 8-bytes aligned*.
+ *             Buffer must outlive object.
+ *  workspaceSize: Use ZSTD_estimate*Size() to determine
+ *                 how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ *           or NULL if error (size too small, incorrect alignment, etc.)
+ *  Note : zstd will never resize nor malloc() when using a static buffer.
+ *         If the object requires more memory than available,
+ *         zstd will just error out (typically ZSTD_error_memory_allocation).
+ *  Note 2 : there is no corresponding "free" function.
+ *           Since workspace is allocated externally, it must be freed externally too.
+ *  Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ *           into its associated cParams.
+ *  Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ *                 ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ *  Limitation 2 : static cctx currently not compatible with multi-threading.
+ *  Limitation 3 : static dctx is incompatible with legacy support.
+ */
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticCCtx() */
+
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticDCtx() */
+
+ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType,
+                                        ZSTD_compressionParameters cParams);
+
+ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+                                        void* workspace, size_t workspaceSize,
+                                        const void* dict, size_t dictSize,
+                                        ZSTD_dictLoadMethod_e dictLoadMethod,
+                                        ZSTD_dictContentType_e dictContentType);
+
+
+/*! Custom memory allocation :
+ *  These prototypes make it possible to pass your own allocation/free functions.
+ *  ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below.
+ *  All allocation/free operations will be completed using these custom variants instead of regular <stdlib.h> ones.
+ */
+typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size);
+typedef void  (*ZSTD_freeFunction) (void* opaque, void* address);
+typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem;
+static
+__attribute__((__unused__))
+ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /*< this constant defers to stdlib's functions */
+
+ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+                                                  ZSTD_dictLoadMethod_e dictLoadMethod,
+                                                  ZSTD_dictContentType_e dictContentType,
+                                                  ZSTD_compressionParameters cParams,
+                                                  ZSTD_customMem customMem);
+
+/* ! Thread pool :
+ * These prototypes make it possible to share a thread pool among multiple compression contexts.
+ * This can limit resources for applications with multiple threads where each one uses
+ * a threaded compression mode (via ZSTD_c_nbWorkers parameter).
+ * ZSTD_createThreadPool creates a new thread pool with a given number of threads.
+ * Note that the lifetime of such pool must exist while being used.
+ * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
+ * to use an internal thread pool).
+ * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
+ */
+typedef struct POOL_ctx_s ZSTD_threadPool;
+ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
+ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
+
+
+/*
+ * This API is temporary and is expected to change or disappear in the future!
+ */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    const ZSTD_CCtx_params* cctxParams,
+    ZSTD_customMem customMem);
+
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
+    const void* dict, size_t dictSize,
+    ZSTD_dictLoadMethod_e dictLoadMethod,
+    ZSTD_dictContentType_e dictContentType,
+    ZSTD_customMem customMem);
+
+
+/* *************************************
+*  Advanced compression functions
+***************************************/
+
+/*! ZSTD_createCDict_byReference() :
+ *  Create a digested dictionary for compression
+ *  Dictionary content is just referenced, not duplicated.
+ *  As a consequence, `dictBuffer` **must** outlive CDict,
+ *  and its content must remain unmodified throughout the lifetime of CDict.
+ *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
+ * `estimatedSrcSize` value is optional, select 0 if not known */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_getParams() :
+ *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
+ *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
+ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+
+/*! ZSTD_checkCParams() :
+ *  Ensure param values remain within authorized range.
+ * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
+ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+
+/*! ZSTD_adjustCParams() :
+ *  optimize params for a given `srcSize` and `dictSize`.
+ * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN.
+ * `dictSize` must be `0` when there is no dictionary.
+ *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
+ *  This function never fails (wide contract) */
+ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+
+/*! ZSTD_compress_advanced() :
+ *  Note : this function is now DEPRECATED.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
+ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+                                          void* dst, size_t dstCapacity,
+                                    const void* src, size_t srcSize,
+                                    const void* dict,size_t dictSize,
+                                          ZSTD_parameters params);
+
+/*! ZSTD_compress_usingCDict_advanced() :
+ *  Note : this function is now REDUNDANT.
+ *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
+ *  This prototype will be marked as deprecated and generate compilation warning in some future version */
+ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize,
+                                        const ZSTD_CDict* cdict,
+                                              ZSTD_frameParameters fParams);
+
+
+/*! ZSTD_CCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
+ *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_CCtx_refPrefix_advanced() :
+ *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/* ===   experimental parameters   === */
+/* these parameters can be used with ZSTD_setParameter()
+ * they are not guaranteed to remain supported in the future */
+
+ /* Enables rsyncable mode,
+  * which makes compressed files more rsync friendly
+  * by adding periodic synchronization points to the compressed data.
+  * The target average block size is ZSTD_c_jobSize / 2.
+  * It's possible to modify the job size to increase or decrease
+  * the granularity of the synchronization point.
+  * Once the jobSize is smaller than the window size,
+  * it will result in compression ratio degradation.
+  * NOTE 1: rsyncable mode only works when multithreading is enabled.
+  * NOTE 2: rsyncable performs poorly in combination with long range mode,
+  * since it will decrease the effectiveness of synchronization points,
+  * though mileage may vary.
+  * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s.
+  * If the selected compression level is already running significantly slower,
+  * the overall speed won't be significantly impacted.
+  */
+ #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1
+
+/* Select a compression format.
+ * The value must be of type ZSTD_format_e.
+ * See ZSTD_format_e enum definition for details */
+#define ZSTD_c_format ZSTD_c_experimentalParam2
+
+/* Force back-reference distances to remain < windowSize,
+ * even when referencing into Dictionary content (default:0) */
+#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3
+
+/* Controls whether the contents of a CDict
+ * are used in place, or copied into the working context.
+ * Accepts values from the ZSTD_dictAttachPref_e enum.
+ * See the comments on that enum for an explanation of the feature. */
+#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
+
+/* Controls how the literals are compressed (default is auto).
+ * The value must be of type ZSTD_literalCompressionMode_e.
+ * See ZSTD_literalCompressionMode_t enum definition for details.
+ */
+#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
+
+/* Tries to fit compressed block size to be around targetCBlockSize.
+ * No target when targetCBlockSize == 0.
+ * There is no guarantee on compressed block size (default:0) */
+#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
+
+/* User's best guess of source size.
+ * Hint is not valid when srcSizeHint == 0.
+ * There is no guarantee that hint is close to actual source size,
+ * but compression ratio may regress significantly if guess considerably underestimates */
+#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7
+
+/* Controls whether the new and experimental "dedicated dictionary search
+ * structure" can be used. This feature is still rough around the edges, be
+ * prepared for surprising behavior!
+ *
+ * How to use it:
+ *
+ * When using a CDict, whether to use this feature or not is controlled at
+ * CDict creation, and it must be set in a CCtxParams set passed into that
+ * construction (via ZSTD_createCDict_advanced2()). A compression will then
+ * use the feature or not based on how the CDict was constructed; the value of
+ * this param, set in the CCtx, will have no effect.
+ *
+ * However, when a dictionary buffer is passed into a CCtx, such as via
+ * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control
+ * whether the CDict that is created internally can use the feature or not.
+ *
+ * What it does:
+ *
+ * Normally, the internal data structures of the CDict are analogous to what
+ * would be stored in a CCtx after compressing the contents of a dictionary.
+ * To an approximation, a compression using a dictionary can then use those
+ * data structures to simply continue what is effectively a streaming
+ * compression where the simulated compression of the dictionary left off.
+ * Which is to say, the search structures in the CDict are normally the same
+ * format as in the CCtx.
+ *
+ * It is possible to do better, since the CDict is not like a CCtx: the search
+ * structures are written once during CDict creation, and then are only read
+ * after that, while the search structures in the CCtx are both read and
+ * written as the compression goes along. This means we can choose a search
+ * structure for the dictionary that is read-optimized.
+ *
+ * This feature enables the use of that different structure.
+ *
+ * Note that some of the members of the ZSTD_compressionParameters struct have
+ * different semantics and constraints in the dedicated search structure. It is
+ * highly recommended that you simply set a compression level in the CCtxParams
+ * you pass into the CDict creation call, and avoid messing with the cParams
+ * directly.
+ *
+ * Effects:
+ *
+ * This will only have any effect when the selected ZSTD_strategy
+ * implementation supports this feature. Currently, that's limited to
+ * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2.
+ *
+ * Note that this means that the CDict tables can no longer be copied into the
+ * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
+ * useable. The dictionary can only be attached or reloaded.
+ *
+ * In general, you should expect compression to be faster--sometimes very much
+ * so--and CDict creation to be slightly slower. Eventually, we will probably
+ * make this mode the default.
+ */
+#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8
+
+/* ZSTD_c_stableInBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the compressor, and
+ * compression will fail if it ever changes. This means the only flush
+ * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end
+ * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells he compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always decompress directly into the output buffer, instead of decompressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
+ *
+ */
+#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
+
+/*! ZSTD_CCtx_getParameter() :
+ *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+
+
+/*! ZSTD_CCtx_params :
+ *  Quick howto :
+ *  - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure
+ *  - ZSTD_CCtxParams_setParameter() : Push parameters one by one into
+ *                                     an existing ZSTD_CCtx_params structure.
+ *                                     This is similar to
+ *                                     ZSTD_CCtx_setParameter().
+ *  - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to
+ *                                    an existing CCtx.
+ *                                    These parameters will be applied to
+ *                                    all subsequent frames.
+ *  - ZSTD_compressStream2() : Do compression using the CCtx.
+ *  - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer.
+ *
+ *  This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
+ *  for static allocation of CCtx for single-threaded compression.
+ */
+ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
+
+/*! ZSTD_CCtxParams_reset() :
+ *  Reset params to default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+
+/*! ZSTD_CCtxParams_init() :
+ *  Initializes the compression parameters of cctxParams according to
+ *  compression level. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+
+/*! ZSTD_CCtxParams_init_advanced() :
+ *  Initializes the compression and frame parameters of cctxParams according to
+ *  params. All other parameters are reset to their default values.
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+
+/*! ZSTD_CCtxParams_setParameter() :
+ *  Similar to ZSTD_CCtx_setParameter.
+ *  Set one compression parameter, selected by enum ZSTD_cParameter.
+ *  Parameters must be applied to a ZSTD_CCtx using
+ *  ZSTD_CCtx_setParametersUsingCCtxParams().
+ * @result : a code representing success or failure (which can be tested with
+ *           ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+
+/*! ZSTD_CCtxParams_getParameter() :
+ * Similar to ZSTD_CCtx_getParameter.
+ * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+
+/*! ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  Apply a set of ZSTD_CCtx_params to the compression context.
+ *  This can be done even after compression is started,
+ *    if nbWorkers==0, this will have no impact until a new compression is started.
+ *    if nbWorkers>=1, new parameters will be picked up at next job,
+ *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
+ */
+ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
+
+/*! ZSTD_compressStream2_simpleArgs() :
+ *  Same as ZSTD_compressStream2(),
+ *  but using only integral types as arguments.
+ *  This variant might be helpful for binders from dynamic languages
+ *  which have troubles handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp);
+
+
+/* *************************************
+*  Advanced decompression functions
+***************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, ready to start decompression operation without startup delay.
+ *  Dictionary content is referenced, and therefore stays in dictBuffer.
+ *  It is important that dictBuffer outlives DDict,
+ *  it must remain read accessible throughout the lifetime of DDict */
+ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_byReference() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but references `dict` content instead of copying it into `dctx`.
+ *  This saves memory if `dict` remains around.,
+ *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_DCtx_loadDictionary_advanced() :
+ *  Same as ZSTD_DCtx_loadDictionary(),
+ *  but gives direct control over
+ *  how to load the dictionary (by copy ? by reference ?)
+ *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
+ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_refPrefix_advanced() :
+ *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
+ *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
+ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+
+/*! ZSTD_DCtx_setMaxWindowSize() :
+ *  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
+ *  This protects a decoder context from reserving too much memory for itself (potential attack scenario).
+ *  This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
+ *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+
+/*! ZSTD_DCtx_getParameter() :
+ *  Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
+ *  and store it into int* value.
+ * @return : 0, or an error code (which can be tested with ZSTD_isError()).
+ */
+ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
+
+/* ZSTD_d_format
+ * experimental parameter,
+ * allowing selection between ZSTD_format_e input compression formats
+ */
+#define ZSTD_d_format ZSTD_d_experimentalParam1
+/* ZSTD_d_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same
+ * between calls, except for the modifications that zstd makes to pos (the
+ * caller must not modify pos). This is checked by the decompressor, and
+ * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer
+ * MUST be large enough to fit the entire decompressed frame. This will be
+ * checked when the frame content size is known. The data in the ZSTD_outBuffer
+ * in the range [dst, dst + pos) MUST not be modified during decompression
+ * or you will get data corruption.
+ *
+ * When this flags is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer, but it will still allocate
+ * an input buffer large enough to fit any compressed block. This will also
+ * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer.
+ * If you need to avoid the input buffer allocation use the buffer-less
+ * streaming API.
+ *
+ * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, decompression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST
+ * not be modified during decompression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate
+ * matches. Normally zstd maintains its own buffer for this purpose, but passing
+ * this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2
+
+/* ZSTD_d_forceIgnoreChecksum
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * Tells the decompressor to skip checksum validation during decompression, regardless
+ * of whether checksumming was specified during compression. This offers some
+ * slight performance benefits, and may be useful for debugging.
+ * Param has values of type ZSTD_forceIgnoreChecksum_e
+ */
+#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3
+
+/* ZSTD_d_refMultipleDDicts
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable
+ *
+ * If enabled and dctx is allocated on the heap, then additional memory will be allocated
+ * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
+ *
+ * Param has values of byte ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
+ *
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
+ */
+#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
+
+
+/*! ZSTD_DCtx_setFormat() :
+ *  Instruct the decoder context about what kind of data to decode next.
+ *  This instruction is mandatory to decode data without a fully-formed header,
+ *  such ZSTD_f_zstd1_magicless for example.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
+ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+
+/*! ZSTD_decompressStream_simpleArgs() :
+ *  Same as ZSTD_decompressStream(),
+ *  but using only integral types as arguments.
+ *  This can be helpful for binders from dynamic languages
+ *  which have troubles handling structures containing memory pointers.
+ */
+ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos);
+
+
+/* ******************************************************************
+*  Advanced streaming functions
+*  Warning : most of these functions are now redundant with the Advanced API.
+*  Once Advanced API reaches "stable" status,
+*  redundant functions will be deprecated, and then at some point removed.
+********************************************************************/
+
+/*=====   Advanced Streaming compression functions  =====*/
+
+/*! ZSTD_initCStream_srcSize() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ * pledgedSrcSize must be correct. If it is not known at init time, use
+ * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
+ * "0" also disables frame content size field. It may be enabled in the future.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+                         int compressionLevel,
+                         unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingDict() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * Creates of an internal CDict (incompatible with static CCtx), except if
+ * dict == NULL or dictSize < 8, in which case no dict is used.
+ * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
+ * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                           int compressionLevel);
+
+/*! ZSTD_initCStream_advanced() :
+ * This function is deprecated, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
+ *     for ((param, value) : params) {
+ *         ZSTD_CCtx_setParameter(zcs, param, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
+ *
+ * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
+ * pledgedSrcSize must be correct.
+ * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize,
+                          ZSTD_parameters params,
+                          unsigned long long pledgedSrcSize);
+
+/*! ZSTD_initCStream_usingCDict() :
+ * This function is deprecated, and equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * note : cdict will just be referenced, and must outlive compression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+
+/*! ZSTD_initCStream_usingCDict_advanced() :
+ *   This function is DEPRECATED, and is approximately equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     // Pseudocode: Set each zstd frame parameter and leave the rest as-is.
+ *     for ((fParam, value) : fParams) {
+ *         ZSTD_CCtx_setParameter(zcs, fParam, value);
+ *     }
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *     ZSTD_CCtx_refCDict(zcs, cdict);
+ *
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
+ * pledgedSrcSize must be correct. If srcSize is not known at init time, use
+ * value ZSTD_CONTENTSIZE_UNKNOWN.
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t
+ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                               const ZSTD_CDict* cdict,
+                                     ZSTD_frameParameters fParams,
+                                     unsigned long long pledgedSrcSize);
+
+/*! ZSTD_resetCStream() :
+ * This function is deprecated, and is equivalent to:
+ *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ *
+ *  start a new frame, using same parameters from previous frame.
+ *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  Note that zcs must be init at least once before using ZSTD_resetCStream().
+ *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
+ *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
+ *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
+ *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
+ * @return : 0, or an error code (which can be tested using ZSTD_isError())
+ *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+
+
+typedef struct {
+    unsigned long long ingested;   /* nb input bytes read and buffered */
+    unsigned long long consumed;   /* nb input bytes actually compressed */
+    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
+    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
+    unsigned currentJobID;         /* MT only : latest started job nb */
+    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
+} ZSTD_frameProgression;
+
+/* ZSTD_getFrameProgression() :
+ * tells how much data has been ingested (read from input)
+ * consumed (input actually compressed) and produced (output) for current frame.
+ * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
+ * Aggregates progression inside active worker threads.
+ */
+ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+
+/*! ZSTD_toFlushNow() :
+ *  Tell how many bytes are ready to be flushed immediately.
+ *  Useful for multithreading scenarios (nbWorkers >= 1).
+ *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
+ *  and check its output buffer.
+ * @return : amount of data stored in oldest job and ready to be flushed immediately.
+ *  if @return == 0, it means either :
+ *  + there is no active job (could be checked with ZSTD_frameProgression()), or
+ *  + oldest job is still actively compressing data,
+ *    but everything it has produced has also been flushed so far,
+ *    therefore flush speed is limited by production speed of oldest job
+ *    irrespective of the speed of concurrent (and newer) jobs.
+ */
+ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
+
+
+/*=====   Advanced Streaming decompression functions  =====*/
+
+/*!
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_loadDictionary(zds, dict, dictSize);
+ *
+ * note: no dictionary will be used if dict == NULL or dictSize < 8
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+
+/*!
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *     ZSTD_DCtx_refDDict(zds, ddict);
+ *
+ * note : ddict is referenced, it must outlive decompression session
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+
+/*!
+ * This function is deprecated, and is equivalent to:
+ *
+ *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
+ *
+ * re-use decompression parameters from previous init; saves dictionary loading
+ * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ */
+ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+
+
+/* *******************************************************************
+*  Buffer-less and synchronous inner streaming functions
+*
+*  This is an advanced API, giving full control over buffer management, for users which need direct control over memory.
+*  But it's also a complex one, with several restrictions, documented below.
+*  Prefer normal streaming API for an easier experience.
+********************************************************************* */
+
+/*
+  Buffer-less streaming compression (synchronous mode)
+
+  A ZSTD_CCtx object is required to track streaming operations.
+  Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
+  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+
+  Start by initializing a context.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
+  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
+
+  Then, consume your input using ZSTD_compressContinue().
+  There are some important considerations to keep in mind when using this advanced function :
+  - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only.
+  - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks.
+  - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario.
+    Worst case evaluation is provided by ZSTD_compressBound().
+    ZSTD_compressContinue() doesn't guarantee recover after a failed compression.
+  - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog).
+    It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks)
+  - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps.
+    In which case, it will "discard" the relevant memory section from its history.
+
+  Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum.
+  It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
+  Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
+
+  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+*/
+
+/*=====   Buffer-less streaming compression functions  =====*/
+ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
+ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+
+/*
+  Buffer-less streaming decompression (synchronous mode)
+
+  A ZSTD_DCtx object is required to track streaming operations.
+  Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
+  A ZSTD_DCtx object can be re-used multiple times.
+
+  First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
+  Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.
+  Data fragment must be large enough to ensure successful decoding.
+ `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough.
+  @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled.
+           >0 : `srcSize` is too small, please provide at least @result bytes on next attempt.
+           errorCode, which can be tested using ZSTD_isError().
+
+  It fills a ZSTD_frameHeader structure with important information to correctly decode the frame,
+  such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`).
+  Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information.
+  As a consequence, check that values remain within valid application range.
+  For example, do not allocate memory blindly, check that `windowSize` is within expectation.
+  Each application can set its own limits, depending on local restrictions.
+  For extended interoperability, it is recommended to support `windowSize` of at least 8 MB.
+
+  ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes.
+  ZSTD_decompressContinue() is very sensitive to contiguity,
+  if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place,
+  or that previous contiguous segment is large enough to properly handle maximum back-reference distance.
+  There are multiple ways to guarantee this condition.
+
+  The most memory efficient way is to use a round buffer of sufficient size.
+  Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(),
+  which can @return an error code if required value is too large for current system (in 32-bits mode).
+  In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one,
+  up to the moment there is not enough room left in the buffer to guarantee decoding another full block,
+  which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`.
+  At which point, decoding can resume from the beginning of the buffer.
+  Note that already decoded data stored in the buffer should be flushed before being overwritten.
+
+  There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory.
+
+  Finally, if you control the compression process, you can also ignore all buffer size rules,
+  as long as the encoder and decoder progress in "lock-step",
+  aka use exactly the same buffer sizes, break contiguity at the same place, etc.
+
+  Once buffers are setup, start decompression, with ZSTD_decompressBegin().
+  If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict().
+
+  Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail.
+
+ @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity).
+  It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item.
+  It can also be an error code, which can be tested with ZSTD_isError().
+
+  A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero.
+  Context can then be reset to start a new decompression.
+
+  Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType().
+  This information is not required to properly decode a frame.
+
+  == Special case : skippable frames ==
+
+  Skippable frames allow integration of user-defined data into a flow of concatenated frames.
+  Skippable frames will be ignored (skipped) by decompressor.
+  The format of skippable frames is as follows :
+  a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F
+  b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits
+  c) Frame Content - any content (User Data) of length equal to Frame Size
+  For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame.
+  For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content.
+*/
+
+/*=====   Buffer-less streaming decompression functions  =====*/
+typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e;
+typedef struct {
+    unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */
+    unsigned long long windowSize;       /* can be very large, up to <= frameContentSize */
+    unsigned blockSizeMax;
+    ZSTD_frameType_e frameType;          /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */
+    unsigned headerSize;
+    unsigned dictID;
+    unsigned checksumFlag;
+} ZSTD_frameHeader;
+
+/*! ZSTD_getFrameHeader() :
+ *  decode Frame Header, or requires larger `srcSize`.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
+/*! ZSTD_getFrameHeader_advanced() :
+ *  same as ZSTD_getFrameHeader(),
+ *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
+ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/* misc */
+ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
+ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+
+
+
+
+/* ============================ */
+/*       Block level API       */
+/* ============================ */
+
+/*!
+    Block functions produce and decode raw zstd blocks, without frame metadata.
+    Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes).
+    But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes.
+
+    A few rules to respect :
+    - Compressing and decompressing require a context structure
+      + Use ZSTD_createCCtx() and ZSTD_createDCtx()
+    - It is necessary to init context before starting
+      + compression : any ZSTD_compressBegin*() variant, including with dictionary
+      + decompression : any ZSTD_decompressBegin*() variant, including with dictionary
+      + copyCCtx() and copyDCtx() can be used too
+    - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB
+      + If input is larger than a block size, it's necessary to split input data into multiple blocks
+      + For inputs larger than a single block, consider using regular ZSTD_compress() instead.
+        Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
+    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
+      ===> In which case, nothing is produced into `dst` !
+      + User __must__ test for such outcome and deal directly with uncompressed data
+      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
+        Doing so would mess up with statistics history, leading to potential data corruption.
+      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
+      + In case of multiple successive blocks, should some of them be uncompressed,
+        decoder must be informed of their existence in order to follow proper history.
+        Use ZSTD_insertBlock() for such a case.
+*/
+
+/*=====   Raw zstd block functions  =====*/
+ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+
+
+#endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
+
index c412dde..83b8070 100644 (file)
@@ -485,6 +485,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
                 struct fib6_config *cfg, gfp_t gfp_flags,
                 struct netlink_ext_ack *extack);
 void fib6_nh_release(struct fib6_nh *fib6_nh);
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh);
 
 int call_fib6_entry_notifiers(struct net *net,
                              enum fib_event_type event_type,
index afbce90..45e0339 100644 (file)
@@ -47,6 +47,7 @@ struct ipv6_stub {
                            struct fib6_config *cfg, gfp_t gfp_flags,
                            struct netlink_ext_ack *extack);
        void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+       void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh);
        void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
        int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
        void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
index a964dae..ea85956 100644 (file)
@@ -30,6 +30,7 @@ enum nci_flag {
        NCI_UP,
        NCI_DATA_EXCHANGE,
        NCI_DATA_EXCHANGE_TO,
+       NCI_UNREG,
 };
 
 /* NCI device states */
index 3855f06..a408240 100644 (file)
@@ -216,14 +216,24 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
        page_pool_put_full_page(pool, page, true);
 }
 
+#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT        \
+               (sizeof(dma_addr_t) > sizeof(unsigned long))
+
 static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
-       return page->dma_addr;
+       dma_addr_t ret = page->dma_addr;
+
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+
+       return ret;
 }
 
 static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
        page->dma_addr = addr;
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
+               page->dma_addr_upper = upper_32_bits(addr);
 }
 
 static inline void page_pool_set_frag_count(struct page *page, long nr)
index bca73e8..499f5fa 100644 (file)
@@ -1016,31 +1016,32 @@ TRACE_EVENT(afs_dir_check_failed,
                      __entry->vnode, __entry->off, __entry->i_size)
            );
 
-TRACE_EVENT(afs_page_dirty,
-           TP_PROTO(struct afs_vnode *vnode, const char *where, struct page *page),
+TRACE_EVENT(afs_folio_dirty,
+           TP_PROTO(struct afs_vnode *vnode, const char *where, struct folio *folio),
 
-           TP_ARGS(vnode, where, page),
+           TP_ARGS(vnode, where, folio),
 
            TP_STRUCT__entry(
                    __field(struct afs_vnode *,         vnode           )
                    __field(const char *,               where           )
-                   __field(pgoff_t,                    page            )
+                   __field(pgoff_t,                    index           )
                    __field(unsigned long,              from            )
                    __field(unsigned long,              to              )
                             ),
 
            TP_fast_assign(
+                   unsigned long priv = (unsigned long)folio_get_private(folio);
                    __entry->vnode = vnode;
                    __entry->where = where;
-                   __entry->page = page->index;
-                   __entry->from = afs_page_dirty_from(page, page->private);
-                   __entry->to = afs_page_dirty_to(page, page->private);
-                   __entry->to |= (afs_is_page_dirty_mmapped(page->private) ?
-                                   (1UL << (BITS_PER_LONG - 1)) : 0);
+                   __entry->index = folio_index(folio);
+                   __entry->from  = afs_folio_dirty_from(folio, priv);
+                   __entry->to    = afs_folio_dirty_to(folio, priv);
+                   __entry->to   |= (afs_is_folio_dirty_mmapped(priv) ?
+                                     (1UL << (BITS_PER_LONG - 1)) : 0);
                           ),
 
            TP_printk("vn=%p %lx %s %lx-%lx%s",
-                     __entry->vnode, __entry->page, __entry->where,
+                     __entry->vnode, __entry->index, __entry->where,
                      __entry->from,
                      __entry->to & ~(1UL << (BITS_PER_LONG - 1)),
                      __entry->to & (1UL << (BITS_PER_LONG - 1)) ? " M" : "")
index 4e881d9..f8cb916 100644 (file)
@@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter,
 );
 
 TRACE_EVENT(f2fs_map_blocks,
-       TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret),
+       TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map,
+                               int create, int flag, int ret),
 
-       TP_ARGS(inode, map, ret),
+       TP_ARGS(inode, map, create, flag, ret),
 
        TP_STRUCT__entry(
                __field(dev_t,  dev)
@@ -583,11 +584,14 @@ TRACE_EVENT(f2fs_map_blocks,
                __field(unsigned int,   m_flags)
                __field(int,    m_seg_type)
                __field(bool,   m_may_create)
+               __field(bool,   m_multidev_dio)
+               __field(int,    create)
+               __field(int,    flag)
                __field(int,    ret)
        ),
 
        TP_fast_assign(
-               __entry->dev            = inode->i_sb->s_dev;
+               __entry->dev            = map->m_bdev->bd_dev;
                __entry->ino            = inode->i_ino;
                __entry->m_lblk         = map->m_lblk;
                __entry->m_pblk         = map->m_pblk;
@@ -595,12 +599,16 @@ TRACE_EVENT(f2fs_map_blocks,
                __entry->m_flags        = map->m_flags;
                __entry->m_seg_type     = map->m_seg_type;
                __entry->m_may_create   = map->m_may_create;
+               __entry->m_multidev_dio = map->m_multidev_dio;
+               __entry->create         = create;
+               __entry->flag           = flag;
                __entry->ret            = ret;
        ),
 
        TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
-               "start blkaddr = 0x%llx, len = 0x%llx, flags = %u,"
-               "seg_type = %d, may_create = %d, err = %d",
+               "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
+               "seg_type = %d, may_create = %d, multidevice = %d, "
+               "create = %d, flag = %d, err = %d",
                show_dev_ino(__entry),
                (unsigned long long)__entry->m_lblk,
                (unsigned long long)__entry->m_pblk,
@@ -608,6 +616,9 @@ TRACE_EVENT(f2fs_map_blocks,
                __entry->m_flags,
                __entry->m_seg_type,
                __entry->m_may_create,
+               __entry->m_multidev_dio,
+               __entry->create,
+               __entry->flag,
                __entry->ret)
 );
 
@@ -807,20 +818,20 @@ TRACE_EVENT(f2fs_lookup_start,
        TP_STRUCT__entry(
                __field(dev_t,  dev)
                __field(ino_t,  ino)
-               __field(const char *,   name)
+               __string(name,  dentry->d_name.name)
                __field(unsigned int, flags)
        ),
 
        TP_fast_assign(
                __entry->dev    = dir->i_sb->s_dev;
                __entry->ino    = dir->i_ino;
-               __entry->name   = dentry->d_name.name;
+               __assign_str(name, dentry->d_name.name);
                __entry->flags  = flags;
        ),
 
        TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u",
                show_dev_ino(__entry),
-               __entry->name,
+               __get_str(name),
                __entry->flags)
 );
 
@@ -834,7 +845,7 @@ TRACE_EVENT(f2fs_lookup_end,
        TP_STRUCT__entry(
                __field(dev_t,  dev)
                __field(ino_t,  ino)
-               __field(const char *,   name)
+               __string(name,  dentry->d_name.name)
                __field(nid_t,  cino)
                __field(int,    err)
        ),
@@ -842,14 +853,14 @@ TRACE_EVENT(f2fs_lookup_end,
        TP_fast_assign(
                __entry->dev    = dir->i_sb->s_dev;
                __entry->ino    = dir->i_ino;
-               __entry->name   = dentry->d_name.name;
+               __assign_str(name, dentry->d_name.name);
                __entry->cino   = ino;
                __entry->err    = err;
        ),
 
        TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d",
                show_dev_ino(__entry),
-               __entry->name,
+               __get_str(name),
                __entry->cino,
                __entry->err)
 );
index 78f0719..1daa452 100644 (file)
@@ -1130,6 +1130,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_BINARY_STATS_FD 203
 #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204
 #define KVM_CAP_ARM_MTE 205
+#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index 70e01c6..e9122f1 100644 (file)
  * explicitly triggered (VIRTIO_MEM_REQ_UNPLUG).
  *
  * There are no guarantees what will happen if unplugged memory is
- * read/written. Such memory should, in general, not be touched. E.g.,
- * even writing might succeed, but the values will simply be discarded at
- * random points in time.
+ * read/written. In general, unplugged memory should not be touched, because
+ * the resulting action is undefined. There is one exception: without
+ * VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, unplugged memory inside the usable
+ * region can be read, to simplify creation of memory dumps.
  *
  * It can happen that the device cannot process a request, because it is
  * busy. The device driver has to retry later.
@@ -87,6 +88,8 @@
 
 /* node_id is an ACPI PXM and is valid */
 #define VIRTIO_MEM_F_ACPI_PXM          0
+/* unplugged memory must not be accessed */
+#define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE    1
 
 
 /* --- virtio-mem: guest -> host requests --- */
index 45bcaa8..4b7bac1 100644 (file)
@@ -885,6 +885,11 @@ config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 config CC_HAS_INT128
        def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
 
+config CC_IMPLICIT_FALLTHROUGH
+       string
+       default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
+       default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
+
 #
 # For architectures that know their GCC __int128 support is sound
 #
index 2846113..04eeee1 100644 (file)
@@ -30,7 +30,7 @@ $(obj)/version.o: include/generated/compile.h
 quiet_cmd_compile.h = CHK     $@
       cmd_compile.h = \
        $(CONFIG_SHELL) $(srctree)/scripts/mkcompile_h $@       \
-       "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT)"    \
+       "$(UTS_MACHINE)" "$(CONFIG_SMP)" "$(CONFIG_PREEMPT_BUILD)"      \
        "$(CONFIG_PREEMPT_RT)" $(CONFIG_CC_VERSION_TEXT) "$(LD)"
 
 include/generated/compile.h: FORCE
index 60f1bfc..ce77f02 100644 (file)
@@ -1,12 +1,23 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
+config PREEMPT_NONE_BUILD
+       bool
+
+config PREEMPT_VOLUNTARY_BUILD
+       bool
+
+config PREEMPT_BUILD
+       bool
+       select PREEMPTION
+       select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
+
 choice
        prompt "Preemption Model"
-       default PREEMPT_NONE_BEHAVIOUR
+       default PREEMPT_NONE
 
-config PREEMPT_NONE_BEHAVIOUR
+config PREEMPT_NONE
        bool "No Forced Preemption (Server)"
-       select PREEMPT_NONE if !PREEMPT_DYNAMIC
+       select PREEMPT_NONE_BUILD if !PREEMPT_DYNAMIC
        help
          This is the traditional Linux preemption model, geared towards
          throughput. It will still provide good latencies most of the
@@ -18,10 +29,10 @@ config PREEMPT_NONE_BEHAVIOUR
          raw processing power of the kernel, irrespective of scheduling
          latencies.
 
-config PREEMPT_VOLUNTARY_BEHAVIOUR
+config PREEMPT_VOLUNTARY
        bool "Voluntary Kernel Preemption (Desktop)"
        depends on !ARCH_NO_PREEMPT
-       select PREEMPT_VOLUNTARY if !PREEMPT_DYNAMIC
+       select PREEMPT_VOLUNTARY_BUILD if !PREEMPT_DYNAMIC
        help
          This option reduces the latency of the kernel by adding more
          "explicit preemption points" to the kernel code. These new
@@ -37,10 +48,10 @@ config PREEMPT_VOLUNTARY_BEHAVIOUR
 
          Select this if you are building a kernel for a desktop system.
 
-config PREEMPT_BEHAVIOUR
+config PREEMPT
        bool "Preemptible Kernel (Low-Latency Desktop)"
        depends on !ARCH_NO_PREEMPT
-       select PREEMPT
+       select PREEMPT_BUILD
        help
          This option reduces the latency of the kernel by making
          all kernel code (that is not executing in a critical section)
@@ -58,7 +69,7 @@ config PREEMPT_BEHAVIOUR
 
 config PREEMPT_RT
        bool "Fully Preemptible Kernel (Real-Time)"
-       depends on EXPERT && ARCH_SUPPORTS_RT && !PREEMPT_DYNAMIC
+       depends on EXPERT && ARCH_SUPPORTS_RT
        select PREEMPTION
        help
          This option turns the kernel into a real-time kernel by replacing
@@ -75,17 +86,6 @@ config PREEMPT_RT
 
 endchoice
 
-config PREEMPT_NONE
-       bool
-
-config PREEMPT_VOLUNTARY
-       bool
-
-config PREEMPT
-       bool
-       select PREEMPTION
-       select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
-
 config PREEMPT_COUNT
        bool
 
@@ -95,8 +95,8 @@ config PREEMPTION
 
 config PREEMPT_DYNAMIC
        bool "Preemption behaviour defined on boot"
-       depends on HAVE_PREEMPT_DYNAMIC
-       select PREEMPT
+       depends on HAVE_PREEMPT_DYNAMIC && !PREEMPT_RT
+       select PREEMPT_BUILD
        default y
        help
          This option allows to define the preemption model on the kernel
index 2ca643a..43eb350 100644 (file)
@@ -1809,6 +1809,8 @@ sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_sysctl_get_new_value_proto;
        case BPF_FUNC_sysctl_set_new_value:
                return &bpf_sysctl_set_new_value_proto;
+       case BPF_FUNC_ktime_get_coarse_ns:
+               return &bpf_ktime_get_coarse_ns_proto;
        default:
                return cgroup_base_func_proto(func_id, prog);
        }
index 1ffd469..649f076 100644 (file)
@@ -1364,8 +1364,6 @@ bpf_base_func_proto(enum bpf_func_id func_id)
                return &bpf_ktime_get_ns_proto;
        case BPF_FUNC_ktime_get_boot_ns:
                return &bpf_ktime_get_boot_ns_proto;
-       case BPF_FUNC_ktime_get_coarse_ns:
-               return &bpf_ktime_get_coarse_ns_proto;
        case BPF_FUNC_ringbuf_output:
                return &bpf_ringbuf_output_proto;
        case BPF_FUNC_ringbuf_reserve:
index 50f96ea..1033ee8 100644 (file)
@@ -132,6 +132,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
        return map;
 }
 
+static void bpf_map_write_active_inc(struct bpf_map *map)
+{
+       atomic64_inc(&map->writecnt);
+}
+
+static void bpf_map_write_active_dec(struct bpf_map *map)
+{
+       atomic64_dec(&map->writecnt);
+}
+
+bool bpf_map_write_active(const struct bpf_map *map)
+{
+       return atomic64_read(&map->writecnt) != 0;
+}
+
 static u32 bpf_map_value_size(const struct bpf_map *map)
 {
        if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
@@ -601,11 +616,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma)
 {
        struct bpf_map *map = vma->vm_file->private_data;
 
-       if (vma->vm_flags & VM_MAYWRITE) {
-               mutex_lock(&map->freeze_mutex);
-               map->writecnt++;
-               mutex_unlock(&map->freeze_mutex);
-       }
+       if (vma->vm_flags & VM_MAYWRITE)
+               bpf_map_write_active_inc(map);
 }
 
 /* called for all unmapped memory region (including initial) */
@@ -613,11 +625,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma)
 {
        struct bpf_map *map = vma->vm_file->private_data;
 
-       if (vma->vm_flags & VM_MAYWRITE) {
-               mutex_lock(&map->freeze_mutex);
-               map->writecnt--;
-               mutex_unlock(&map->freeze_mutex);
-       }
+       if (vma->vm_flags & VM_MAYWRITE)
+               bpf_map_write_active_dec(map);
 }
 
 static const struct vm_operations_struct bpf_map_default_vmops = {
@@ -668,7 +677,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma)
                goto out;
 
        if (vma->vm_flags & VM_MAYWRITE)
-               map->writecnt++;
+               bpf_map_write_active_inc(map);
 out:
        mutex_unlock(&map->freeze_mutex);
        return err;
@@ -1139,6 +1148,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
+       bpf_map_write_active_inc(map);
        if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
                err = -EPERM;
                goto err_put;
@@ -1174,6 +1184,7 @@ free_value:
 free_key:
        kvfree(key);
 err_put:
+       bpf_map_write_active_dec(map);
        fdput(f);
        return err;
 }
@@ -1196,6 +1207,7 @@ static int map_delete_elem(union bpf_attr *attr)
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
+       bpf_map_write_active_inc(map);
        if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
                err = -EPERM;
                goto err_put;
@@ -1226,6 +1238,7 @@ static int map_delete_elem(union bpf_attr *attr)
 out:
        kvfree(key);
 err_put:
+       bpf_map_write_active_dec(map);
        fdput(f);
        return err;
 }
@@ -1533,6 +1546,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
+       bpf_map_write_active_inc(map);
        if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
            !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
                err = -EPERM;
@@ -1597,6 +1611,7 @@ free_value:
 free_key:
        kvfree(key);
 err_put:
+       bpf_map_write_active_dec(map);
        fdput(f);
        return err;
 }
@@ -1624,8 +1639,7 @@ static int map_freeze(const union bpf_attr *attr)
        }
 
        mutex_lock(&map->freeze_mutex);
-
-       if (map->writecnt) {
+       if (bpf_map_write_active(map)) {
                err = -EBUSY;
                goto err_put;
        }
@@ -4171,6 +4185,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
                            union bpf_attr __user *uattr,
                            int cmd)
 {
+       bool has_read  = cmd == BPF_MAP_LOOKUP_BATCH ||
+                        cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH;
+       bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;
        struct bpf_map *map;
        int err, ufd;
        struct fd f;
@@ -4183,16 +4200,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
        map = __bpf_map_get(f);
        if (IS_ERR(map))
                return PTR_ERR(map);
-
-       if ((cmd == BPF_MAP_LOOKUP_BATCH ||
-            cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
-           !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
+       if (has_write)
+               bpf_map_write_active_inc(map);
+       if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
                err = -EPERM;
                goto err_put;
        }
-
-       if (cmd != BPF_MAP_LOOKUP_BATCH &&
-           !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+       if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
                err = -EPERM;
                goto err_put;
        }
@@ -4205,8 +4219,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
                BPF_DO_BATCH(map->ops->map_update_batch);
        else
                BPF_DO_BATCH(map->ops->map_delete_batch);
-
 err_put:
+       if (has_write)
+               bpf_map_write_active_dec(map);
        fdput(f);
        return err;
 }
index 890b3ec..50efda5 100644 (file)
@@ -1151,7 +1151,8 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
                        /* transfer reg's id which is unique for every map_lookup_elem
                         * as UID of the inner map.
                         */
-                       reg->map_uid = reg->id;
+                       if (map_value_has_timer(map->inner_map_meta))
+                               reg->map_uid = reg->id;
                } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
                        reg->type = PTR_TO_XDP_SOCK;
                } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
@@ -4055,7 +4056,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 
 static bool bpf_map_is_rdonly(const struct bpf_map *map)
 {
-       return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
+       /* A map is considered read-only if the following condition are true:
+        *
+        * 1) BPF program side cannot change any of the map content. The
+        *    BPF_F_RDONLY_PROG flag is throughout the lifetime of a map
+        *    and was set at map creation time.
+        * 2) The map value(s) have been initialized from user space by a
+        *    loader and then "frozen", such that no new map update/delete
+        *    operations from syscall side are possible for the rest of
+        *    the map's lifetime from that point onwards.
+        * 3) Any parallel/pending map update/delete operations from syscall
+        *    side have been completed. Only after that point, it's safe to
+        *    assume that map value(s) are immutable.
+        */
+       return (map->map_flags & BPF_F_RDONLY_PROG) &&
+              READ_ONCE(map->frozen) &&
+              !bpf_map_write_active(map);
 }
 
 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
@@ -11631,6 +11647,13 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
                }
        }
 
+       if (map_value_has_timer(map)) {
+               if (is_tracing_prog_type(prog_type)) {
+                       verbose(env, "tracing progs cannot use bpf_timer yet\n");
+                       return -EINVAL;
+               }
+       }
+
        if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
            !bpf_offload_prog_map_match(prog, map)) {
                verbose(env, "offload device mismatch between prog and map\n");
index f2253ea..523106a 100644 (file)
@@ -7154,7 +7154,6 @@ void perf_output_sample(struct perf_output_handle *handle,
 static u64 perf_virt_to_phys(u64 virt)
 {
        u64 phys_addr = 0;
-       struct page *p = NULL;
 
        if (!virt)
                return 0;
@@ -7173,14 +7172,15 @@ static u64 perf_virt_to_phys(u64 virt)
                 * If failed, leave phys_addr as 0.
                 */
                if (current->mm != NULL) {
+                       struct page *p;
+
                        pagefault_disable();
-                       if (get_user_page_fast_only(virt, 0, &p))
+                       if (get_user_page_fast_only(virt, 0, &p)) {
                                phys_addr = page_to_phys(p) + virt % PAGE_SIZE;
+                               put_page(p);
+                       }
                        pagefault_enable();
                }
-
-               if (p)
-                       put_page(p);
        }
 
        return phys_addr;
index 5de23f3..3244cc5 100644 (file)
@@ -2277,6 +2277,7 @@ static __latent_entropy struct task_struct *copy_process(
        p->pdeath_signal = 0;
        INIT_LIST_HEAD(&p->thread_group);
        p->task_works = NULL;
+       clear_posix_cputimers_work(p);
 
 #ifdef CONFIG_KRETPROBES
        p->kretprobe_instances.first = NULL;
index 6a5ecee..7f350ae 100644 (file)
@@ -529,10 +529,10 @@ static bool msi_check_reservation_mode(struct irq_domain *domain,
 
        /*
         * Checking the first MSI descriptor is sufficient. MSIX supports
-        * masking and MSI does so when the maskbit is set.
+        * masking and MSI does so when the can_mask attribute is set.
         */
        desc = first_msi_entry(dev);
-       return desc->msi_attrib.is_msix || desc->msi_attrib.maskbit;
+       return desc->msi_attrib.is_msix || desc->msi_attrib.can_mask;
 }
 
 int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
index 013bfd6..57b132b 100644 (file)
@@ -3253,6 +3253,11 @@ void defer_console_output(void)
        preempt_enable();
 }
 
+void printk_trigger_flush(void)
+{
+       defer_console_output();
+}
+
 int vprintk_deferred(const char *fmt, va_list args)
 {
        int r;
index 2067080..8629b37 100644 (file)
@@ -31,7 +31,7 @@ static inline void autogroup_destroy(struct kref *kref)
        ag->tg->rt_se = NULL;
        ag->tg->rt_rq = NULL;
 #endif
-       sched_offline_group(ag->tg);
+       sched_release_group(ag->tg);
        sched_destroy_group(ag->tg);
 }
 
index 523fd60..3c9b0fd 100644 (file)
@@ -3726,6 +3726,9 @@ out:
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
 {
+       if (this_cpu == that_cpu)
+               return true;
+
        return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
 
@@ -6625,13 +6628,13 @@ __setup("preempt=", setup_preempt_mode);
 static void __init preempt_dynamic_init(void)
 {
        if (preempt_dynamic_mode == preempt_dynamic_undefined) {
-               if (IS_ENABLED(CONFIG_PREEMPT_NONE_BEHAVIOUR)) {
+               if (IS_ENABLED(CONFIG_PREEMPT_NONE)) {
                        sched_dynamic_update(preempt_dynamic_none);
-               } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY_BEHAVIOUR)) {
+               } else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
                        sched_dynamic_update(preempt_dynamic_voluntary);
                } else {
                        /* Default static call setting, nothing to do */
-                       WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT_BEHAVIOUR));
+                       WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
                        preempt_dynamic_mode = preempt_dynamic_full;
                        pr_info("Dynamic Preempt: full\n");
                }
@@ -9716,6 +9719,22 @@ static void sched_free_group(struct task_group *tg)
        kmem_cache_free(task_group_cache, tg);
 }
 
+static void sched_free_group_rcu(struct rcu_head *rcu)
+{
+       sched_free_group(container_of(rcu, struct task_group, rcu));
+}
+
+static void sched_unregister_group(struct task_group *tg)
+{
+       unregister_fair_sched_group(tg);
+       unregister_rt_sched_group(tg);
+       /*
+        * We have to wait for yet another RCU grace period to expire, as
+        * print_cfs_stats() might run concurrently.
+        */
+       call_rcu(&tg->rcu, sched_free_group_rcu);
+}
+
 /* allocate runqueue etc for a new task group */
 struct task_group *sched_create_group(struct task_group *parent)
 {
@@ -9759,25 +9778,35 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
 }
 
 /* rcu callback to free various structures associated with a task group */
-static void sched_free_group_rcu(struct rcu_head *rhp)
+static void sched_unregister_group_rcu(struct rcu_head *rhp)
 {
        /* Now it should be safe to free those cfs_rqs: */
-       sched_free_group(container_of(rhp, struct task_group, rcu));
+       sched_unregister_group(container_of(rhp, struct task_group, rcu));
 }
 
 void sched_destroy_group(struct task_group *tg)
 {
        /* Wait for possible concurrent references to cfs_rqs complete: */
-       call_rcu(&tg->rcu, sched_free_group_rcu);
+       call_rcu(&tg->rcu, sched_unregister_group_rcu);
 }
 
-void sched_offline_group(struct task_group *tg)
+void sched_release_group(struct task_group *tg)
 {
        unsigned long flags;
 
-       /* End participation in shares distribution: */
-       unregister_fair_sched_group(tg);
-
+       /*
+        * Unlink first, to avoid walk_tg_tree_from() from finding us (via
+        * sched_cfs_period_timer()).
+        *
+        * For this to be effective, we have to wait for all pending users of
+        * this task group to leave their RCU critical section to ensure no new
+        * user will see our dying task group any more. Specifically ensure
+        * that tg_unthrottle_up() won't add decayed cfs_rq's to it.
+        *
+        * We therefore defer calling unregister_fair_sched_group() to
+        * sched_unregister_group() which is guarantied to get called only after the
+        * current RCU grace period has expired.
+        */
        spin_lock_irqsave(&task_group_lock, flags);
        list_del_rcu(&tg->list);
        list_del_rcu(&tg->siblings);
@@ -9896,7 +9925,7 @@ static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
 {
        struct task_group *tg = css_tg(css);
 
-       sched_offline_group(tg);
+       sched_release_group(tg);
 }
 
 static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
@@ -9906,7 +9935,7 @@ static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
        /*
         * Relies on the RCU grace period between css_released() and this.
         */
-       sched_free_group(tg);
+       sched_unregister_group(tg);
 }
 
 /*
index 13950be..6e476f6 100644 (file)
@@ -11456,8 +11456,6 @@ void free_fair_sched_group(struct task_group *tg)
 {
        int i;
 
-       destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
-
        for_each_possible_cpu(i) {
                if (tg->cfs_rq)
                        kfree(tg->cfs_rq[i]);
@@ -11534,6 +11532,8 @@ void unregister_fair_sched_group(struct task_group *tg)
        struct rq *rq;
        int cpu;
 
+       destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
        for_each_possible_cpu(cpu) {
                if (tg->se[cpu])
                        remove_entity_load_avg(tg->se[cpu]);
index bb945f8..b48baab 100644 (file)
@@ -137,13 +137,17 @@ static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
        return rt_rq->rq;
 }
 
-void free_rt_sched_group(struct task_group *tg)
+void unregister_rt_sched_group(struct task_group *tg)
 {
-       int i;
-
        if (tg->rt_se)
                destroy_rt_bandwidth(&tg->rt_bandwidth);
 
+}
+
+void free_rt_sched_group(struct task_group *tg)
+{
+       int i;
+
        for_each_possible_cpu(i) {
                if (tg->rt_rq)
                        kfree(tg->rt_rq[i]);
@@ -250,6 +254,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
        return &rq->rt;
 }
 
+void unregister_rt_sched_group(struct task_group *tg) { }
+
 void free_rt_sched_group(struct task_group *tg) { }
 
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
index 7f1612d..0e66749 100644 (file)
@@ -488,6 +488,7 @@ extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
 
+extern void unregister_rt_sched_group(struct task_group *tg);
 extern void free_rt_sched_group(struct task_group *tg);
 extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
@@ -503,7 +504,7 @@ extern struct task_group *sched_create_group(struct task_group *parent);
 extern void sched_online_group(struct task_group *tg,
                               struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
-extern void sched_offline_group(struct task_group *tg);
+extern void sched_release_group(struct task_group *tg);
 
 extern void sched_move_task(struct task_struct *tsk);
 
index 643d412..96b4e78 100644 (file)
@@ -1159,13 +1159,28 @@ static void posix_cpu_timers_work(struct callback_head *work)
 }
 
 /*
+ * Clear existing posix CPU timers task work.
+ */
+void clear_posix_cputimers_work(struct task_struct *p)
+{
+       /*
+        * A copied work entry from the old task is not meaningful, clear it.
+        * N.B. init_task_work will not do this.
+        */
+       memset(&p->posix_cputimers_work.work, 0,
+              sizeof(p->posix_cputimers_work.work));
+       init_task_work(&p->posix_cputimers_work.work,
+                      posix_cpu_timers_work);
+       p->posix_cputimers_work.scheduled = false;
+}
+
+/*
  * Initialize posix CPU timers task work in init task. Out of line to
  * keep the callback static and to avoid header recursion hell.
  */
 void __init posix_cputimers_init_work(void)
 {
-       init_task_work(&current->posix_cputimers_work.work,
-                      posix_cpu_timers_work);
+       clear_posix_cputimers_work(current);
 }
 
 /*
index 7396488..ae97550 100644 (file)
@@ -1111,8 +1111,6 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_ktime_get_ns_proto;
        case BPF_FUNC_ktime_get_boot_ns:
                return &bpf_ktime_get_boot_ns_proto;
-       case BPF_FUNC_ktime_get_coarse_ns:
-               return &bpf_ktime_get_coarse_ns_proto;
        case BPF_FUNC_tail_call:
                return &bpf_tail_call_proto;
        case BPF_FUNC_get_current_pid_tgid:
index 0abc9a4..5ea2c9e 100644 (file)
@@ -1953,9 +1953,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data,
                if (!hist_field->type)
                        goto free;
 
-               if (field->filter_type == FILTER_STATIC_STRING)
+               if (field->filter_type == FILTER_STATIC_STRING) {
                        hist_field->fn = hist_field_string;
-               else if (field->filter_type == FILTER_DYN_STRING)
+                       hist_field->size = field->size;
+               } else if (field->filter_type == FILTER_DYN_STRING)
                        hist_field->fn = hist_field_dynstring;
                else
                        hist_field->fn = hist_field_pstring;
@@ -2580,7 +2581,8 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data,
        operand1_str = str;
        str = sep+1;
 
-       if (!operand1_str || !str)
+       /* Binary operator requires both operands */
+       if (*operand1_str == '\0' || *str == '\0')
                goto free;
 
        operand_flags = 0;
@@ -3024,8 +3026,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt,
                if (val->flags & HIST_FIELD_FL_STRING) {
                        char *str = elt_data->field_var_str[j++];
                        char *val_str = (char *)(uintptr_t)var_val;
+                       unsigned int size;
 
-                       strscpy(str, val_str, STR_VAR_LEN_MAX);
+                       size = min(val->size, STR_VAR_LEN_MAX);
+                       strscpy(str, val_str, size);
                        var_val = (u64)(uintptr_t)str;
                }
                tracing_map_set_var(elt, var_idx, var_val);
@@ -4912,6 +4916,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
                        if (hist_field->flags & HIST_FIELD_FL_STRING) {
                                unsigned int str_start, var_str_idx, idx;
                                char *str, *val_str;
+                               unsigned int size;
 
                                str_start = hist_data->n_field_var_str +
                                        hist_data->n_save_var_str;
@@ -4920,7 +4925,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data,
 
                                str = elt_data->field_var_str[idx];
                                val_str = (char *)(uintptr_t)hist_val;
-                               strscpy(str, val_str, STR_VAR_LEN_MAX);
+
+                               size = min(hist_field->size, STR_VAR_LEN_MAX);
+                               strscpy(str, val_str, size);
 
                                hist_val = (u64)(uintptr_t)str;
                        }
index 3e4a165..7520d43 100644 (file)
@@ -55,7 +55,8 @@ struct osnoise_instance {
        struct list_head        list;
        struct trace_array      *tr;
 };
-struct list_head osnoise_instances;
+
+static struct list_head osnoise_instances;
 
 static bool osnoise_has_registered_instances(void)
 {
index 6b629ab..a512b99 100644 (file)
 #ifdef STATIC
 # define UNZSTD_PREBOOT
 # include "xxhash.c"
-# include "zstd/entropy_common.c"
-# include "zstd/fse_decompress.c"
-# include "zstd/huf_decompress.c"
-# include "zstd/zstd_common.c"
-# include "zstd/decompress.c"
+# include "zstd/decompress_sources.h"
 #endif
 
 #include <linux/decompress/mm.h>
 
 static int INIT handle_zstd_error(size_t ret, void (*error)(char *x))
 {
-       const int err = ZSTD_getErrorCode(ret);
+       const zstd_error_code err = zstd_get_error_code(ret);
 
-       if (!ZSTD_isError(ret))
+       if (!zstd_is_error(ret))
                return 0;
 
+       /*
+        * zstd_get_error_name() cannot be used because error takes a char *
+        * not a const char *
+        */
        switch (err) {
        case ZSTD_error_memory_allocation:
                error("ZSTD decompressor ran out of memory");
@@ -124,28 +124,28 @@ static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf,
                                  long out_len, long *in_pos,
                                  void (*error)(char *x))
 {
-       const size_t wksp_size = ZSTD_DCtxWorkspaceBound();
+       const size_t wksp_size = zstd_dctx_workspace_bound();
        void *wksp = large_malloc(wksp_size);
-       ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size);
+       zstd_dctx *dctx = zstd_init_dctx(wksp, wksp_size);
        int err;
        size_t ret;
 
        if (dctx == NULL) {
-               error("Out of memory while allocating ZSTD_DCtx");
+               error("Out of memory while allocating zstd_dctx");
                err = -1;
                goto out;
        }
        /*
         * Find out how large the frame actually is, there may be junk at
-        * the end of the frame that ZSTD_decompressDCtx() can't handle.
+        * the end of the frame that zstd_decompress_dctx() can't handle.
         */
-       ret = ZSTD_findFrameCompressedSize(in_buf, in_len);
+       ret = zstd_find_frame_compressed_size(in_buf, in_len);
        err = handle_zstd_error(ret, error);
        if (err)
                goto out;
        in_len = (long)ret;
 
-       ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len);
+       ret = zstd_decompress_dctx(dctx, out_buf, out_len, in_buf, in_len);
        err = handle_zstd_error(ret, error);
        if (err)
                goto out;
@@ -167,14 +167,14 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
                         long *in_pos,
                         void (*error)(char *x))
 {
-       ZSTD_inBuffer in;
-       ZSTD_outBuffer out;
-       ZSTD_frameParams params;
+       zstd_in_buffer in;
+       zstd_out_buffer out;
+       zstd_frame_header header;
        void *in_allocated = NULL;
        void *out_allocated = NULL;
        void *wksp = NULL;
        size_t wksp_size;
-       ZSTD_DStream *dstream;
+       zstd_dstream *dstream;
        int err;
        size_t ret;
 
@@ -238,13 +238,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
        out.size = out_len;
 
        /*
-        * We need to know the window size to allocate the ZSTD_DStream.
+        * We need to know the window size to allocate the zstd_dstream.
         * Since we are streaming, we need to allocate a buffer for the sliding
         * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX
         * (8 MB), so it is important to use the actual value so as not to
         * waste memory when it is smaller.
         */
-       ret = ZSTD_getFrameParams(&params, in.src, in.size);
+       ret = zstd_get_frame_header(&header, in.src, in.size);
        err = handle_zstd_error(ret, error);
        if (err)
                goto out;
@@ -253,19 +253,19 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
                err = -1;
                goto out;
        }
-       if (params.windowSize > ZSTD_WINDOWSIZE_MAX) {
+       if (header.windowSize > ZSTD_WINDOWSIZE_MAX) {
                error("ZSTD-compressed data has too large a window size");
                err = -1;
                goto out;
        }
 
        /*
-        * Allocate the ZSTD_DStream now that we know how much memory is
+        * Allocate the zstd_dstream now that we know how much memory is
         * required.
         */
-       wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize);
+       wksp_size = zstd_dstream_workspace_bound(header.windowSize);
        wksp = large_malloc(wksp_size);
-       dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size);
+       dstream = zstd_init_dstream(header.windowSize, wksp, wksp_size);
        if (dstream == NULL) {
                error("Out of memory while allocating ZSTD_DStream");
                err = -1;
@@ -298,7 +298,7 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
                        in.size = in_len;
                }
                /* Returns zero when the frame is complete. */
-               ret = ZSTD_decompressStream(dstream, &out, &in);
+               ret = zstd_decompress_stream(dstream, &out, &in);
                err = handle_zstd_error(ret, error);
                if (err)
                        goto out;
index f9e8900..199ab20 100644 (file)
@@ -75,6 +75,12 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
                touch_softlockup_watchdog();
        }
 
+       /*
+        * Force flush any remote buffers that might be stuck in IRQ context
+        * and therefore could not run their irq_work.
+        */
+       printk_trigger_flush();
+
        clear_bit_unlock(0, &backtrace_flag);
        put_cpu();
 }
index f5d778e..65218ec 100644 (file)
@@ -1,10 +1,46 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+# ################################################################
+# Copyright (c) Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# You may select, at your option, one of the above-listed licenses.
+# ################################################################
 obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o
 obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
 
 ccflags-y += -O3
 
-zstd_compress-y := fse_compress.o huf_compress.o compress.o \
-                  entropy_common.o fse_decompress.o zstd_common.o
-zstd_decompress-y := huf_decompress.o decompress.o \
-                    entropy_common.o fse_decompress.o zstd_common.o
+zstd_compress-y := \
+               zstd_compress_module.o \
+               common/debug.o \
+               common/entropy_common.o \
+               common/error_private.o \
+               common/fse_decompress.o \
+               common/zstd_common.o \
+               compress/fse_compress.o \
+               compress/hist.o \
+               compress/huf_compress.o \
+               compress/zstd_compress.o \
+               compress/zstd_compress_literals.o \
+               compress/zstd_compress_sequences.o \
+               compress/zstd_compress_superblock.o \
+               compress/zstd_double_fast.o \
+               compress/zstd_fast.o \
+               compress/zstd_lazy.o \
+               compress/zstd_ldm.o \
+               compress/zstd_opt.o \
+
+zstd_decompress-y := \
+               zstd_decompress_module.o \
+               common/debug.o \
+               common/entropy_common.o \
+               common/error_private.o \
+               common/fse_decompress.o \
+               common/zstd_common.o \
+               decompress/huf_decompress.o \
+               decompress/zstd_ddict.o \
+               decompress/zstd_decompress.o \
+               decompress/zstd_decompress_block.o \
diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h
deleted file mode 100644 (file)
index 5d6343c..0000000
+++ /dev/null
@@ -1,380 +0,0 @@
-/*
- * bitstream
- * Part of FSE library
- * header file (to include)
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef BITSTREAM_H_MODULE
-#define BITSTREAM_H_MODULE
-
-/*
-*  This API consists of small unitary functions, which must be inlined for best performance.
-*  Since link-time-optimization is not available for all compilers,
-*  these functions are defined into a .h to be included.
-*/
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include "error_private.h" /* error codes and messages */
-#include "mem.h"          /* unaligned access routines */
-
-/*=========================================
-*  Target specific
-=========================================*/
-#define STREAM_ACCUMULATOR_MIN_32 25
-#define STREAM_ACCUMULATOR_MIN_64 57
-#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
-
-/*-******************************************
-*  bitStream encoding API (write forward)
-********************************************/
-/* bitStream can mix input from multiple sources.
-*  A critical property of these streams is that they encode and decode in **reverse** direction.
-*  So the first bit sequence you add will be the last to be read, like a LIFO stack.
-*/
-typedef struct {
-       size_t bitContainer;
-       int bitPos;
-       char *startPtr;
-       char *ptr;
-       char *endPtr;
-} BIT_CStream_t;
-
-ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity);
-ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
-ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC);
-ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC);
-
-/* Start with initCStream, providing the size of buffer to write into.
-*  bitStream will never write outside of this buffer.
-*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
-*
-*  bits are first added to a local register.
-*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
-*  Writing data into memory is an explicit operation, performed by the flushBits function.
-*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
-*  After a flushBits, a maximum of 7 bits might still be stored into local register.
-*
-*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
-*
-*  Last operation is to close the bitStream.
-*  The function returns the final size of CStream in bytes.
-*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
-*/
-
-/*-********************************************
-*  bitStream decoding API (read backward)
-**********************************************/
-typedef struct {
-       size_t bitContainer;
-       unsigned bitsConsumed;
-       const char *ptr;
-       const char *start;
-} BIT_DStream_t;
-
-typedef enum {
-       BIT_DStream_unfinished = 0,
-       BIT_DStream_endOfBuffer = 1,
-       BIT_DStream_completed = 2,
-       BIT_DStream_overflow = 3
-} BIT_DStream_status; /* result of BIT_reloadDStream() */
-/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
-
-ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize);
-ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits);
-ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD);
-ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD);
-
-/* Start by invoking BIT_initDStream().
-*  A chunk of the bitStream is then stored into a local register.
-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-*  You can then retrieve bitFields stored into the local register, **in reverse order**.
-*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
-*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
-*  Otherwise, it can be less than that, so proceed accordingly.
-*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
-*/
-
-/*-****************************************
-*  unsafe API
-******************************************/
-ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits);
-/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
-
-ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC);
-/* unsafe version; does not check buffer overflow */
-
-ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits);
-/* faster, but works only if nbBits >= 1 */
-
-/*-**************************************************************
-*  Internal functions
-****************************************************************/
-ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); }
-
-/*=====    Local Constants   =====*/
-static const unsigned BIT_mask[] = {0,       1,       3,       7,      0xF,      0x1F,     0x3F,     0x7F,      0xFF,
-                                   0x1FF,   0x3FF,   0x7FF,   0xFFF,    0x1FFF,   0x3FFF,   0x7FFF,   0xFFFF,    0x1FFFF,
-                                   0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */
-
-/*-**************************************************************
-*  bitStream encoding
-****************************************************************/
-/*! BIT_initCStream() :
- *  `dstCapacity` must be > sizeof(void*)
- *  @return : 0 if success,
-                         otherwise an error code (can be tested using ERR_isError() ) */
-ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity)
-{
-       bitC->bitContainer = 0;
-       bitC->bitPos = 0;
-       bitC->startPtr = (char *)startPtr;
-       bitC->ptr = bitC->startPtr;
-       bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
-       if (dstCapacity <= sizeof(bitC->ptr))
-               return ERROR(dstSize_tooSmall);
-       return 0;
-}
-
-/*! BIT_addBits() :
-       can add up to 26 bits into `bitC`.
-       Does not check for register overflow ! */
-ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
-{
-       bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
-       bitC->bitPos += nbBits;
-}
-
-/*! BIT_addBitsFast() :
- *  works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
-ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits)
-{
-       bitC->bitContainer |= value << bitC->bitPos;
-       bitC->bitPos += nbBits;
-}
-
-/*! BIT_flushBitsFast() :
- *  unsafe version; does not check buffer overflow */
-ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC)
-{
-       size_t const nbBytes = bitC->bitPos >> 3;
-       ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
-       bitC->ptr += nbBytes;
-       bitC->bitPos &= 7;
-       bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_flushBits() :
- *  safe version; check for buffer overflow, and prevents it.
- *  note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
-ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC)
-{
-       size_t const nbBytes = bitC->bitPos >> 3;
-       ZSTD_writeLEST(bitC->ptr, bitC->bitContainer);
-       bitC->ptr += nbBytes;
-       if (bitC->ptr > bitC->endPtr)
-               bitC->ptr = bitC->endPtr;
-       bitC->bitPos &= 7;
-       bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
-}
-
-/*! BIT_closeCStream() :
- *  @return : size of CStream, in bytes,
-                         or 0 if it could not fit into dstBuffer */
-ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC)
-{
-       BIT_addBitsFast(bitC, 1, 1); /* endMark */
-       BIT_flushBits(bitC);
-
-       if (bitC->ptr >= bitC->endPtr)
-               return 0; /* doesn't fit within authorized budget : cancel */
-
-       return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
-}
-
-/*-********************************************************
-* bitStream decoding
-**********************************************************/
-/*! BIT_initDStream() :
-*   Initialize a BIT_DStream_t.
-*   `bitD` : a pointer to an already allocated BIT_DStream_t structure.
-*   `srcSize` must be the *exact* size of the bitStream, in bytes.
-*   @return : size of stream (== srcSize) or an errorCode if a problem is detected
-*/
-ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize)
-{
-       if (srcSize < 1) {
-               memset(bitD, 0, sizeof(*bitD));
-               return ERROR(srcSize_wrong);
-       }
-
-       if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
-               bitD->start = (const char *)srcBuffer;
-               bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer);
-               bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
-               {
-                       BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
-                       bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
-                       if (lastByte == 0)
-                               return ERROR(GENERIC); /* endMark not present */
-               }
-       } else {
-               bitD->start = (const char *)srcBuffer;
-               bitD->ptr = bitD->start;
-               bitD->bitContainer = *(const BYTE *)(bitD->start);
-               switch (srcSize) {
-               case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16);
-                       fallthrough;
-               case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24);
-                       fallthrough;
-               case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32);
-                       fallthrough;
-               case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24;
-                       fallthrough;
-               case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16;
-                       fallthrough;
-               case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8;
-                       fallthrough;
-               default:;
-               }
-               {
-                       BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1];
-                       bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
-                       if (lastByte == 0)
-                               return ERROR(GENERIC); /* endMark not present */
-               }
-               bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8;
-       }
-
-       return srcSize;
-}
-
-ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; }
-
-ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; }
-
-ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; }
-
-/*! BIT_lookBits() :
- *  Provides next n bits from local register.
- *  local register is not modified.
- *  On 32-bits, maxNbBits==24.
- *  On 64-bits, maxNbBits==56.
- *  @return : value extracted
- */
-ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits)
-{
-       U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
-       return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask);
-}
-
-/*! BIT_lookBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
-ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits)
-{
-       U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1;
-       return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask);
-}
-
-ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; }
-
-/*! BIT_readBits() :
- *  Read (consume) next n bits from local register and update.
- *  Pay attention to not read more than nbBits contained into local register.
- *  @return : extracted value.
- */
-ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits)
-{
-       size_t const value = BIT_lookBits(bitD, nbBits);
-       BIT_skipBits(bitD, nbBits);
-       return value;
-}
-
-/*! BIT_readBitsFast() :
-*   unsafe version; only works only if nbBits >= 1 */
-ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits)
-{
-       size_t const value = BIT_lookBitsFast(bitD, nbBits);
-       BIT_skipBits(bitD, nbBits);
-       return value;
-}
-
-/*! BIT_reloadDStream() :
-*   Refill `bitD` from buffer previously set in BIT_initDStream() .
-*   This function is safe, it guarantees it will not read beyond src buffer.
-*   @return : status of `BIT_DStream_t` internal register.
-                         if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
-ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD)
-{
-       if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */
-               return BIT_DStream_overflow;
-
-       if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
-               bitD->ptr -= bitD->bitsConsumed >> 3;
-               bitD->bitsConsumed &= 7;
-               bitD->bitContainer = ZSTD_readLEST(bitD->ptr);
-               return BIT_DStream_unfinished;
-       }
-       if (bitD->ptr == bitD->start) {
-               if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8)
-                       return BIT_DStream_endOfBuffer;
-               return BIT_DStream_completed;
-       }
-       {
-               U32 nbBytes = bitD->bitsConsumed >> 3;
-               BIT_DStream_status result = BIT_DStream_unfinished;
-               if (bitD->ptr - nbBytes < bitD->start) {
-                       nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
-                       result = BIT_DStream_endOfBuffer;
-               }
-               bitD->ptr -= nbBytes;
-               bitD->bitsConsumed -= nbBytes * 8;
-               bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
-               return result;
-       }
-}
-
-/*! BIT_endOfDStream() :
-*   @return Tells if DStream has exactly reached its end (all bits consumed).
-*/
-ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream)
-{
-       return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8));
-}
-
-#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
new file mode 100644 (file)
index 0000000..28248ab
--- /dev/null
@@ -0,0 +1,437 @@
+/* ******************************************************************
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+/*
+*  This API consists of small unitary functions, which must be inlined for best performance.
+*  Since link-time-optimization is not available for all compilers,
+*  these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include "mem.h"            /* unaligned access routines */
+#include "compiler.h"       /* UNLIKELY() */
+#include "debug.h"          /* assert(), DEBUGLOG(), RAWLOG() */
+#include "error_private.h"  /* error codes and messages */
+
+
+/*=========================================
+*  Target specific
+=========================================*/
+
+#define STREAM_ACCUMULATOR_MIN_32  25
+#define STREAM_ACCUMULATOR_MIN_64  57
+#define STREAM_ACCUMULATOR_MIN    ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+*  bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
+ * A critical property of these streams is that they encode and decode in **reverse** direction.
+ * So the first bit sequence you add will be the last to be read, like a LIFO stack.
+ */
+typedef struct {
+    size_t bitContainer;
+    unsigned bitPos;
+    char*  startPtr;
+    char*  ptr;
+    char*  endPtr;
+} BIT_CStream_t;
+
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
+MEM_STATIC void   BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+MEM_STATIC void   BIT_flushBits(BIT_CStream_t* bitC);
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
+
+/* Start with initCStream, providing the size of buffer to write into.
+*  bitStream will never write outside of this buffer.
+*  `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
+*
+*  bits are first added to a local register.
+*  Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
+*  Writing data into memory is an explicit operation, performed by the flushBits function.
+*  Hence keep track how many bits are potentially stored into local register to avoid register overflow.
+*  After a flushBits, a maximum of 7 bits might still be stored into local register.
+*
+*  Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
+*
+*  Last operation is to close the bitStream.
+*  The function returns the final size of CStream in bytes.
+*  If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
+*/
+
+
+/*-********************************************
+*  bitStream decoding API (read backward)
+**********************************************/
+typedef struct {
+    size_t   bitContainer;
+    unsigned bitsConsumed;
+    const char* ptr;
+    const char* start;
+    const char* limitPtr;
+} BIT_DStream_t;
+
+typedef enum { BIT_DStream_unfinished = 0,
+               BIT_DStream_endOfBuffer = 1,
+               BIT_DStream_completed = 2,
+               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
+               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+
+MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
+MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
+
+
+/* Start by invoking BIT_initDStream().
+*  A chunk of the bitStream is then stored into a local register.
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  You can then retrieve bitFields stored into the local register, **in reverse order**.
+*  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
+*  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
+*  Otherwise, it can be less than that, so proceed accordingly.
+*  Checking if DStream has reached its end can be performed with BIT_endOfDStream().
+*/
+
+
+/*-****************************************
+*  unsafe API
+******************************************/
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
+/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
+
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
+/* unsafe version; does not check buffer overflow */
+
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
+/* faster, but works only if nbBits >= 1 */
+
+
+
+/*-**************************************************************
+*  Internal functions
+****************************************************************/
+MEM_STATIC unsigned BIT_highbit32 (U32 val)
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* Use GCC Intrinsic */
+        return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+        static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
+                                                 11, 14, 16, 18, 22, 25,  3, 30,
+                                                  8, 12, 20, 28, 15, 17, 24,  7,
+                                                 19, 27, 23,  6, 26,  5,  4, 31 };
+        U32 v = val;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
+/*=====    Local Constants   =====*/
+static const unsigned BIT_mask[] = {
+    0,          1,         3,         7,         0xF,       0x1F,
+    0x3F,       0x7F,      0xFF,      0x1FF,     0x3FF,     0x7FF,
+    0xFFF,      0x1FFF,    0x3FFF,    0x7FFF,    0xFFFF,    0x1FFFF,
+    0x3FFFF,    0x7FFFF,   0xFFFFF,   0x1FFFFF,  0x3FFFFF,  0x7FFFFF,
+    0xFFFFFF,   0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
+    0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
+#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
+
+/*-**************************************************************
+*  bitStream encoding
+****************************************************************/
+/*! BIT_initCStream() :
+ *  `dstCapacity` must be > sizeof(size_t)
+ *  @return : 0 if success,
+ *            otherwise an error code (can be tested using ERR_isError()) */
+MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
+{
+    bitC->bitContainer = 0;
+    bitC->bitPos = 0;
+    bitC->startPtr = (char*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
+    if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! BIT_addBits() :
+ *  can add up to 31 bits into `bitC`.
+ *  Note : does not check for register overflow ! */
+MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
+                            size_t value, unsigned nbBits)
+{
+    DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32);
+    assert(nbBits < BIT_MASK_SIZE);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_addBitsFast() :
+ *  works only if `value` is _clean_,
+ *  meaning all high bits above nbBits are 0 */
+MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
+                                size_t value, unsigned nbBits)
+{
+    assert((value>>nbBits) == 0);
+    assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    bitC->bitContainer |= value << bitC->bitPos;
+    bitC->bitPos += nbBits;
+}
+
+/*! BIT_flushBitsFast() :
+ *  assumption : bitContainer has not overflowed
+ *  unsafe version; does not check buffer overflow */
+MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
+{
+    size_t const nbBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_flushBits() :
+ *  assumption : bitContainer has not overflowed
+ *  safe version; check for buffer overflow, and prevents it.
+ *  note : does not signal buffer overflow.
+ *  overflow will be revealed later on using BIT_closeCStream() */
+MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
+{
+    size_t const nbBytes = bitC->bitPos >> 3;
+    assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitC->bitContainer);
+    bitC->ptr += nbBytes;
+    if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    bitC->bitPos &= 7;
+    bitC->bitContainer >>= nbBytes*8;
+}
+
+/*! BIT_closeCStream() :
+ *  @return : size of CStream, in bytes,
+ *            or 0 if it could not fit into dstBuffer */
+MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
+{
+    BIT_addBitsFast(bitC, 1, 1);   /* endMark */
+    BIT_flushBits(bitC);
+    if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+    return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
+}
+
+
+/*-********************************************************
+*  bitStream decoding
+**********************************************************/
+/*! BIT_initDStream() :
+ *  Initialize a BIT_DStream_t.
+ * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
+ * `srcSize` must be the *exact* size of the bitStream, in bytes.
+ * @return : size of stream (== srcSize), or an errorCode if a problem is detected
+ */
+MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
+{
+    if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
+
+    bitD->start = (const char*)srcBuffer;
+    bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
+
+    if (srcSize >=  sizeof(bitD->bitContainer)) {  /* normal case */
+        bitD->ptr   = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);
+        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
+          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
+    } else {
+        bitD->ptr   = bitD->start;
+        bitD->bitContainer = *(const BYTE*)(bitD->start);
+        switch(srcSize)
+        {
+        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+                ZSTD_FALLTHROUGH;
+
+        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+                ZSTD_FALLTHROUGH;
+
+        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+                ZSTD_FALLTHROUGH;
+
+        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+                ZSTD_FALLTHROUGH;
+
+        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+                ZSTD_FALLTHROUGH;
+
+        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+                ZSTD_FALLTHROUGH;
+
+        default: break;
+        }
+        {   BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
+            bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+            if (lastByte == 0) return ERROR(corruption_detected);  /* endMark not present */
+        }
+        bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
+    }
+
+    return srcSize;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+{
+    return bitContainer >> start;
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+{
+    U32 const regMask = sizeof(bitContainer)*8 - 1;
+    /* if start > regMask, bitstream is corrupted, and result is undefined */
+    assert(nbBits < BIT_MASK_SIZE);
+    return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+}
+
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+{
+    assert(nbBits < BIT_MASK_SIZE);
+    return bitContainer & BIT_mask[nbBits];
+}
+
+/*! BIT_lookBits() :
+ *  Provides next n bits from local register.
+ *  local register is not modified.
+ *  On 32-bits, maxNbBits==24.
+ *  On 64-bits, maxNbBits==56.
+ * @return : value extracted */
+MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
+{
+    /* arbitrate between double-shift and shift+mask */
+#if 1
+    /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
+     * bitstream is likely corrupted, and result is undefined */
+    return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
+#else
+    /* this code path is slower on my os-x laptop */
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
+#endif
+}
+
+/*! BIT_lookBitsFast() :
+ *  unsafe version; only works if nbBits >= 1 */
+MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
+{
+    U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
+    assert(nbBits >= 1);
+    return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
+}
+
+MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+{
+    bitD->bitsConsumed += nbBits;
+}
+
+/*! BIT_readBits() :
+ *  Read (consume) next n bits from local register and update.
+ *  Pay attention to not read more than nbBits contained into local register.
+ * @return : extracted value. */
+MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const value = BIT_lookBits(bitD, nbBits);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BIT_readBitsFast() :
+ *  unsafe version; only works only if nbBits >= 1 */
+MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
+{
+    size_t const value = BIT_lookBitsFast(bitD, nbBits);
+    assert(nbBits >= 1);
+    BIT_skipBits(bitD, nbBits);
+    return value;
+}
+
+/*! BIT_reloadDStreamFast() :
+ *  Similar to BIT_reloadDStream(), but with two differences:
+ *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
+ *  2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this
+ *     point you must use BIT_reloadDStream() to reload.
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
+{
+    if (UNLIKELY(bitD->ptr < bitD->limitPtr))
+        return BIT_DStream_overflow;
+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+    bitD->ptr -= bitD->bitsConsumed >> 3;
+    bitD->bitsConsumed &= 7;
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+    return BIT_DStream_unfinished;
+}
+
+/*! BIT_reloadDStream() :
+ *  Refill `bitD` from buffer previously set in BIT_initDStream() .
+ *  This function is safe, it guarantees it will not read beyond src buffer.
+ * @return : status of `BIT_DStream_t` internal register.
+ *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+{
+    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+        return BIT_DStream_overflow;
+
+    if (bitD->ptr >= bitD->limitPtr) {
+        return BIT_reloadDStreamFast(bitD);
+    }
+    if (bitD->ptr == bitD->start) {
+        if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
+        return BIT_DStream_completed;
+    }
+    /* start < ptr < limitPtr */
+    {   U32 nbBytes = bitD->bitsConsumed >> 3;
+        BIT_DStream_status result = BIT_DStream_unfinished;
+        if (bitD->ptr - nbBytes < bitD->start) {
+            nbBytes = (U32)(bitD->ptr - bitD->start);  /* ptr > start */
+            result = BIT_DStream_endOfBuffer;
+        }
+        bitD->ptr -= nbBytes;
+        bitD->bitsConsumed -= nbBytes*8;
+        bitD->bitContainer = MEM_readLEST(bitD->ptr);   /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
+        return result;
+    }
+}
+
+/*! BIT_endOfDStream() :
+ * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
+ */
+MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
+{
+    return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
+}
+
+
+#endif /* BITSTREAM_H_MODULE */
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
new file mode 100644 (file)
index 0000000..a1a051e
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+/*-*******************************************************
+*  Compiler specifics
+*********************************************************/
+/* force inlining */
+
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#  define INLINE_KEYWORD inline
+#else
+#  define INLINE_KEYWORD
+#endif
+
+#define FORCE_INLINE_ATTR __attribute__((always_inline))
+
+
+/*
+  On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
+  This explictly marks such functions as __cdecl so that the code will still compile
+  if a CC other than __cdecl has been made the default.
+*/
+#define WIN_CDECL
+
+/*
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+/*
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compilers
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+#  define HINT_INLINE static INLINE_KEYWORD
+#else
+#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#endif
+
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#define UNUSED_ATTR __attribute__((unused))
+
+/* force no inlining */
+#define FORCE_NOINLINE static __attribute__((__noinline__))
+
+
+/* target attribute */
+#ifndef __has_attribute
+  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
+#endif
+#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+  #if ((defined(__clang__) && __has_attribute(__target__)) \
+      || (defined(__GNUC__) \
+          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+      && (defined(__x86_64__) || defined(_M_X86)) \
+      && !defined(__BMI2__)
+  #  define DYNAMIC_BMI2 1
+  #else
+  #  define DYNAMIC_BMI2 0
+  #endif
+#endif
+
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+#if ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#  define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+#elif defined(__aarch64__)
+#  define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
+#  define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#else
+#  define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
+#  define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#endif  /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s)  {            \
+    const char* const _ptr = (const char*)(p);  \
+    size_t const _size = (size_t)(s);     \
+    size_t _pos;                          \
+    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
+        PREFETCH_L2(_ptr + _pos);         \
+    }                                     \
+}
+
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+#  else
+#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+#  endif
+#else
+#  define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
+ */
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+
+/* disable warnings */
+
+/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
+
+
+/* compat. with non-clang compilers */
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_feature
+#  define __has_feature(x) 0
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define ZSTD_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+/* Only use C++ attributes in C++. Some compilers report support for C++
+ * attributes when compiling with C.
+ */
+#define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
+
+/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * - Else: __attribute__((__fallthrough__))
+ */
+#define ZSTD_FALLTHROUGH fallthrough
+
+/* detects whether we are being compiled under msan */
+
+
+/* detects whether we are being compiled under asan */
+
+
+#endif /* ZSTD_COMPILER_H */
diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h
new file mode 100644 (file)
index 0000000..0db7b42
--- /dev/null
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/*
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include "mem.h"
+
+
+typedef struct {
+    U32 f1c;
+    U32 f1d;
+    U32 f7b;
+    U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+    U32 f1c = 0;
+    U32 f1d = 0;
+    U32 f7b = 0;
+    U32 f7c = 0;
+#if defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+    /* The following block like the normal cpuid branch below, but gcc
+     * reserves ebx for use of its pic register so we must specially
+     * handle the save and restore to avoid clobbering the register
+     */
+    U32 n;
+    __asm__(
+        "pushl %%ebx\n\t"
+        "cpuid\n\t"
+        "popl %%ebx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "popl %%ebx\n\t"
+          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+          : "a"(1));
+    }
+    if (n >= 7) {
+      __asm__(
+          "pushl %%ebx\n\t"
+          "cpuid\n\t"
+          "movl %%ebx, %%eax\n\t"
+          "popl %%ebx"
+          : "=a"(f7b), "=c"(f7c)
+          : "a"(7), "c"(0)
+          : "edx");
+    }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+    U32 n;
+    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+    }
+    if (n >= 7) {
+      U32 f7a;
+      __asm__("cpuid"
+              : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+              : "a"(7), "c"(0)
+              : "edx");
+    }
+#endif
+    {
+        ZSTD_cpuid_t cpuid;
+        cpuid.f1c = f1c;
+        cpuid.f1d = f1d;
+        cpuid.f7b = f7b;
+        cpuid.f7c = f7c;
+        return cpuid;
+    }
+}
+
+#define X(name, r, bit)                                                        \
+  MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
+    return ((cpuid.r) & (1U << bit)) != 0;                                     \
+  }
+
+/* cpuid(1): Processor Info and Feature Bits. */
+#define C(name, bit) X(name, f1c, bit)
+  C(sse3, 0)
+  C(pclmuldq, 1)
+  C(dtes64, 2)
+  C(monitor, 3)
+  C(dscpl, 4)
+  C(vmx, 5)
+  C(smx, 6)
+  C(eist, 7)
+  C(tm2, 8)
+  C(ssse3, 9)
+  C(cnxtid, 10)
+  C(fma, 12)
+  C(cx16, 13)
+  C(xtpr, 14)
+  C(pdcm, 15)
+  C(pcid, 17)
+  C(dca, 18)
+  C(sse41, 19)
+  C(sse42, 20)
+  C(x2apic, 21)
+  C(movbe, 22)
+  C(popcnt, 23)
+  C(tscdeadline, 24)
+  C(aes, 25)
+  C(xsave, 26)
+  C(osxsave, 27)
+  C(avx, 28)
+  C(f16c, 29)
+  C(rdrand, 30)
+#undef C
+#define D(name, bit) X(name, f1d, bit)
+  D(fpu, 0)
+  D(vme, 1)
+  D(de, 2)
+  D(pse, 3)
+  D(tsc, 4)
+  D(msr, 5)
+  D(pae, 6)
+  D(mce, 7)
+  D(cx8, 8)
+  D(apic, 9)
+  D(sep, 11)
+  D(mtrr, 12)
+  D(pge, 13)
+  D(mca, 14)
+  D(cmov, 15)
+  D(pat, 16)
+  D(pse36, 17)
+  D(psn, 18)
+  D(clfsh, 19)
+  D(ds, 21)
+  D(acpi, 22)
+  D(mmx, 23)
+  D(fxsr, 24)
+  D(sse, 25)
+  D(sse2, 26)
+  D(ss, 27)
+  D(htt, 28)
+  D(tm, 29)
+  D(pbe, 31)
+#undef D
+
+/* cpuid(7): Extended Features. */
+#define B(name, bit) X(name, f7b, bit)
+  B(bmi1, 3)
+  B(hle, 4)
+  B(avx2, 5)
+  B(smep, 7)
+  B(bmi2, 8)
+  B(erms, 9)
+  B(invpcid, 10)
+  B(rtm, 11)
+  B(mpx, 14)
+  B(avx512f, 16)
+  B(avx512dq, 17)
+  B(rdseed, 18)
+  B(adx, 19)
+  B(smap, 20)
+  B(avx512ifma, 21)
+  B(pcommit, 22)
+  B(clflushopt, 23)
+  B(clwb, 24)
+  B(avx512pf, 26)
+  B(avx512er, 27)
+  B(avx512cd, 28)
+  B(sha, 29)
+  B(avx512bw, 30)
+  B(avx512vl, 31)
+#undef B
+#define C(name, bit) X(name, f7c, bit)
+  C(prefetchwt1, 0)
+  C(avx512vbmi, 1)
+#undef C
+
+#undef X
+
+#endif /* ZSTD_COMMON_CPU_H */
diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c
new file mode 100644 (file)
index 0000000..bb863c9
--- /dev/null
@@ -0,0 +1,24 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * This module only hosts one global variable
+ * which can be used to dynamically influence the verbosity of traces,
+ * such as DEBUGLOG and RAWLOG
+ */
+
+#include "debug.h"
+
+int g_debuglevel = DEBUGLEVEL;
diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h
new file mode 100644 (file)
index 0000000..6dd88d1
--- /dev/null
@@ -0,0 +1,101 @@
+/* ******************************************************************
+ * debug
+ * Part of FSE library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/*
+ * The purpose of this header is to enable debug functions.
+ * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
+ * and DEBUG_STATIC_ASSERT() for compile-time.
+ *
+ * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
+ *
+ * Level 1 enables assert() only.
+ * Starting level 2, traces can be generated and pushed to stderr.
+ * The higher the level, the more verbose the traces.
+ *
+ * It's possible to dynamically adjust level using variable g_debug_level,
+ * which is only declared if DEBUGLEVEL>=2,
+ * and is a global variable, not multi-thread protected (use with care)
+ */
+
+#ifndef DEBUG_H_12987983217
+#define DEBUG_H_12987983217
+
+
+
+/* static assert is triggered at compile time, leaving no runtime artefact.
+ * static assert only works with compile-time constants.
+ * Also, this variant can only be used inside a function. */
+#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
+
+
+/* DEBUGLEVEL is expected to be defined externally,
+ * typically through compiler command line.
+ * Value must be a number. */
+#ifndef DEBUGLEVEL
+#  define DEBUGLEVEL 0
+#endif
+
+
+/* recommended values for DEBUGLEVEL :
+ * 0 : release mode, no debug, all run-time checks disabled
+ * 1 : enables assert() only, no display
+ * 2 : reserved, for currently active debug path
+ * 3 : events once per object lifetime (CCtx, CDict, etc.)
+ * 4 : events once per frame
+ * 5 : events once per block
+ * 6 : events once per sequence (verbose)
+ * 7+: events at every position (*very* verbose)
+ *
+ * It's generally inconvenient to output traces > 5.
+ * In which case, it's possible to selectively trigger high verbosity levels
+ * by modifying g_debug_level.
+ */
+
+#if (DEBUGLEVEL>=1)
+#  define ZSTD_DEPS_NEED_ASSERT
+#  include "zstd_deps.h"
+#else
+#  ifndef assert   /* assert may be already defined, due to prior #include <assert.h> */
+#    define assert(condition) ((void)0)   /* disable assert (default) */
+#  endif
+#endif
+
+#if (DEBUGLEVEL>=2)
+#  define ZSTD_DEPS_NEED_IO
+#  include "zstd_deps.h"
+extern int g_debuglevel; /* the variable is only declared,
+                            it actually lives in debug.c,
+                            and is shared by the whole process.
+                            It's not thread-safe.
+                            It's useful when enabling very verbose levels
+                            on selective conditions (such as position in src) */
+
+#  define RAWLOG(l, ...) {                                       \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
+            }   }
+#  define DEBUGLOG(l, ...) {                                     \
+                if (l<=g_debuglevel) {                           \
+                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
+                    ZSTD_DEBUG_PRINT(" \n");                     \
+            }   }
+#else
+#  define RAWLOG(l, ...)      {}    /* disabled */
+#  define DEBUGLOG(l, ...)    {}    /* disabled */
+#endif
+
+
+
+#endif /* DEBUG_H_12987983217 */
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
new file mode 100644 (file)
index 0000000..53b47a2
--- /dev/null
@@ -0,0 +1,357 @@
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+*  Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h"       /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY  /* HUF_TABLELOG_ABSOLUTEMAX */
+#include "huf.h"
+
+
+/*===   Version   ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*===   Error Management   ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static U32 FSE_ctz(U32 val)
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_ctz(val);
+#   else   /* Software version */
+        U32 count = 0;
+        while ((val & 1) == 0) {
+            val >>= 1;
+            ++count;
+        }
+        return count;
+#   endif
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                           const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    unsigned const maxSV1 = *maxSVPtr + 1;
+    int previous0 = 0;
+
+    if (hbSize < 8) {
+        /* This function only works when hbSize >= 8 */
+        char buffer[8] = {0};
+        ZSTD_memcpy(buffer, headerBuffer, hbSize);
+        {   size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+                                                    buffer, sizeof(buffer));
+            if (FSE_isError(countSize)) return countSize;
+            if (countSize > hbSize) return ERROR(corruption_detected);
+            return countSize;
+    }   }
+    assert(hbSize >= 8);
+
+    /* init */
+    ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0]));   /* all symbols not present in NCount have a frequency of 0 */
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    for (;;) {
+        if (previous0) {
+            /* Count the number of repeats. Each time the
+             * 2-bit repeat code is 0b11 there is another
+             * repeat.
+             * Avoid UB by setting the high bit to 1.
+             */
+            int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            while (repeats >= 12) {
+                charnum += 3 * 12;
+                if (LIKELY(ip <= iend-7)) {
+                    ip += 3;
+                } else {
+                    bitCount -= (int)(8 * (iend - 7 - ip));
+                    bitCount &= 31;
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> bitCount;
+                repeats = FSE_ctz(~bitStream | 0x80000000) >> 1;
+            }
+            charnum += 3 * repeats;
+            bitStream >>= 2 * repeats;
+            bitCount += 2 * repeats;
+
+            /* Add the final repeat which isn't 0b11. */
+            assert((bitStream & 3) < 3);
+            charnum += bitStream & 3;
+            bitCount += 2;
+
+            /* This is an error, but break and return an error
+             * at the end, because returning out of a loop makes
+             * it harder for the compiler to optimize.
+             */
+            if (charnum >= maxSV1) break;
+
+            /* We don't need to set the normalized count to 0
+             * because we already memset the whole buffer to 0.
+             */
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                assert((bitCount >> 3) <= 3); /* For first condition to work */
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+        }
+        {
+            int const max = (2*threshold-1) - remaining;
+            int count;
+
+            if ((bitStream & (threshold-1)) < (U32)max) {
+                count = bitStream & (threshold-1);
+                bitCount += nbBits-1;
+            } else {
+                count = bitStream & (2*threshold-1);
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            /* When it matters (small blocks), this is a
+             * predictable branch, because we don't use -1.
+             */
+            if (count >= 0) {
+                remaining -= count;
+            } else {
+                assert(count == -1);
+                remaining += count;
+            }
+            normalizedCounter[charnum++] = (short)count;
+            previous0 = !count;
+
+            assert(threshold > 1);
+            if (remaining < threshold) {
+                /* This branch can be folded into the
+                 * threshold update condition because we
+                 * know that threshold > 1.
+                 */
+                if (remaining <= 1) break;
+                nbBits = BIT_highbit32(remaining) + 1;
+                threshold = 1 << (nbBits - 1);
+            }
+            if (charnum >= maxSV1) break;
+
+            if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
+                ip += bitCount>>3;
+                bitCount &= 7;
+            } else {
+                bitCount -= (int)(8 * (iend - 4 - ip));
+                bitCount &= 31;
+                ip = iend - 4;
+            }
+            bitStream = MEM_readLE32(ip) >> bitCount;
+    }   }
+    if (remaining != 1) return ERROR(corruption_detected);
+    /* Only possible when there are too many zeros. */
+    if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall);
+    if (bitCount > 32) return ERROR(corruption_detected);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    return ip-istart;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_readNCount_body_default(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+#endif
+
+size_t FSE_readNCount_bmi2(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize);
+}
+
+size_t FSE_readNCount(
+        short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+        const void* headerBuffer, size_t hbSize)
+{
+    return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
+}
+
+
+/*! HUF_readStats() :
+    Read compact Huffman tree, saved by HUF_writeCTable().
+    `huffWeight` is destination buffer.
+    `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
+    @return : size read from `src` , or an error Code .
+    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
+*/
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize)
+{
+    U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+    return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                   U32* nbSymbolsPtr, U32* tableLogPtr,
+                   const void* src, size_t srcSize,
+                   void* workSpace, size_t wkspSize,
+                   int bmi2)
+{
+    U32 weightTotal;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    /* ZSTD_memset(huffWeight, 0, hwSize);   *//* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128) {  /* special header */
+        oSize = iSize - 127;
+        iSize = ((oSize+1)/2);
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        if (oSize >= hwSize) return ERROR(corruption_detected);
+        ip += 1;
+        {   U32 n;
+            for (n=0; n<oSize; n+=2) {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+    }   }   }
+    else  {   /* header compressed with FSE (normal case) */
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        /* max (hwSize-1) values decoded, as last one is implied */
+        oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2);
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
+    weightTotal = 0;
+    {   U32 n; for (n=0; n<oSize; n++) {
+            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            rankStats[huffWeight[n]]++;
+            weightTotal += (1 << huffWeight[n]) >> 1;
+    }   }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
+        if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+        *tableLogPtr = tableLog;
+        /* determine last weight */
+        {   U32 const total = 1 << tableLog;
+            U32 const rest = total - weightTotal;
+            U32 const verif = 1 << BIT_highbit32(rest);
+            U32 const lastWeight = BIT_highbit32(rest) + 1;
+            if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+            huffWeight[oSize] = (BYTE)lastWeight;
+            rankStats[lastWeight]++;
+    }   }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    return iSize+1;
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize)
+{
+    return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                     U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize,
+                     void* workSpace, size_t wkspSize,
+                     int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
+}
diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c
new file mode 100644 (file)
index 0000000..6d1135f
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* The purpose of this file is to have a single list of error strings embedded in binary */
+
+#include "error_private.h"
+
+const char* ERR_getErrorString(ERR_enum code)
+{
+#ifdef ZSTD_STRIP_ERROR_STRINGS
+    (void)code;
+    return "Error strings stripped";
+#else
+    static const char* const notErrorCode = "Unspecified error code";
+    switch( code )
+    {
+    case PREFIX(no_error): return "No error detected";
+    case PREFIX(GENERIC):  return "Error (generic)";
+    case PREFIX(prefix_unknown): return "Unknown frame descriptor";
+    case PREFIX(version_unsupported): return "Version not supported";
+    case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
+    case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
+    case PREFIX(corruption_detected): return "Corrupted block detected";
+    case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
+    case PREFIX(parameter_unsupported): return "Unsupported parameter";
+    case PREFIX(parameter_outOfBound): return "Parameter is out of bound";
+    case PREFIX(init_missing): return "Context should be init first";
+    case PREFIX(memory_allocation): return "Allocation error : not enough memory";
+    case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough";
+    case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
+    case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
+    case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
+    case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
+    case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
+    case PREFIX(dictionary_wrong): return "Dictionary mismatch";
+    case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
+    case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
+    case PREFIX(srcSize_wrong): return "Src size is incorrect";
+    case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer";
+        /* following error codes are not stable and may be removed or changed in a future version */
+    case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
+    case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong";
+    case PREFIX(srcBuffer_wrong): return "Source buffer is wrong";
+    case PREFIX(maxCode):
+    default: return notErrorCode;
+    }
+#endif
+}
diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
new file mode 100644 (file)
index 0000000..d14e686
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Note : this module is expected to remain private, do not expose it */
+
+#ifndef ERROR_H_MODULE
+#define ERROR_H_MODULE
+
+
+
+/* ****************************************
+*  Dependencies
+******************************************/
+#include "zstd_deps.h"    /* size_t */
+#include <linux/zstd_errors.h>  /* enum list */
+
+
+/* ****************************************
+*  Compiler-specific
+******************************************/
+#define ERR_STATIC static __attribute__((unused))
+
+
+/*-****************************************
+*  Customization (error_public.h)
+******************************************/
+typedef ZSTD_ErrorCode ERR_enum;
+#define PREFIX(name) ZSTD_error_##name
+
+
+/*-****************************************
+*  Error codes handling
+******************************************/
+#undef ERROR   /* already defined on Visual Studio */
+#define ERROR(name) ZSTD_ERROR(name)
+#define ZSTD_ERROR(name) ((size_t)-PREFIX(name))
+
+ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
+
+ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
+
+/* check and forward error code */
+#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
+#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
+
+
+/*-****************************************
+*  Error Strings
+******************************************/
+
+const char* ERR_getErrorString(ERR_enum code);   /* error_private.c */
+
+ERR_STATIC const char* ERR_getErrorName(size_t code)
+{
+    return ERR_getErrorString(ERR_getErrorCode(code));
+}
+
+
+#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
new file mode 100644 (file)
index 0000000..0bb174c
--- /dev/null
@@ -0,0 +1,710 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy codec
+ * Public Prototypes declaration
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef FSE_H
+#define FSE_H
+
+
+/*-*****************************************
+*  Dependencies
+******************************************/
+#include "zstd_deps.h"    /* size_t, ptrdiff_t */
+
+
+/*-*****************************************
+*  FSE_PUBLIC_API : control library symbols visibility
+******************************************/
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define FSE_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define FSE_PUBLIC_API
+#endif
+
+/*------   Version   ------*/
+#define FSE_VERSION_MAJOR    0
+#define FSE_VERSION_MINOR    9
+#define FSE_VERSION_RELEASE  0
+
+#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
+#define FSE_QUOTE(str) #str
+#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
+#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
+
+#define FSE_VERSION_NUMBER  (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
+FSE_PUBLIC_API unsigned FSE_versionNumber(void);   /*< library version number; to be used when checking dll version */
+
+
+/*-****************************************
+*  FSE simple functions
+******************************************/
+/*! FSE_compress() :
+    Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
+    'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
+    @return : size of compressed data (<= dstCapacity).
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
+                     if FSE_isError(return), compression failed (more details using FSE_getErrorName())
+*/
+FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/*! FSE_decompress():
+    Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
+    into already allocated destination buffer 'dst', of size 'dstCapacity'.
+    @return : size of regenerated data (<= maxDstSize),
+              or an error code, which can be tested using FSE_isError() .
+
+    ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
+    Why ? : making this distinction requires a header.
+    Header management is intentionally delegated to the user layer, which can better manage special cases.
+*/
+FSE_PUBLIC_API size_t FSE_decompress(void* dst,  size_t dstCapacity,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/*-*****************************************
+*  Tool functions
+******************************************/
+FSE_PUBLIC_API size_t FSE_compressBound(size_t size);       /* maximum compressed size */
+
+/* Error Management */
+FSE_PUBLIC_API unsigned    FSE_isError(size_t code);        /* tells if a return value is an error code */
+FSE_PUBLIC_API const char* FSE_getErrorName(size_t code);   /* provides error code string (useful for debugging) */
+
+
+/*-*****************************************
+*  FSE advanced functions
+******************************************/
+/*! FSE_compress2() :
+    Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
+    Both parameters can be defined as '0' to mean : use default value
+    @return : size of compressed data
+    Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
+                     if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
+                     if FSE_isError(return), it's an error code.
+*/
+FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+
+
+/*-*****************************************
+*  FSE detailed API
+******************************************/
+/*!
+FSE_compress() does the following:
+1. count symbol occurrence from source[] into table count[] (see hist.h)
+2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
+3. save normalized counters to memory buffer using writeNCount()
+4. build encoding table 'CTable' from normalized counters
+5. encode the data stream using encoding table 'CTable'
+
+FSE_decompress() does the following:
+1. read normalized counters with readNCount()
+2. build decoding table 'DTable' from normalized counters
+3. decode the data stream using decoding table 'DTable'
+
+The following API allows targeting specific sub-functions for advanced tasks.
+For example, it's possible to compress several blocks using the same 'CTable',
+or to save and provide normalized distribution using external method.
+*/
+
+/* *** COMPRESSION *** */
+
+/*! FSE_optimalTableLog():
+    dynamically downsize 'tableLog' when conditions are met.
+    It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
+    @return : recommended tableLog (necessarily <= 'maxTableLog') */
+FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+
+/*! FSE_normalizeCount():
+    normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
+    'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
+    useLowProbCount is a boolean parameter which trades off compressed size for
+    faster header decoding. When it is set to 1, the compressed data will be slightly
+    smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be
+    faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0
+    is a good default, since header deserialization makes a big speed difference.
+    Otherwise, useLowProbCount=1 is a good default, since the speed difference is small.
+    @return : tableLog,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
+                    const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount);
+
+/*! FSE_NCountWriteBound():
+    Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
+    Typically useful for allocation purpose. */
+FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_writeNCount():
+    Compactly save 'normalizedCounter' into 'buffer'.
+    @return : size of the compressed table,
+              or an errorCode, which can be tested using FSE_isError(). */
+FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                                 const short* normalizedCounter,
+                                 unsigned maxSymbolValue, unsigned tableLog);
+
+/*! Constructor and Destructor of FSE_CTable.
+    Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
+typedef unsigned FSE_CTable;   /* don't allocate that. It's only meant to be more restrictive than void* */
+FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeCTable (FSE_CTable* ct);
+
+/*! FSE_buildCTable():
+    Builds `ct`, which must be already allocated, using FSE_createCTable().
+    @return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_compress_usingCTable():
+    Compress `src` using `ct` into `dst` which must be already allocated.
+    @return : size of compressed data (<= `dstCapacity`),
+              or 0 if compressed data could not fit into `dst`,
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
+
+/*!
+Tutorial :
+----------
+The first step is to count all symbols. FSE_count() does this job very fast.
+Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
+'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
+maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
+FSE_count() will return the number of occurrence of the most frequent symbol.
+This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+The next step is to normalize the frequencies.
+FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
+It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
+You can use 'tableLog'==0 to mean "use default tableLog value".
+If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
+which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
+
+The result of FSE_normalizeCount() will be saved into a table,
+called 'normalizedCounter', which is a table of signed short.
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
+The return value is tableLog if everything proceeded as expected.
+It is 0 if there is a single symbol within distribution.
+If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
+
+'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
+'buffer' must be already allocated.
+For guaranteed success, buffer size must be at least FSE_headerBound().
+The result of the function is the number of bytes written into 'buffer'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
+
+'normalizedCounter' can then be used to create the compression table 'CTable'.
+The space required by 'CTable' must be already allocated, using FSE_createCTable().
+You can then use FSE_buildCTable() to fill 'CTable'.
+If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
+
+'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
+Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
+The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
+If it returns '0', compressed data could not fit into 'dst'.
+If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
+*/
+
+
+/* *** DECOMPRESSION *** */
+
+/*! FSE_readNCount():
+    Read compactly saved 'normalizedCounter' from 'rBuffer'.
+    @return : size read from 'rBuffer',
+              or an errorCode, which can be tested using FSE_isError().
+              maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
+FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize);
+
+/*! FSE_readNCount_bmi2():
+ * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise.
+ */
+FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter,
+                           unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
+                           const void* rBuffer, size_t rBuffSize, int bmi2);
+
+/*! Constructor and Destructor of FSE_DTable.
+    Note that its size depends on 'tableLog' */
+typedef unsigned FSE_DTable;   /* don't allocate that. It's just a way to be more restrictive than void* */
+FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
+FSE_PUBLIC_API void        FSE_freeDTable(FSE_DTable* dt);
+
+/*! FSE_buildDTable():
+    Builds 'dt', which must be already allocated, using FSE_createDTable().
+    return : 0, or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
+
+/*! FSE_decompress_usingDTable():
+    Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
+    into `dst` which must be already allocated.
+    @return : size of regenerated data (necessarily <= `dstCapacity`),
+              or an errorCode, which can be tested using FSE_isError() */
+FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
+
+/*!
+Tutorial :
+----------
+(Note : these functions only decompress FSE-compressed blocks.
+ If block is uncompressed, use memcpy() instead
+ If block is a single repeated byte, use memset() instead )
+
+The first step is to obtain the normalized frequencies of symbols.
+This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
+'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
+In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
+or size the table to handle worst case situations (typically 256).
+FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
+The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
+Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
+This is performed by the function FSE_buildDTable().
+The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
+If there is an error, the function will return an error code, which can be tested using FSE_isError().
+
+`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
+`cSrcSize` must be strictly correct, otherwise decompression will fail.
+FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
+*/
+
+#endif  /* FSE_H */
+
+#if !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
+#define FSE_H_FSE_STATIC_LINKING_ONLY
+
+/* *** Dependency *** */
+#include "bitstream.h"
+
+
+/* *****************************************
+*  Static allocation
+*******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */)
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<(maxTableLog)))
+
+/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
+#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue)   (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
+#define FSE_DTABLE_SIZE(maxTableLog)                   (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
+
+
+/* *****************************************
+ *  FSE advanced API
+ ***************************************** */
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
+/*< same as FSE_optimalTableLog(), which used `minus==2` */
+
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
+/*< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
+
+size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
+/*< build a fake FSE_CTable, designed to compress always the same symbolValue */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ */
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
+#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8)
+#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned))
+FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+/*< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/*< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
+
+size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/*< build a fake FSE_DTable, designed to always generate the same symbolValue */
+
+#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1)
+#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned))
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize);
+/*< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2);
+/*< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */
+
+typedef enum {
+   FSE_repeat_none,  /*< Cannot use the previous table */
+   FSE_repeat_check, /*< Can use the previous table but it must be checked */
+   FSE_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+ } FSE_repeat;
+
+/* *****************************************
+*  FSE symbol compression API
+*******************************************/
+/*!
+   This API consists of small unitary functions, which highly benefit from being inlined.
+   Hence their body are included in next section.
+*/
+typedef struct {
+    ptrdiff_t   value;
+    const void* stateTable;
+    const void* symbolTT;
+    unsigned    stateLog;
+} FSE_CState_t;
+
+static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
+
+static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
+
+static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
+
+/*<
+These functions are inner components of FSE_compress_usingCTable().
+They allow the creation of custom streams, mixing multiple tables and bit sources.
+
+A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
+So the first symbol you will encode is the last you will decode, like a LIFO stack.
+
+You will need a few variables to track your CStream. They are :
+
+FSE_CTable    ct;         // Provided by FSE_buildCTable()
+BIT_CStream_t bitStream;  // bitStream tracking structure
+FSE_CState_t  state;      // State tracking structure (can have several)
+
+
+The first thing to do is to init bitStream and state.
+    size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
+    FSE_initCState(&state, ct);
+
+Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
+You can then encode your input data, byte after byte.
+FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
+Remember decoding will be done in reverse direction.
+    FSE_encodeByte(&bitStream, &state, symbol);
+
+At any time, you can also add any bit sequence.
+Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
+    BIT_addBits(&bitStream, bitField, nbBits);
+
+The above methods don't commit data to memory, they just store it into local register, for speed.
+Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+Writing data to memory is a manual operation, performed by the flushBits function.
+    BIT_flushBits(&bitStream);
+
+Your last FSE encoding operation shall be to flush your last state value(s).
+    FSE_flushState(&bitStream, &state);
+
+Finally, you must close the bitStream.
+The function returns the size of CStream in bytes.
+If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
+If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
+    size_t size = BIT_closeCStream(&bitStream);
+*/
+
+
+/* *****************************************
+*  FSE symbol decompression API
+*******************************************/
+typedef struct {
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+/*<
+Let's now decompose FSE_decompress_usingDTable() into its unitary components.
+You will decode FSE-encoded symbols from the bitStream,
+and also any other bitFields you put in, **in reverse order**.
+
+You will need a few variables to track your bitStream. They are :
+
+BIT_DStream_t DStream;    // Stream context
+FSE_DState_t  DState;     // State context. Multiple ones are possible
+FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
+
+The first thing to do is to init the bitStream.
+    errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
+
+You should then retrieve your initial state(s)
+(in reverse flushing order if you have several ones) :
+    errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
+
+You can then decode your data, symbol after symbol.
+For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
+Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
+    unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
+
+You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
+Note : maximum allowed nbBits is 25, for 32-bits compatibility
+    size_t bitField = BIT_readBits(&DStream, nbBits);
+
+All above operations only read from local register (which size depends on size_t).
+Refueling the register from memory is manually performed by the reload method.
+    endSignal = FSE_reloadDStream(&DStream);
+
+BIT_reloadDStream() result tells if there is still some more data to read from DStream.
+BIT_DStream_unfinished : there is still some data left into the DStream.
+BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
+BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
+BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
+
+When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
+to properly detect the exact end of stream.
+After each decoded symbol, check if DStream is fully consumed using this simple test :
+    BIT_reloadDStream(&DStream) >= BIT_DStream_completed
+
+When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
+Checking if DStream has reached its end is performed by :
+    BIT_endOfDStream(&DStream);
+Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
+    FSE_endOfDState(&DState);
+*/
+
+
+/* *****************************************
+*  FSE unsafe API
+*******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/* *****************************************
+*  Implementation of inlined functions
+*******************************************/
+typedef struct {
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform; /* total 8 bytes */
+
+MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
+{
+    const void* ptr = ct;
+    const U16* u16ptr = (const U16*) ptr;
+    const U32 tableLog = MEM_read16(ptr);
+    statePtr->value = (ptrdiff_t)1<<tableLog;
+    statePtr->stateTable = u16ptr+2;
+    statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
+    statePtr->stateLog = tableLog;
+}
+
+
+/*! FSE_initCState2() :
+*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
+*   uses the smallest state value possible, saving the cost of this symbol */
+MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
+{
+    FSE_initCState(statePtr, ct);
+    {   const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+        const U16* stateTable = (const U16*)(statePtr->stateTable);
+        U32 nbBitsOut  = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
+        statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
+        statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+    }
+}
+
+MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
+{
+    FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
+    const U16* const stateTable = (const U16*)(statePtr->stateTable);
+    U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
+    BIT_addBits(bitC, statePtr->value, nbBitsOut);
+    statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
+}
+
+MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
+{
+    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+    BIT_flushBits(bitC);
+}
+
+
+/* FSE_getMaxNbBits() :
+ * Approximate maximum cost of a symbol, in bits.
+ * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
+{
+    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
+}
+
+/* FSE_bitCost() :
+ * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
+ * note 1 : assume symbolValue is valid (<= maxSymbolValue)
+ * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
+MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
+{
+    const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
+    U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
+    U32 const threshold = (minNbBits+1) << 16;
+    assert(tableLog < 16);
+    assert(accuracyLog < 31-tableLog);  /* ensure enough room for renormalization double shift */
+    {   U32 const tableSize = 1 << tableLog;
+        U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
+        U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog;   /* linear interpolation (very approximate) */
+        U32 const bitMultiplier = 1 << accuracyLog;
+        assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
+        assert(normalizedDeltaFromThreshold <= bitMultiplier);
+        return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
+    }
+}
+
+
+/* ======    Decompression    ====== */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    return DInfo.symbol;
+}
+
+MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.newState + lowBits;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+/*! FSE_decodeSymbolFast() :
+    unsafe, only works if no symbol has a probability > 50% */
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    BYTE const symbol = DInfo.symbol;
+    size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
+MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
+{
+    return DStatePtr->state == 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/* **************************************************************
+*  Tuning parameters
+****************************************************************/
+/*!MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#ifndef FSE_MAX_MEMORY_USAGE
+#  define FSE_MAX_MEMORY_USAGE 14
+#endif
+#ifndef FSE_DEFAULT_MEMORY_USAGE
+#  define FSE_DEFAULT_MEMORY_USAGE 13
+#endif
+#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE)
+#  error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE"
+#endif
+
+/*!FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#ifndef FSE_MAX_SYMBOL_VALUE
+#  define FSE_MAX_SYMBOL_VALUE 255
+#endif
+
+/* **************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/* ***************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#  error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3)
+
+
+#endif /* FSE_STATIC_LINKING_ONLY */
+
+
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
new file mode 100644 (file)
index 0000000..2c8bbe3
--- /dev/null
@@ -0,0 +1,390 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy decoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "debug.h"      /* assert */
+#include "bitstream.h"
+#include "compiler.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#include "error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+FSE_DTable* FSE_createDTable (unsigned tableLog)
+{
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
+}
+
+void FSE_freeDTable (FSE_DTable* dt)
+{
+    ZSTD_free(dt);
+}
+
+static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    void* const tdPtr = dt+1;   /* because *dt is unsigned, 32-bits aligned on 32-bits */
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
+    U16* symbolNext = (U16*)workSpace;
+    BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1);
+
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+    U32 highThreshold = tableSize-1;
+
+    /* Sanity Checks */
+    if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge);
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    {   FSE_DTableHeader DTableH;
+        DTableH.tableLog = (U16)tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    symbolNext[s] = normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what emperically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].symbol = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            for (i=0; i<normalizedCounter[s]; i++) {
+                tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {   U32 u;
+        for (u=0; u<tableSize; u++) {
+            FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+    }   }
+
+    return 0;
+}
+
+size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize);
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-*******************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    void* dPtr = dt + 1;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSV1 = tableMask+1;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);         /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<maxSV1; s++) {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+
+    /* Init */
+    CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1) {
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state1);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state2);
+            break;
+        }
+
+        if (op>(omax-2)) return ERROR(dstSize_tooSmall);
+        *op++ = FSE_GETSYMBOL(&state2);
+        if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
+            *op++ = FSE_GETSYMBOL(&state1);
+            break;
+    }   }
+
+    return op-ostart;
+}
+
+
+size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                            const void* cSrc, size_t cSrcSize,
+                            const FSE_DTable* dt)
+{
+    const void* ptr = dt;
+    const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+    const U32 fastMode = DTableH->fastMode;
+
+    /* select fast mode (static) */
+    if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+typedef struct {
+    short ncount[FSE_MAX_SYMBOL_VALUE + 1];
+    FSE_DTable dtable[1]; /* Dynamically sized */
+} FSE_DecompressWksp;
+
+
+FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
+        void* dst, size_t dstCapacity,
+        const void* cSrc, size_t cSrcSize,
+        unsigned maxLog, void* workSpace, size_t wkspSize,
+        int bmi2)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+
+    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
+
+    /* normal FSE decoding mode */
+    {
+        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+        if (FSE_isError(NCountLength)) return NCountLength;
+        if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
+        assert(NCountLength <= cSrcSize);
+        ip += NCountLength;
+        cSrcSize -= NCountLength;
+    }
+
+    if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge);
+    workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog);
+    wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
+
+    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+
+    {
+        const void* ptr = wksp->dtable;
+        const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
+        const U32 fastMode = DTableH->fastMode;
+
+        /* select fast mode (static) */
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+{
+    return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
+}
+#endif
+
+size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+    }
+#endif
+    (void)bmi2;
+    return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize);
+}
+
+
+typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
new file mode 100644 (file)
index 0000000..88c5586
--- /dev/null
@@ -0,0 +1,356 @@
+/* ******************************************************************
+ * huff0 huffman codec,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+
+#ifndef HUF_H_298734234
+#define HUF_H_298734234
+
+/* *** Dependencies *** */
+#include "zstd_deps.h"    /* size_t */
+
+
+/* *** library symbols visibility *** */
+/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual,
+ *        HUF symbols remain "private" (internal symbols for library only).
+ *        Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */
+#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
+#  define HUF_PUBLIC_API __attribute__ ((visibility ("default")))
+#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1)   /* Visual expected */
+#  define HUF_PUBLIC_API __declspec(dllexport)
+#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
+#  define HUF_PUBLIC_API __declspec(dllimport)  /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */
+#else
+#  define HUF_PUBLIC_API
+#endif
+
+
+/* ========================== */
+/* ***  simple functions  *** */
+/* ========================== */
+
+/* HUF_compress() :
+ *  Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
+ * 'dst' buffer must be already allocated.
+ *  Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
+ * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
+ * @return : size of compressed data (<= `dstCapacity`).
+ *  Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
+ *                   if HUF_isError(return), compression failed (more details using HUF_getErrorName())
+ */
+HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity,
+                             const void* src, size_t srcSize);
+
+/* HUF_decompress() :
+ *  Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
+ *  into already allocated buffer 'dst', of minimum size 'dstSize'.
+ * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data.
+ *  Note : in contrast with FSE, HUF_decompress can regenerate
+ *         RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
+ *         because it knows size to regenerate (originalSize).
+ * @return : size of regenerated data (== originalSize),
+ *           or an error code, which can be tested using HUF_isError()
+ */
+HUF_PUBLIC_API size_t HUF_decompress(void* dst,  size_t originalSize,
+                               const void* cSrc, size_t cSrcSize);
+
+
+/* ***   Tool functions *** */
+#define HUF_BLOCKSIZE_MAX (128 * 1024)                  /*< maximum input size for a single block compressed with HUF_compress */
+HUF_PUBLIC_API size_t HUF_compressBound(size_t size);   /*< maximum compressed size (worst case) */
+
+/* Error Management */
+HUF_PUBLIC_API unsigned    HUF_isError(size_t code);       /*< tells if a return value is an error code */
+HUF_PUBLIC_API const char* HUF_getErrorName(size_t code);  /*< provides error code string (useful for debugging) */
+
+
+/* ***   Advanced function   *** */
+
+/* HUF_compress2() :
+ *  Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`.
+ * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX .
+ * `tableLog` must be `<= HUF_TABLELOG_MAX` . */
+HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               unsigned maxSymbolValue, unsigned tableLog);
+
+/* HUF_compress4X_wksp() :
+ *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
+ * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
+#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
+                                     const void* src, size_t srcSize,
+                                     unsigned maxSymbolValue, unsigned tableLog,
+                                     void* workSpace, size_t wkspSize);
+
+#endif   /* HUF_H_298734234 */
+
+/* ******************************************************************
+ *  WARNING !!
+ *  The following section contains advanced and experimental definitions
+ *  which shall never be used in the context of a dynamic library,
+ *  because they are not guaranteed to remain stable in the future.
+ *  Only consider them in association with static linking.
+ * *****************************************************************/
+#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY)
+#define HUF_H_HUF_STATIC_LINKING_ONLY
+
+/* *** Dependencies *** */
+#include "mem.h"   /* U32 */
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+
+
+/* *** Constants *** */
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
+#define HUF_SYMBOLVALUE_MAX  255
+
+#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
+#  error "HUF_TABLELOG_MAX is too large !"
+#endif
+
+
+/* ****************************************
+*  Static allocation
+******************************************/
+/* HUF buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true when incompressible is pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of HUF's Compression Table */
+/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
+struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+};   /* typedef'd to HUF_CElt */
+typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
+    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+
+/* static allocation of HUF's DTable */
+typedef U32 HUF_DTable;
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<(maxTableLog)))
+#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
+
+
+/* ****************************************
+*  Advanced decompression functions
+******************************************/
+size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< decodes RLE and uncompressed */
+size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /*< considers RLE and uncompressed as errors */
+size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+#endif
+
+
+/* ****************************************
+ *  HUF detailed API
+ * ****************************************/
+
+/*! HUF_compress() does the following:
+ *  1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h")
+ *  2. (optional) refine tableLog using HUF_optimalTableLog()
+ *  3. build Huffman table from count using HUF_buildCTable()
+ *  4. save Huffman table to memory buffer using HUF_writeCTable()
+ *  5. encode the data stream using HUF_compress4X_usingCTable()
+ *
+ *  The following API allows targeting specific sub-functions for advanced tasks.
+ *  For example, it's possible to compress several blocks using the same 'CTable',
+ *  or to save and regenerate 'CTable' using external methods.
+ */
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
+size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);   /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
+
+typedef enum {
+   HUF_repeat_none,  /*< Cannot use the previous table */
+   HUF_repeat_check, /*< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
+   HUF_repeat_valid  /*< Can use the previous table and it is assumed to be valid */
+ } HUF_repeat;
+/* HUF_compress4X_repeat() :
+ *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+/* HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE.
+ */
+#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
+#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_buildCTable_wksp (HUF_CElt* tree,
+                       const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                             void* workSpace, size_t wkspSize);
+
+/*! HUF_readStats() :
+ *  Read compact Huffman tree, saved by HUF_writeCTable().
+ * `huffWeight` is destination buffer.
+ * @return : size read from `src` , or an error Code .
+ *  Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
+size_t HUF_readStats(BYTE* huffWeight, size_t hwSize,
+                     U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                     const void* src, size_t srcSize);
+
+/*! HUF_readStats_wksp() :
+ * Same as HUF_readStats() but takes an external workspace which must be
+ * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1)
+#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned))
+size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
+                          U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr,
+                          const void* src, size_t srcSize,
+                          void* workspace, size_t wkspSize,
+                          int bmi2);
+
+/* HUF_readCTable() :
+ *  Loading a CTable saved with HUF_writeCTable() */
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
+
+/* HUF_getNbBits() :
+ *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
+ *  Note 1 : is not inlined, as HUF_CElt definition is private
+ *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+
+/*
+ * HUF_decompress() does the following:
+ * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
+ * 2. build Huffman table from save, using HUF_readDTableX?()
+ * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
+ */
+
+/* HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
+
+/*
+ *  The minimum workspace size for the `workSpace` used in
+ *  HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
+ *
+ *  The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
+ *  HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
+ *  Buffer overflow errors may potentially occur if code modifications result in
+ *  a required workspace size greater than that specified in the following
+ *  macro.
+ */
+#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9))
+#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
+size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
+#endif
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+
+/* ====================== */
+/* single stream variants */
+/* ====================== */
+
+size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+/* HUF_compress1X_repeat() :
+ *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
+ *  If it uses hufTable it does not modify hufTable or repeat.
+ *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
+ *  If preferRepeat then the old table will always be used if valid. */
+size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
+                       const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned tableLog,
+                       void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+
+size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* double-symbol decoder */
+#endif
+
+size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
+size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< single-symbol decoder */
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< single-symbol decoder */
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /*< double-symbols decoder */
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);   /*< double-symbols decoder */
+#endif
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);   /*< automatic selection of sing or double symbol decoder, based on DTable */
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
+#endif
+
+/* BMI2 variants.
+ * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
+ */
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
+
+#endif /* HUF_STATIC_LINKING_ONLY */
+
diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
new file mode 100644 (file)
index 0000000..dcdd586
--- /dev/null
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <asm/unaligned.h>  /* get_unaligned, put_unaligned* */
+#include <linux/compiler.h>  /* inline */
+#include <linux/swab.h>  /* swab32, swab64 */
+#include <linux/types.h>  /* size_t, ptrdiff_t */
+#include "debug.h"  /* DEBUG_STATIC_ASSERT */
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#define MEM_STATIC static inline
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef int16_t  S16;
+typedef uint32_t U32;
+typedef int32_t  S32;
+typedef uint64_t U64;
+typedef int64_t  S64;
+
+/*-**************************************************************
+*  Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+/*-**************************************************************
+*  Memory I/O Implementation
+*****************************************************************/
+MEM_STATIC unsigned MEM_32bits(void)
+{
+    return sizeof(size_t) == 4;
+}
+
+MEM_STATIC unsigned MEM_64bits(void)
+{
+    return sizeof(size_t) == 8;
+}
+
+#if defined(__LITTLE_ENDIAN)
+#define MEM_LITTLE_ENDIAN 1
+#else
+#define MEM_LITTLE_ENDIAN 0
+#endif
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    return MEM_LITTLE_ENDIAN;
+}
+
+MEM_STATIC U16 MEM_read16(const void *memPtr)
+{
+    return get_unaligned((const U16 *)memPtr);
+}
+
+MEM_STATIC U32 MEM_read32(const void *memPtr)
+{
+    return get_unaligned((const U32 *)memPtr);
+}
+
+MEM_STATIC U64 MEM_read64(const void *memPtr)
+{
+    return get_unaligned((const U64 *)memPtr);
+}
+
+MEM_STATIC size_t MEM_readST(const void *memPtr)
+{
+    return get_unaligned((const size_t *)memPtr);
+}
+
+MEM_STATIC void MEM_write16(void *memPtr, U16 value)
+{
+    put_unaligned(value, (U16 *)memPtr);
+}
+
+MEM_STATIC void MEM_write32(void *memPtr, U32 value)
+{
+    put_unaligned(value, (U32 *)memPtr);
+}
+
+MEM_STATIC void MEM_write64(void *memPtr, U64 value)
+{
+    put_unaligned(value, (U64 *)memPtr);
+}
+
+/*=== Little endian r/w ===*/
+
+MEM_STATIC U16 MEM_readLE16(const void *memPtr)
+{
+    return get_unaligned_le16(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val)
+{
+    put_unaligned_le16(val, memPtr);
+}
+
+MEM_STATIC U32 MEM_readLE24(const void *memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16);
+}
+
+MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val)
+{
+       MEM_writeLE16(memPtr, (U16)val);
+       ((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void *memPtr)
+{
+    return get_unaligned_le32(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32)
+{
+    put_unaligned_le32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readLE64(const void *memPtr)
+{
+    return get_unaligned_le64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64)
+{
+    put_unaligned_le64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readLEST(const void *memPtr)
+{
+       if (MEM_32bits())
+               return (size_t)MEM_readLE32(memPtr);
+       else
+               return (size_t)MEM_readLE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val)
+{
+       if (MEM_32bits())
+               MEM_writeLE32(memPtr, (U32)val);
+       else
+               MEM_writeLE64(memPtr, (U64)val);
+}
+
+/*=== Big endian r/w ===*/
+
+MEM_STATIC U32 MEM_readBE32(const void *memPtr)
+{
+    return get_unaligned_be32(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32)
+{
+    put_unaligned_be32(val32, memPtr);
+}
+
+MEM_STATIC U64 MEM_readBE64(const void *memPtr)
+{
+    return get_unaligned_be64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64)
+{
+    put_unaligned_be64(val64, memPtr);
+}
+
+MEM_STATIC size_t MEM_readBEST(const void *memPtr)
+{
+       if (MEM_32bits())
+               return (size_t)MEM_readBE32(memPtr);
+       else
+               return (size_t)MEM_readBE64(memPtr);
+}
+
+MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val)
+{
+       if (MEM_32bits())
+               MEM_writeBE32(memPtr, (U32)val);
+       else
+               MEM_writeBE64(memPtr, (U64)val);
+}
+
+MEM_STATIC U32 MEM_swap32(U32 in)
+{
+    return swab32(in);
+}
+
+MEM_STATIC U64 MEM_swap64(U64 in)
+{
+    return swab64(in);
+}
+
+MEM_STATIC size_t MEM_swapST(size_t in)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_swap32((U32)in);
+    else
+        return (size_t)MEM_swap64((U64)in);
+}
+
+#endif /* MEM_H_MODULE */
diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
new file mode 100644 (file)
index 0000000..3d7e35b
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#define ZSTD_DEPS_NEED_MALLOC
+#include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
+#include "error_private.h"
+#include "zstd_internal.h"
+
+
+/*-****************************************
+*  Version
+******************************************/
+unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; }
+
+const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
+
+
+/*-****************************************
+*  ZSTD Error Management
+******************************************/
+#undef ZSTD_isError   /* defined within zstd_internal.h */
+/*! ZSTD_isError() :
+ *  tells if a return value is an error code
+ *  symbol is required for external callers */
+unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
+
+/*! ZSTD_getErrorName() :
+ *  provides error code string from function result (useful for debugging) */
+const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+/*! ZSTD_getError() :
+ *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
+ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
+
+/*! ZSTD_getErrorString() :
+ *  provides error code string from enum */
+const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
+
+
+
+/*=**************************************************************
+*  Custom allocator
+****************************************************************/
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc)
+        return customMem.customAlloc(customMem.opaque, size);
+    return ZSTD_malloc(size);
+}
+
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
+{
+    if (customMem.customAlloc) {
+        /* calloc implemented as malloc+memset;
+         * not as efficient as calloc, but next best guess for custom malloc */
+        void* const ptr = customMem.customAlloc(customMem.opaque, size);
+        ZSTD_memset(ptr, 0, size);
+        return ptr;
+    }
+    return ZSTD_calloc(1, size);
+}
+
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
+{
+    if (ptr!=NULL) {
+        if (customMem.customFree)
+            customMem.customFree(customMem.opaque, ptr);
+        else
+            ZSTD_free(ptr);
+    }
+}
diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
new file mode 100644 (file)
index 0000000..7a5bf44
--- /dev/null
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file provides common libc dependencies that zstd requires.
+ * The purpose is to allow replacing this file with a custom implementation
+ * to compile zstd without libc support.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <linux/limits.h>
+#include <linux/stddef.h>
+
+#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n))
+#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n))
+#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n))
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/*
+ * Define malloc as always failing. That means the user must
+ * either use ZSTD_customMem or statically allocate memory.
+ * Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#define ZSTD_malloc(s) ({ (void)(s); NULL; })
+#define ZSTD_free(p) ((void)(p))
+#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; })
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#include <linux/math64.h>
+
+static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
+  return div_u64(dividend, divisor);
+}
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 1, meaning
+ * it is disabled in production.
+ * Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <linux/kernel.h>
+
+#define assert(x) WARN_ON((x))
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/*
+ * This is only requested when DEBUGLEVEL >= 2, meaning
+ * it is disabled in production.
+ * Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <linux/printk.h>
+
+#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/*
+ * Only requested when MSAN is enabled.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+/*
+ * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which
+ * is an unsigned long.
+ */
+typedef long intptr_t;
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
new file mode 100644 (file)
index 0000000..fc6f3a9
--- /dev/null
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CCOMMON_H_MODULE
+#define ZSTD_CCOMMON_H_MODULE
+
+/* this module contains definitions which must be identical
+ * across compression, decompression and dictBuilder.
+ * It also contains a few functions useful to at least 2 of them
+ * and which benefit from being inlined */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "compiler.h"
+#include "mem.h"
+#include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
+#include "error_private.h"
+#define ZSTD_STATIC_LINKING_ONLY
+#include <linux/zstd.h>
+#define FSE_STATIC_LINKING_ONLY
+#include "fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "huf.h"
+#include <linux/xxhash.h>                /* XXH_reset, update, digest */
+#define ZSTD_TRACE 0
+
+
+/* ---- static assert (debug) --- */
+#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
+#define ZSTD_isError ERR_isError   /* for inlining */
+#define FSE_isError  ERR_isError
+#define HUF_isError  ERR_isError
+
+
+/*-*************************************
+*  shared macros
+***************************************/
+#undef MIN
+#undef MAX
+#define MIN(a,b) ((a)<(b) ? (a) : (b))
+#define MAX(a,b) ((a)>(b) ? (a) : (b))
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);
+
+
+/*-*************************************
+*  Common constants
+***************************************/
+#define ZSTD_OPT_NUM    (1<<12)
+
+#define ZSTD_REP_NUM      3                 /* number of repcodes */
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+#define BIT1   2
+#define BIT0   1
+
+#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
+static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
+static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+
+#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
+
+#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
+static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+
+#define ZSTD_FRAMECHECKSUMSIZE 4
+
+#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
+#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
+
+#define HufLog 12
+typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+
+#define LONGNBSEQ 0x7F00
+
+#define MINMATCH 3
+
+#define Litbits  8
+#define MaxLit ((1<<Litbits) - 1)
+#define MaxML   52
+#define MaxLL   35
+#define DefaultMaxOff 28
+#define MaxOff  31
+#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
+#define MLFSELog    9
+#define LLFSELog    9
+#define OffFSELog   8
+#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
+
+#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
+/* Each table cannot take more than #symbols * FSELog bits */
+#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
+
+static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 6, 7, 8, 9,10,11,12,
+    13,14,15,16
+};
+static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
+     4, 3, 2, 2, 2, 2, 2, 2,
+     2, 2, 2, 2, 2, 1, 1, 1,
+     2, 2, 2, 2, 2, 2, 2, 2,
+     2, 3, 2, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1
+};
+#define LL_DEFAULTNORMLOG 6  /* for static allocation */
+static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 2, 2, 3, 3,
+     4, 4, 5, 7, 8, 9,10,11,
+    12,13,14,15,16
+};
+static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
+     1, 4, 3, 2, 2, 2, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1,-1,-1,
+    -1,-1,-1,-1,-1
+};
+#define ML_DEFAULTNORMLOG 6  /* for static allocation */
+static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
+
+static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
+     1, 1, 1, 1, 1, 1, 2, 2,
+     2, 1, 1, 1, 1, 1, 1, 1,
+     1, 1, 1, 1, 1, 1, 1, 1,
+    -1,-1,-1,-1,-1
+};
+#define OF_DEFAULTNORMLOG 5  /* for static allocation */
+static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
+
+
+/*-*******************************************
+*  Shared functions to include for inlining
+*********************************************/
+static void ZSTD_copy8(void* dst, const void* src) {
+    ZSTD_memcpy(dst, src, 8);
+}
+
+#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+static void ZSTD_copy16(void* dst, const void* src) {
+    ZSTD_memcpy(dst, src, 16);
+}
+#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+
+#define WILDCOPY_OVERLENGTH 32
+#define WILDCOPY_VECLEN 16
+
+typedef enum {
+    ZSTD_no_overlap,
+    ZSTD_overlap_src_before_dst
+    /*  ZSTD_overlap_dst_before_src, */
+} ZSTD_overlap_e;
+
+/*! ZSTD_wildcopy() :
+ *  Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0)
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
+ *           The src buffer must be before the dst buffer.
+ */
+MEM_STATIC FORCE_INLINE_ATTR
+void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
+{
+    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+
+    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
+
+    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
+        /* Handle short offset copies. */
+        do {
+            COPY8(op, ip)
+        } while (op < oend);
+    } else {
+        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
+        /* Separate out the first COPY16() call because the copy length is
+         * almost certain to be short, so the branches have different
+         * probabilities. Since it is almost certain to be short, only do
+         * one COPY16() in the first call. Then, do two calls per loop since
+         * at that point it is more likely to have a high trip count.
+         */
+#ifdef __aarch64__
+        do {
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#else
+        ZSTD_copy16(op, ip);
+        if (16 >= length) return;
+        op += 16;
+        ip += 16;
+        do {
+            COPY16(op, ip);
+            COPY16(op, ip);
+        }
+        while (op < oend);
+#endif
+    }
+}
+
+MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    size_t const length = MIN(dstCapacity, srcSize);
+    if (length > 0) {
+        ZSTD_memcpy(dst, src, length);
+    }
+    return length;
+}
+
+/* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3
+
+/* when workspace is continuously too large
+ * during at least this number of times,
+ * context's memory usage is considered wasteful,
+ * because it's sized to handle a worst case scenario which rarely happens.
+ * In which case, resize it down to free some memory */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
+
+/* Controls whether the input/output buffer is buffered or stable. */
+typedef enum {
+    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
+    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
+} ZSTD_bufferMode_e;
+
+
+/*-*******************************************
+*  Private declarations
+*********************************************/
+typedef struct seqDef_s {
+    U32 offset;         /* Offset code of the sequence */
+    U16 litLength;
+    U16 matchLength;
+} seqDef;
+
+typedef struct {
+    seqDef* sequencesStart;
+    seqDef* sequences;      /* ptr to end of sequences */
+    BYTE* litStart;
+    BYTE* lit;              /* ptr to end of literals */
+    BYTE* llCode;
+    BYTE* mlCode;
+    BYTE* ofCode;
+    size_t maxNbSeq;
+    size_t maxNbLit;
+
+    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
+     * the existing value of the litLength or matchLength by 0x10000.
+     */
+    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
+    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
+} seqStore_t;
+
+typedef struct {
+    U32 litLength;
+    U32 matchLength;
+} ZSTD_sequenceLength;
+
+/*
+ * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
+ * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ */
+MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
+{
+    ZSTD_sequenceLength seqLen;
+    seqLen.litLength = seq->litLength;
+    seqLen.matchLength = seq->matchLength + MINMATCH;
+    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
+        if (seqStore->longLengthID == 1) {
+            seqLen.litLength += 0xFFFF;
+        }
+        if (seqStore->longLengthID == 2) {
+            seqLen.matchLength += 0xFFFF;
+        }
+    }
+    return seqLen;
+}
+
+/*
+ * Contains the compressed frame size and an upper-bound for the decompressed frame size.
+ * Note: before using `compressedSize`, check for errors using ZSTD_isError().
+ *       similarly, before using `decompressedBound`, check for errors using:
+ *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+    size_t compressedSize;
+    unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo;   /* decompress & legacy */
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+
+/* custom memory allocation functions */
+void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
+void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);
+
+
+MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
+{
+    assert(val != 0);
+    {
+#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
+        return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+        U32 v = val;
+        v |= v >> 1;
+        v |= v >> 2;
+        v |= v >> 4;
+        v |= v >> 8;
+        v |= v >> 16;
+        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
+#   endif
+    }
+}
+
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+    blockType_e blockType;
+    U32 lastBlock;
+    U32 origSize;
+} blockProperties_t;   /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  decode sequence header from src */
+/* Used by: decompress, fullbench (does not get its definition from here) */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                       const void* src, size_t srcSize);
+
+
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c
deleted file mode 100644 (file)
index b080264..0000000
+++ /dev/null
@@ -1,3485 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "fse.h"
-#include "huf.h"
-#include "mem.h"
-#include "zstd_internal.h" /* includes zstd.h */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h> /* memset */
-
-/*-*************************************
-*  Constants
-***************************************/
-static const U32 g_searchStrength = 8; /* control skip over incompressible data */
-#define HASH_READ_SIZE 8
-typedef enum { ZSTDcs_created = 0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
-
-/*-*************************************
-*  Helper functions
-***************************************/
-size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
-
-/*-*************************************
-*  Sequence storage
-***************************************/
-static void ZSTD_resetSeqStore(seqStore_t *ssPtr)
-{
-       ssPtr->lit = ssPtr->litStart;
-       ssPtr->sequences = ssPtr->sequencesStart;
-       ssPtr->longLengthID = 0;
-}
-
-/*-*************************************
-*  Context memory management
-***************************************/
-struct ZSTD_CCtx_s {
-       const BYTE *nextSrc;  /* next block here to continue on curr prefix */
-       const BYTE *base;     /* All regular indexes relative to this position */
-       const BYTE *dictBase; /* extDict indexes relative to this position */
-       U32 dictLimit;  /* below that point, need extDict */
-       U32 lowLimit;    /* below that point, no more data */
-       U32 nextToUpdate;     /* index from which to continue dictionary update */
-       U32 nextToUpdate3;    /* index from which to continue dictionary update */
-       U32 hashLog3;    /* dispatch table : larger == faster, more memory */
-       U32 loadedDictEnd;    /* index of end of dictionary */
-       U32 forceWindow;      /* force back-references to respect limit of 1<<wLog, even for dictionary */
-       U32 forceRawDict;     /* Force loading dictionary in "content-only" mode (no header analysis) */
-       ZSTD_compressionStage_e stage;
-       U32 rep[ZSTD_REP_NUM];
-       U32 repToConfirm[ZSTD_REP_NUM];
-       U32 dictID;
-       ZSTD_parameters params;
-       void *workSpace;
-       size_t workSpaceSize;
-       size_t blockSize;
-       U64 frameContentSize;
-       struct xxh64_state xxhState;
-       ZSTD_customMem customMem;
-
-       seqStore_t seqStore; /* sequences storage ptrs */
-       U32 *hashTable;
-       U32 *hashTable3;
-       U32 *chainTable;
-       HUF_CElt *hufTable;
-       U32 flagStaticTables;
-       HUF_repeat flagStaticHufTable;
-       FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
-       FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
-       FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
-       unsigned tmpCounters[HUF_COMPRESS_WORKSPACE_SIZE_U32];
-};
-
-size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams)
-{
-       size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
-       U32 const divider = (cParams.searchLength == 3) ? 3 : 4;
-       size_t const maxNbSeq = blockSize / divider;
-       size_t const tokenSpace = blockSize + 11 * maxNbSeq;
-       size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
-       size_t const hSize = ((size_t)1) << cParams.hashLog;
-       U32 const hashLog3 = (cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
-       size_t const h3Size = ((size_t)1) << hashLog3;
-       size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-       size_t const optSpace =
-           ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-       size_t const workspaceSize = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
-                                    (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
-
-       return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_CCtx)) + ZSTD_ALIGN(workspaceSize);
-}
-
-static ZSTD_CCtx *ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
-{
-       ZSTD_CCtx *cctx;
-       if (!customMem.customAlloc || !customMem.customFree)
-               return NULL;
-       cctx = (ZSTD_CCtx *)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
-       if (!cctx)
-               return NULL;
-       memset(cctx, 0, sizeof(ZSTD_CCtx));
-       cctx->customMem = customMem;
-       return cctx;
-}
-
-ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-       ZSTD_CCtx *cctx = ZSTD_createCCtx_advanced(stackMem);
-       if (cctx) {
-               cctx->workSpace = ZSTD_stackAllocAll(cctx->customMem.opaque, &cctx->workSpaceSize);
-       }
-       return cctx;
-}
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx)
-{
-       if (cctx == NULL)
-               return 0; /* support free on NULL */
-       ZSTD_free(cctx->workSpace, cctx->customMem);
-       ZSTD_free(cctx, cctx->customMem);
-       return 0; /* reserved as a potential error code in the future */
-}
-
-const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx) /* hidden interface */ { return &(ctx->seqStore); }
-
-static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx *cctx) { return cctx->params; }
-
-/** ZSTD_checkParams() :
-       ensure param values remain within authorized range.
-       @return : 0, or an error code if one value is beyond authorized range */
-size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
-{
-#define CLAMPCHECK(val, min, max)                                       \
-       {                                                               \
-               if ((val < min) | (val > max))                          \
-                       return ERROR(compressionParameter_unsupported); \
-       }
-       CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
-       CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
-       CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
-       CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
-       CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
-       CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
-       if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2)
-               return ERROR(compressionParameter_unsupported);
-       return 0;
-}
-
-/** ZSTD_cycleLog() :
- *  condition for correct operation : hashLog > 1 */
-static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
-{
-       U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
-       return hashLog - btScale;
-}
-
-/** ZSTD_adjustCParams() :
-       optimize `cPar` for a given input (`srcSize` and `dictSize`).
-       mostly downsizing to reduce memory consumption and initialization.
-       Both `srcSize` and `dictSize` are optional (use 0 if unknown),
-       but if both are 0, no optimization can be done.
-       Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
-{
-       if (srcSize + dictSize == 0)
-               return cPar; /* no size information available : no adjustment */
-
-       /* resize params, to use less memory when necessary */
-       {
-               U32 const minSrcSize = (srcSize == 0) ? 500 : 0;
-               U64 const rSize = srcSize + dictSize + minSrcSize;
-               if (rSize < ((U64)1 << ZSTD_WINDOWLOG_MAX)) {
-                       U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
-                       if (cPar.windowLog > srcLog)
-                               cPar.windowLog = srcLog;
-               }
-       }
-       if (cPar.hashLog > cPar.windowLog)
-               cPar.hashLog = cPar.windowLog;
-       {
-               U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
-               if (cycleLog > cPar.windowLog)
-                       cPar.chainLog -= (cycleLog - cPar.windowLog);
-       }
-
-       if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
-               cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
-
-       return cPar;
-}
-
-static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
-{
-       return (param1.cParams.hashLog == param2.cParams.hashLog) & (param1.cParams.chainLog == param2.cParams.chainLog) &
-              (param1.cParams.strategy == param2.cParams.strategy) & ((param1.cParams.searchLength == 3) == (param2.cParams.searchLength == 3));
-}
-
-/*! ZSTD_continueCCtx() :
-       reuse CCtx without reset (note : requires no dictionary) */
-static size_t ZSTD_continueCCtx(ZSTD_CCtx *cctx, ZSTD_parameters params, U64 frameContentSize)
-{
-       U32 const end = (U32)(cctx->nextSrc - cctx->base);
-       cctx->params = params;
-       cctx->frameContentSize = frameContentSize;
-       cctx->lowLimit = end;
-       cctx->dictLimit = end;
-       cctx->nextToUpdate = end + 1;
-       cctx->stage = ZSTDcs_init;
-       cctx->dictID = 0;
-       cctx->loadedDictEnd = 0;
-       {
-               int i;
-               for (i = 0; i < ZSTD_REP_NUM; i++)
-                       cctx->rep[i] = repStartValue[i];
-       }
-       cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
-       xxh64_reset(&cctx->xxhState, 0);
-       return 0;
-}
-
-typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
-
-/*! ZSTD_resetCCtx_advanced() :
-       note : `params` must be validated */
-static size_t ZSTD_resetCCtx_advanced(ZSTD_CCtx *zc, ZSTD_parameters params, U64 frameContentSize, ZSTD_compResetPolicy_e const crp)
-{
-       if (crp == ZSTDcrp_continue)
-               if (ZSTD_equivalentParams(params, zc->params)) {
-                       zc->flagStaticTables = 0;
-                       zc->flagStaticHufTable = HUF_repeat_none;
-                       return ZSTD_continueCCtx(zc, params, frameContentSize);
-               }
-
-       {
-               size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
-               U32 const divider = (params.cParams.searchLength == 3) ? 3 : 4;
-               size_t const maxNbSeq = blockSize / divider;
-               size_t const tokenSpace = blockSize + 11 * maxNbSeq;
-               size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
-               size_t const hSize = ((size_t)1) << params.cParams.hashLog;
-               U32 const hashLog3 = (params.cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
-               size_t const h3Size = ((size_t)1) << hashLog3;
-               size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-               void *ptr;
-
-               /* Check if workSpace is large enough, alloc a new one if needed */
-               {
-                       size_t const optSpace = ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) +
-                                               (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
-                       size_t const neededSpace = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace +
-                                                  (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
-                       if (zc->workSpaceSize < neededSpace) {
-                               ZSTD_free(zc->workSpace, zc->customMem);
-                               zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
-                               if (zc->workSpace == NULL)
-                                       return ERROR(memory_allocation);
-                               zc->workSpaceSize = neededSpace;
-                       }
-               }
-
-               if (crp != ZSTDcrp_noMemset)
-                       memset(zc->workSpace, 0, tableSpace); /* reset tables only */
-               xxh64_reset(&zc->xxhState, 0);
-               zc->hashLog3 = hashLog3;
-               zc->hashTable = (U32 *)(zc->workSpace);
-               zc->chainTable = zc->hashTable + hSize;
-               zc->hashTable3 = zc->chainTable + chainSize;
-               ptr = zc->hashTable3 + h3Size;
-               zc->hufTable = (HUF_CElt *)ptr;
-               zc->flagStaticTables = 0;
-               zc->flagStaticHufTable = HUF_repeat_none;
-               ptr = ((U32 *)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */
-
-               zc->nextToUpdate = 1;
-               zc->nextSrc = NULL;
-               zc->base = NULL;
-               zc->dictBase = NULL;
-               zc->dictLimit = 0;
-               zc->lowLimit = 0;
-               zc->params = params;
-               zc->blockSize = blockSize;
-               zc->frameContentSize = frameContentSize;
-               {
-                       int i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               zc->rep[i] = repStartValue[i];
-               }
-
-               if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
-                       zc->seqStore.litFreq = (U32 *)ptr;
-                       zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1 << Litbits);
-                       zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL + 1);
-                       zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML + 1);
-                       ptr = zc->seqStore.offCodeFreq + (MaxOff + 1);
-                       zc->seqStore.matchTable = (ZSTD_match_t *)ptr;
-                       ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM + 1;
-                       zc->seqStore.priceTable = (ZSTD_optimal_t *)ptr;
-                       ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM + 1;
-                       zc->seqStore.litLengthSum = 0;
-               }
-               zc->seqStore.sequencesStart = (seqDef *)ptr;
-               ptr = zc->seqStore.sequencesStart + maxNbSeq;
-               zc->seqStore.llCode = (BYTE *)ptr;
-               zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
-               zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
-               zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
-
-               zc->stage = ZSTDcs_init;
-               zc->dictID = 0;
-               zc->loadedDictEnd = 0;
-
-               return 0;
-       }
-}
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx)
-{
-       int i;
-       for (i = 0; i < ZSTD_REP_NUM; i++)
-               cctx->rep[i] = 0;
-}
-
-/*! ZSTD_copyCCtx() :
-*   Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
-*   Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
-*   @return : 0, or an error code */
-size_t ZSTD_copyCCtx(ZSTD_CCtx *dstCCtx, const ZSTD_CCtx *srcCCtx, unsigned long long pledgedSrcSize)
-{
-       if (srcCCtx->stage != ZSTDcs_init)
-               return ERROR(stage_wrong);
-
-       memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
-       {
-               ZSTD_parameters params = srcCCtx->params;
-               params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-               ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset);
-       }
-
-       /* copy tables */
-       {
-               size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
-               size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
-               size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
-               size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
-               memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
-       }
-
-       /* copy dictionary offsets */
-       dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
-       dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3;
-       dstCCtx->nextSrc = srcCCtx->nextSrc;
-       dstCCtx->base = srcCCtx->base;
-       dstCCtx->dictBase = srcCCtx->dictBase;
-       dstCCtx->dictLimit = srcCCtx->dictLimit;
-       dstCCtx->lowLimit = srcCCtx->lowLimit;
-       dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd;
-       dstCCtx->dictID = srcCCtx->dictID;
-
-       /* copy entropy tables */
-       dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
-       dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable;
-       if (srcCCtx->flagStaticTables) {
-               memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
-               memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
-               memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
-       }
-       if (srcCCtx->flagStaticHufTable) {
-               memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256 * 4);
-       }
-
-       return 0;
-}
-
-/*! ZSTD_reduceTable() :
-*   reduce table indexes by `reducerValue` */
-static void ZSTD_reduceTable(U32 *const table, U32 const size, U32 const reducerValue)
-{
-       U32 u;
-       for (u = 0; u < size; u++) {
-               if (table[u] < reducerValue)
-                       table[u] = 0;
-               else
-                       table[u] -= reducerValue;
-       }
-}
-
-/*! ZSTD_reduceIndex() :
-*   rescale all indexes to avoid future overflow (indexes are U32) */
-static void ZSTD_reduceIndex(ZSTD_CCtx *zc, const U32 reducerValue)
-{
-       {
-               U32 const hSize = 1 << zc->params.cParams.hashLog;
-               ZSTD_reduceTable(zc->hashTable, hSize, reducerValue);
-       }
-
-       {
-               U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
-               ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue);
-       }
-
-       {
-               U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
-               ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue);
-       }
-}
-
-/*-*******************************************************
-*  Block entropic compression
-*********************************************************/
-
-/* See doc/zstd_compression_format.md for detailed format description */
-
-size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       if (srcSize + ZSTD_blockHeaderSize > dstCapacity)
-               return ERROR(dstSize_tooSmall);
-       memcpy((BYTE *)dst + ZSTD_blockHeaderSize, src, srcSize);
-       ZSTD_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
-       return ZSTD_blockHeaderSize + srcSize;
-}
-
-static size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       BYTE *const ostart = (BYTE * const)dst;
-       U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
-
-       if (srcSize + flSize > dstCapacity)
-               return ERROR(dstSize_tooSmall);
-
-       switch (flSize) {
-       case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_basic + (srcSize << 3)); break;
-       case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_basic + (1 << 2) + (srcSize << 4))); break;
-       default: /*note : should not be necessary : flSize is within {1,2,3} */
-       case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_basic + (3 << 2) + (srcSize << 4))); break;
-       }
-
-       memcpy(ostart + flSize, src, srcSize);
-       return srcSize + flSize;
-}
-
-static size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       BYTE *const ostart = (BYTE * const)dst;
-       U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095);
-
-       (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
-
-       switch (flSize) {
-       case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_rle + (srcSize << 3)); break;
-       case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_rle + (1 << 2) + (srcSize << 4))); break;
-       default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
-       case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_rle + (3 << 2) + (srcSize << 4))); break;
-       }
-
-       ostart[flSize] = *(const BYTE *)src;
-       return flSize + 1;
-}
-
-static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-
-static size_t ZSTD_compressLiterals(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       size_t const minGain = ZSTD_minGain(srcSize);
-       size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
-       BYTE *const ostart = (BYTE *)dst;
-       U32 singleStream = srcSize < 256;
-       symbolEncodingType_e hType = set_compressed;
-       size_t cLitSize;
-
-/* small ? don't even attempt compression (speed opt) */
-#define LITERAL_NOENTROPY 63
-       {
-               size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY;
-               if (srcSize <= minLitSize)
-                       return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-       }
-
-       if (dstCapacity < lhSize + 1)
-               return ERROR(dstSize_tooSmall); /* not enough space for compression */
-       {
-               HUF_repeat repeat = zc->flagStaticHufTable;
-               int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
-               if (repeat == HUF_repeat_valid && lhSize == 3)
-                       singleStream = 1;
-               cLitSize = singleStream ? HUF_compress1X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
-                                                               sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat)
-                                       : HUF_compress4X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters,
-                                                               sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat);
-               if (repeat != HUF_repeat_none) {
-                       hType = set_repeat;
-               } /* reused the existing table */
-               else {
-                       zc->flagStaticHufTable = HUF_repeat_check;
-               } /* now have a table to reuse */
-       }
-
-       if ((cLitSize == 0) | (cLitSize >= srcSize - minGain)) {
-               zc->flagStaticHufTable = HUF_repeat_none;
-               return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
-       }
-       if (cLitSize == 1) {
-               zc->flagStaticHufTable = HUF_repeat_none;
-               return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
-       }
-
-       /* Build header */
-       switch (lhSize) {
-       case 3: /* 2 - 2 - 10 - 10 */
-       {
-               U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 14);
-               ZSTD_writeLE24(ostart, lhc);
-               break;
-       }
-       case 4: /* 2 - 2 - 14 - 14 */
-       {
-               U32 const lhc = hType + (2 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 18);
-               ZSTD_writeLE32(ostart, lhc);
-               break;
-       }
-       default: /* should not be necessary, lhSize is only {3,4,5} */
-       case 5:  /* 2 - 2 - 18 - 18 */
-       {
-               U32 const lhc = hType + (3 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 22);
-               ZSTD_writeLE32(ostart, lhc);
-               ostart[4] = (BYTE)(cLitSize >> 10);
-               break;
-       }
-       }
-       return lhSize + cLitSize;
-}
-
-static const BYTE LL_Code[64] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18,
-                                19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23,
-                                23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24};
-
-static const BYTE ML_Code[128] = {0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
-                                 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38,
-                                 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
-                                 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42,
-                                 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42};
-
-void ZSTD_seqToCodes(const seqStore_t *seqStorePtr)
-{
-       BYTE const LL_deltaCode = 19;
-       BYTE const ML_deltaCode = 36;
-       const seqDef *const sequences = seqStorePtr->sequencesStart;
-       BYTE *const llCodeTable = seqStorePtr->llCode;
-       BYTE *const ofCodeTable = seqStorePtr->ofCode;
-       BYTE *const mlCodeTable = seqStorePtr->mlCode;
-       U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-       U32 u;
-       for (u = 0; u < nbSeq; u++) {
-               U32 const llv = sequences[u].litLength;
-               U32 const mlv = sequences[u].matchLength;
-               llCodeTable[u] = (llv > 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
-               ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
-               mlCodeTable[u] = (mlv > 127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
-       }
-       if (seqStorePtr->longLengthID == 1)
-               llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-       if (seqStorePtr->longLengthID == 2)
-               mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
-}
-
-ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity)
-{
-       const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN;
-       const seqStore_t *seqStorePtr = &(zc->seqStore);
-       FSE_CTable *CTable_LitLength = zc->litlengthCTable;
-       FSE_CTable *CTable_OffsetBits = zc->offcodeCTable;
-       FSE_CTable *CTable_MatchLength = zc->matchlengthCTable;
-       U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
-       const seqDef *const sequences = seqStorePtr->sequencesStart;
-       const BYTE *const ofCodeTable = seqStorePtr->ofCode;
-       const BYTE *const llCodeTable = seqStorePtr->llCode;
-       const BYTE *const mlCodeTable = seqStorePtr->mlCode;
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *const oend = ostart + dstCapacity;
-       BYTE *op = ostart;
-       size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-       BYTE *seqHead;
-
-       U32 *count;
-       S16 *norm;
-       U32 *workspace;
-       size_t workspaceSize = sizeof(zc->tmpCounters);
-       {
-               size_t spaceUsed32 = 0;
-               count = (U32 *)zc->tmpCounters + spaceUsed32;
-               spaceUsed32 += MaxSeq + 1;
-               norm = (S16 *)((U32 *)zc->tmpCounters + spaceUsed32);
-               spaceUsed32 += ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
-
-               workspace = (U32 *)zc->tmpCounters + spaceUsed32;
-               workspaceSize -= (spaceUsed32 << 2);
-       }
-
-       /* Compress literals */
-       {
-               const BYTE *const literals = seqStorePtr->litStart;
-               size_t const litSize = seqStorePtr->lit - literals;
-               size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
-               if (ZSTD_isError(cSize))
-                       return cSize;
-               op += cSize;
-       }
-
-       /* Sequences Header */
-       if ((oend - op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */)
-               return ERROR(dstSize_tooSmall);
-       if (nbSeq < 0x7F)
-               *op++ = (BYTE)nbSeq;
-       else if (nbSeq < LONGNBSEQ)
-               op[0] = (BYTE)((nbSeq >> 8) + 0x80), op[1] = (BYTE)nbSeq, op += 2;
-       else
-               op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3;
-       if (nbSeq == 0)
-               return op - ostart;
-
-       /* seqHead : flags for FSE encoding type */
-       seqHead = op++;
-
-#define MIN_SEQ_FOR_DYNAMIC_FSE 64
-#define MAX_SEQ_FOR_STATIC_FSE 1000
-
-       /* convert length/distances into codes */
-       ZSTD_seqToCodes(seqStorePtr);
-
-       /* CTable for Literal Lengths */
-       {
-               U32 max = MaxLL;
-               size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
-               if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-                       *op++ = llCodeTable[0];
-                       FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
-                       LLtype = set_rle;
-               } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-                       LLtype = set_repeat;
-               } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog - 1)))) {
-                       FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, workspace, workspaceSize);
-                       LLtype = set_basic;
-               } else {
-                       size_t nbSeq_1 = nbSeq;
-                       const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
-                       if (count[llCodeTable[nbSeq - 1]] > 1) {
-                               count[llCodeTable[nbSeq - 1]]--;
-                               nbSeq_1--;
-                       }
-                       FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-                       {
-                               size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-                               if (FSE_isError(NCountSize))
-                                       return NCountSize;
-                               op += NCountSize;
-                       }
-                       FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, workspace, workspaceSize);
-                       LLtype = set_compressed;
-               }
-       }
-
-       /* CTable for Offsets */
-       {
-               U32 max = MaxOff;
-               size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
-               if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-                       *op++ = ofCodeTable[0];
-                       FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
-                       Offtype = set_rle;
-               } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-                       Offtype = set_repeat;
-               } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog - 1)))) {
-                       FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, workspace, workspaceSize);
-                       Offtype = set_basic;
-               } else {
-                       size_t nbSeq_1 = nbSeq;
-                       const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
-                       if (count[ofCodeTable[nbSeq - 1]] > 1) {
-                               count[ofCodeTable[nbSeq - 1]]--;
-                               nbSeq_1--;
-                       }
-                       FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-                       {
-                               size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-                               if (FSE_isError(NCountSize))
-                                       return NCountSize;
-                               op += NCountSize;
-                       }
-                       FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, workspace, workspaceSize);
-                       Offtype = set_compressed;
-               }
-       }
-
-       /* CTable for MatchLengths */
-       {
-               U32 max = MaxML;
-               size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
-               if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
-                       *op++ = *mlCodeTable;
-                       FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
-                       MLtype = set_rle;
-               } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
-                       MLtype = set_repeat;
-               } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog - 1)))) {
-                       FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, workspace, workspaceSize);
-                       MLtype = set_basic;
-               } else {
-                       size_t nbSeq_1 = nbSeq;
-                       const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
-                       if (count[mlCodeTable[nbSeq - 1]] > 1) {
-                               count[mlCodeTable[nbSeq - 1]]--;
-                               nbSeq_1--;
-                       }
-                       FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
-                       {
-                               size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
-                               if (FSE_isError(NCountSize))
-                                       return NCountSize;
-                               op += NCountSize;
-                       }
-                       FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, workspace, workspaceSize);
-                       MLtype = set_compressed;
-               }
-       }
-
-       *seqHead = (BYTE)((LLtype << 6) + (Offtype << 4) + (MLtype << 2));
-       zc->flagStaticTables = 0;
-
-       /* Encoding Sequences */
-       {
-               BIT_CStream_t blockStream;
-               FSE_CState_t stateMatchLength;
-               FSE_CState_t stateOffsetBits;
-               FSE_CState_t stateLitLength;
-
-               CHECK_E(BIT_initCStream(&blockStream, op, oend - op), dstSize_tooSmall); /* not enough space remaining */
-
-               /* first symbols */
-               FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq - 1]);
-               FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq - 1]);
-               FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq - 1]);
-               BIT_addBits(&blockStream, sequences[nbSeq - 1].litLength, LL_bits[llCodeTable[nbSeq - 1]]);
-               if (ZSTD_32bits())
-                       BIT_flushBits(&blockStream);
-               BIT_addBits(&blockStream, sequences[nbSeq - 1].matchLength, ML_bits[mlCodeTable[nbSeq - 1]]);
-               if (ZSTD_32bits())
-                       BIT_flushBits(&blockStream);
-               if (longOffsets) {
-                       U32 const ofBits = ofCodeTable[nbSeq - 1];
-                       int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
-                       if (extraBits) {
-                               BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, extraBits);
-                               BIT_flushBits(&blockStream);
-                       }
-                       BIT_addBits(&blockStream, sequences[nbSeq - 1].offset >> extraBits, ofBits - extraBits);
-               } else {
-                       BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, ofCodeTable[nbSeq - 1]);
-               }
-               BIT_flushBits(&blockStream);
-
-               {
-                       size_t n;
-                       for (n = nbSeq - 2; n < nbSeq; n--) { /* intentional underflow */
-                               BYTE const llCode = llCodeTable[n];
-                               BYTE const ofCode = ofCodeTable[n];
-                               BYTE const mlCode = mlCodeTable[n];
-                               U32 const llBits = LL_bits[llCode];
-                               U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
-                               U32 const mlBits = ML_bits[mlCode];
-                               /* (7)*/                                                            /* (7)*/
-                               FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */  /* 15 */
-                               FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
-                               if (ZSTD_32bits())
-                                       BIT_flushBits(&blockStream);                              /* (7)*/
-                               FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
-                               if (ZSTD_32bits() || (ofBits + mlBits + llBits >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-                                       BIT_flushBits(&blockStream); /* (7)*/
-                               BIT_addBits(&blockStream, sequences[n].litLength, llBits);
-                               if (ZSTD_32bits() && ((llBits + mlBits) > 24))
-                                       BIT_flushBits(&blockStream);
-                               BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
-                               if (ZSTD_32bits())
-                                       BIT_flushBits(&blockStream); /* (7)*/
-                               if (longOffsets) {
-                                       int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1);
-                                       if (extraBits) {
-                                               BIT_addBits(&blockStream, sequences[n].offset, extraBits);
-                                               BIT_flushBits(&blockStream); /* (7)*/
-                                       }
-                                       BIT_addBits(&blockStream, sequences[n].offset >> extraBits, ofBits - extraBits); /* 31 */
-                               } else {
-                                       BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
-                               }
-                               BIT_flushBits(&blockStream); /* (7)*/
-                       }
-               }
-
-               FSE_flushCState(&blockStream, &stateMatchLength);
-               FSE_flushCState(&blockStream, &stateOffsetBits);
-               FSE_flushCState(&blockStream, &stateLitLength);
-
-               {
-                       size_t const streamSize = BIT_closeCStream(&blockStream);
-                       if (streamSize == 0)
-                               return ERROR(dstSize_tooSmall); /* not enough space */
-                       op += streamSize;
-               }
-       }
-       return op - ostart;
-}
-
-ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize)
-{
-       size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity);
-       size_t const minGain = ZSTD_minGain(srcSize);
-       size_t const maxCSize = srcSize - minGain;
-       /* If the srcSize <= dstCapacity, then there is enough space to write a
-        * raw uncompressed block. Since we ran out of space, the block must not
-        * be compressible, so fall back to a raw uncompressed block.
-        */
-       int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity;
-       int i;
-
-       if (ZSTD_isError(cSize) && !uncompressibleError)
-               return cSize;
-       if (cSize >= maxCSize || uncompressibleError) {
-               zc->flagStaticHufTable = HUF_repeat_none;
-               return 0;
-       }
-       /* confirm repcodes */
-       for (i = 0; i < ZSTD_REP_NUM; i++)
-               zc->rep[i] = zc->repToConfirm[i];
-       return cSize;
-}
-
-/*! ZSTD_storeSeq() :
-       Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
-       `offsetCode` : distance to match, or 0 == repCode.
-       `matchCode` : matchLength - MINMATCH
-*/
-ZSTD_STATIC void ZSTD_storeSeq(seqStore_t *seqStorePtr, size_t litLength, const void *literals, U32 offsetCode, size_t matchCode)
-{
-       /* copy Literals */
-       ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
-       seqStorePtr->lit += litLength;
-
-       /* literal Length */
-       if (litLength > 0xFFFF) {
-               seqStorePtr->longLengthID = 1;
-               seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-       }
-       seqStorePtr->sequences[0].litLength = (U16)litLength;
-
-       /* match offset */
-       seqStorePtr->sequences[0].offset = offsetCode + 1;
-
-       /* match Length */
-       if (matchCode > 0xFFFF) {
-               seqStorePtr->longLengthID = 2;
-               seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-       }
-       seqStorePtr->sequences[0].matchLength = (U16)matchCode;
-
-       seqStorePtr->sequences++;
-}
-
-/*-*************************************
-*  Match length counter
-***************************************/
-static unsigned ZSTD_NbCommonBytes(register size_t val)
-{
-       if (ZSTD_isLittleEndian()) {
-               if (ZSTD_64bits()) {
-                       return (__builtin_ctzll((U64)val) >> 3);
-               } else { /* 32 bits */
-                       return (__builtin_ctz((U32)val) >> 3);
-               }
-       } else { /* Big Endian CPU */
-               if (ZSTD_64bits()) {
-                       return (__builtin_clzll(val) >> 3);
-               } else { /* 32 bits */
-                       return (__builtin_clz((U32)val) >> 3);
-               }
-       }
-}
-
-static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *const pInLimit)
-{
-       const BYTE *const pStart = pIn;
-       const BYTE *const pInLoopLimit = pInLimit - (sizeof(size_t) - 1);
-
-       while (pIn < pInLoopLimit) {
-               size_t const diff = ZSTD_readST(pMatch) ^ ZSTD_readST(pIn);
-               if (!diff) {
-                       pIn += sizeof(size_t);
-                       pMatch += sizeof(size_t);
-                       continue;
-               }
-               pIn += ZSTD_NbCommonBytes(diff);
-               return (size_t)(pIn - pStart);
-       }
-       if (ZSTD_64bits())
-               if ((pIn < (pInLimit - 3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) {
-                       pIn += 4;
-                       pMatch += 4;
-               }
-       if ((pIn < (pInLimit - 1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) {
-               pIn += 2;
-               pMatch += 2;
-       }
-       if ((pIn < pInLimit) && (*pMatch == *pIn))
-               pIn++;
-       return (size_t)(pIn - pStart);
-}
-
-/** ZSTD_count_2segments() :
-*   can count match length with `ip` & `match` in 2 different segments.
-*   convention : on reaching mEnd, match count continue starting from iStart
-*/
-static size_t ZSTD_count_2segments(const BYTE *ip, const BYTE *match, const BYTE *iEnd, const BYTE *mEnd, const BYTE *iStart)
-{
-       const BYTE *const vEnd = MIN(ip + (mEnd - match), iEnd);
-       size_t const matchLength = ZSTD_count(ip, match, vEnd);
-       if (match + matchLength != mEnd)
-               return matchLength;
-       return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd);
-}
-
-/*-*************************************
-*  Hashes
-***************************************/
-static const U32 prime3bytes = 506832829U;
-static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32 - 24)) * prime3bytes) >> (32 - h); }
-ZSTD_STATIC size_t ZSTD_hash3Ptr(const void *ptr, U32 h) { return ZSTD_hash3(ZSTD_readLE32(ptr), h); } /* only in zstd_opt.h */
-
-static const U32 prime4bytes = 2654435761U;
-static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32 - h); }
-static size_t ZSTD_hash4Ptr(const void *ptr, U32 h) { return ZSTD_hash4(ZSTD_read32(ptr), h); }
-
-static const U64 prime5bytes = 889523592379ULL;
-static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h)); }
-static size_t ZSTD_hash5Ptr(const void *p, U32 h) { return ZSTD_hash5(ZSTD_readLE64(p), h); }
-
-static const U64 prime6bytes = 227718039650203ULL;
-static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64 - 48)) * prime6bytes) >> (64 - h)); }
-static size_t ZSTD_hash6Ptr(const void *p, U32 h) { return ZSTD_hash6(ZSTD_readLE64(p), h); }
-
-static const U64 prime7bytes = 58295818150454627ULL;
-static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64 - 56)) * prime7bytes) >> (64 - h)); }
-static size_t ZSTD_hash7Ptr(const void *p, U32 h) { return ZSTD_hash7(ZSTD_readLE64(p), h); }
-
-static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
-static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u)*prime8bytes) >> (64 - h)); }
-static size_t ZSTD_hash8Ptr(const void *p, U32 h) { return ZSTD_hash8(ZSTD_readLE64(p), h); }
-
-static size_t ZSTD_hashPtr(const void *p, U32 hBits, U32 mls)
-{
-       switch (mls) {
-       // case 3: return ZSTD_hash3Ptr(p, hBits);
-       default:
-       case 4: return ZSTD_hash4Ptr(p, hBits);
-       case 5: return ZSTD_hash5Ptr(p, hBits);
-       case 6: return ZSTD_hash6Ptr(p, hBits);
-       case 7: return ZSTD_hash7Ptr(p, hBits);
-       case 8: return ZSTD_hash8Ptr(p, hBits);
-       }
-}
-
-/*-*************************************
-*  Fast Scan
-***************************************/
-static void ZSTD_fillHashTable(ZSTD_CCtx *zc, const void *end, const U32 mls)
-{
-       U32 *const hashTable = zc->hashTable;
-       U32 const hBits = zc->params.cParams.hashLog;
-       const BYTE *const base = zc->base;
-       const BYTE *ip = base + zc->nextToUpdate;
-       const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
-       const size_t fastHashFillStep = 3;
-
-       while (ip <= iend) {
-               hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-               ip += fastHashFillStep;
-       }
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_fast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
-{
-       U32 *const hashTable = cctx->hashTable;
-       U32 const hBits = cctx->params.cParams.hashLog;
-       seqStore_t *seqStorePtr = &(cctx->seqStore);
-       const BYTE *const base = cctx->base;
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const U32 lowestIndex = cctx->dictLimit;
-       const BYTE *const lowest = base + lowestIndex;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - HASH_READ_SIZE;
-       U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
-       U32 offsetSaved = 0;
-
-       /* init */
-       ip += (ip == lowest);
-       {
-               U32 const maxRep = (U32)(ip - lowest);
-               if (offset_2 > maxRep)
-                       offsetSaved = offset_2, offset_2 = 0;
-               if (offset_1 > maxRep)
-                       offsetSaved = offset_1, offset_1 = 0;
-       }
-
-       /* Main Search Loop */
-       while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
-               size_t mLength;
-               size_t const h = ZSTD_hashPtr(ip, hBits, mls);
-               U32 const curr = (U32)(ip - base);
-               U32 const matchIndex = hashTable[h];
-               const BYTE *match = base + matchIndex;
-               hashTable[h] = curr; /* update hash table */
-
-               if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) {
-                       mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
-                       ip++;
-                       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-               } else {
-                       U32 offset;
-                       if ((matchIndex <= lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
-                               ip += ((ip - anchor) >> g_searchStrength) + 1;
-                               continue;
-                       }
-                       mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
-                       offset = (U32)(ip - match);
-                       while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
-                               ip--;
-                               match--;
-                               mLength++;
-                       } /* catch up */
-                       offset_2 = offset_1;
-                       offset_1 = offset;
-
-                       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-               }
-
-               /* match found */
-               ip += mLength;
-               anchor = ip;
-
-               if (ip <= ilimit) {
-                       /* Fill Table */
-                       hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; /* here because curr+2 could be > iend-8 */
-                       hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
-                       /* check immediate repcode */
-                       while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-                               /* store sequence */
-                               size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
-                               {
-                                       U32 const tmpOff = offset_2;
-                                       offset_2 = offset_1;
-                                       offset_1 = tmpOff;
-                               } /* swap offset_2 <=> offset_1 */
-                               hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
-                               ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
-                               ip += rLength;
-                               anchor = ip;
-                               continue; /* faster when present ... (?) */
-                       }
-               }
-       }
-
-       /* save reps for next block */
-       cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-       cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-static void ZSTD_compressBlock_fast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       const U32 mls = ctx->params.cParams.searchLength;
-       switch (mls) {
-       default: /* includes case 3 */
-       case 4: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
-       case 5: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
-       case 6: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
-       case 7: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
-       }
-}
-
-static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
-{
-       U32 *hashTable = ctx->hashTable;
-       const U32 hBits = ctx->params.cParams.hashLog;
-       seqStore_t *seqStorePtr = &(ctx->seqStore);
-       const BYTE *const base = ctx->base;
-       const BYTE *const dictBase = ctx->dictBase;
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const U32 lowestIndex = ctx->lowLimit;
-       const BYTE *const dictStart = dictBase + lowestIndex;
-       const U32 dictLimit = ctx->dictLimit;
-       const BYTE *const lowPrefixPtr = base + dictLimit;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - 8;
-       U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-       /* Search Loop */
-       while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-               const size_t h = ZSTD_hashPtr(ip, hBits, mls);
-               const U32 matchIndex = hashTable[h];
-               const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
-               const BYTE *match = matchBase + matchIndex;
-               const U32 curr = (U32)(ip - base);
-               const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
-               const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
-               const BYTE *repMatch = repBase + repIndex;
-               size_t mLength;
-               hashTable[h] = curr; /* update hash table */
-
-               if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
-                   (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
-                       const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-                       mLength = ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
-                       ip++;
-                       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-               } else {
-                       if ((matchIndex < lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) {
-                               ip += ((ip - anchor) >> g_searchStrength) + 1;
-                               continue;
-                       }
-                       {
-                               const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
-                               const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-                               U32 offset;
-                               mLength = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
-                               while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
-                                       ip--;
-                                       match--;
-                                       mLength++;
-                               } /* catch up */
-                               offset = curr - matchIndex;
-                               offset_2 = offset_1;
-                               offset_1 = offset;
-                               ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-                       }
-               }
-
-               /* found a match : store it */
-               ip += mLength;
-               anchor = ip;
-
-               if (ip <= ilimit) {
-                       /* Fill Table */
-                       hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2;
-                       hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base);
-                       /* check immediate repcode */
-                       while (ip <= ilimit) {
-                               U32 const curr2 = (U32)(ip - base);
-                               U32 const repIndex2 = curr2 - offset_2;
-                               const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
-                               if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
-                                   && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
-                                       const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                                       size_t repLength2 =
-                                           ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
-                                       U32 tmpOffset = offset_2;
-                                       offset_2 = offset_1;
-                                       offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
-                                       ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
-                                       hashTable[ZSTD_hashPtr(ip, hBits, mls)] = curr2;
-                                       ip += repLength2;
-                                       anchor = ip;
-                                       continue;
-                               }
-                               break;
-                       }
-               }
-       }
-
-       /* save reps for next block */
-       ctx->repToConfirm[0] = offset_1;
-       ctx->repToConfirm[1] = offset_2;
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       U32 const mls = ctx->params.cParams.searchLength;
-       switch (mls) {
-       default: /* includes case 3 */
-       case 4: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
-       case 5: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
-       case 6: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
-       case 7: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
-       }
-}
-
-/*-*************************************
-*  Double Fast
-***************************************/
-static void ZSTD_fillDoubleHashTable(ZSTD_CCtx *cctx, const void *end, const U32 mls)
-{
-       U32 *const hashLarge = cctx->hashTable;
-       U32 const hBitsL = cctx->params.cParams.hashLog;
-       U32 *const hashSmall = cctx->chainTable;
-       U32 const hBitsS = cctx->params.cParams.chainLog;
-       const BYTE *const base = cctx->base;
-       const BYTE *ip = base + cctx->nextToUpdate;
-       const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE;
-       const size_t fastHashFillStep = 3;
-
-       while (ip <= iend) {
-               hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
-               hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
-               ip += fastHashFillStep;
-       }
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls)
-{
-       U32 *const hashLong = cctx->hashTable;
-       const U32 hBitsL = cctx->params.cParams.hashLog;
-       U32 *const hashSmall = cctx->chainTable;
-       const U32 hBitsS = cctx->params.cParams.chainLog;
-       seqStore_t *seqStorePtr = &(cctx->seqStore);
-       const BYTE *const base = cctx->base;
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const U32 lowestIndex = cctx->dictLimit;
-       const BYTE *const lowest = base + lowestIndex;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - HASH_READ_SIZE;
-       U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1];
-       U32 offsetSaved = 0;
-
-       /* init */
-       ip += (ip == lowest);
-       {
-               U32 const maxRep = (U32)(ip - lowest);
-               if (offset_2 > maxRep)
-                       offsetSaved = offset_2, offset_2 = 0;
-               if (offset_1 > maxRep)
-                       offsetSaved = offset_1, offset_1 = 0;
-       }
-
-       /* Main Search Loop */
-       while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
-               size_t mLength;
-               size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
-               size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
-               U32 const curr = (U32)(ip - base);
-               U32 const matchIndexL = hashLong[h2];
-               U32 const matchIndexS = hashSmall[h];
-               const BYTE *matchLong = base + matchIndexL;
-               const BYTE *match = base + matchIndexS;
-               hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
-
-               if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { /* note : by construction, offset_1 <= curr */
-                       mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4;
-                       ip++;
-                       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-               } else {
-                       U32 offset;
-                       if ((matchIndexL > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
-                               mLength = ZSTD_count(ip + 8, matchLong + 8, iend) + 8;
-                               offset = (U32)(ip - matchLong);
-                               while (((ip > anchor) & (matchLong > lowest)) && (ip[-1] == matchLong[-1])) {
-                                       ip--;
-                                       matchLong--;
-                                       mLength++;
-                               } /* catch up */
-                       } else if ((matchIndexS > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
-                               size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
-                               U32 const matchIndex3 = hashLong[h3];
-                               const BYTE *match3 = base + matchIndex3;
-                               hashLong[h3] = curr + 1;
-                               if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
-                                       mLength = ZSTD_count(ip + 9, match3 + 8, iend) + 8;
-                                       ip++;
-                                       offset = (U32)(ip - match3);
-                                       while (((ip > anchor) & (match3 > lowest)) && (ip[-1] == match3[-1])) {
-                                               ip--;
-                                               match3--;
-                                               mLength++;
-                                       } /* catch up */
-                               } else {
-                                       mLength = ZSTD_count(ip + 4, match + 4, iend) + 4;
-                                       offset = (U32)(ip - match);
-                                       while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) {
-                                               ip--;
-                                               match--;
-                                               mLength++;
-                                       } /* catch up */
-                               }
-                       } else {
-                               ip += ((ip - anchor) >> g_searchStrength) + 1;
-                               continue;
-                       }
-
-                       offset_2 = offset_1;
-                       offset_1 = offset;
-
-                       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-               }
-
-               /* match found */
-               ip += mLength;
-               anchor = ip;
-
-               if (ip <= ilimit) {
-                       /* Fill Table */
-                       hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] =
-                           curr + 2; /* here because curr+2 could be > iend-8 */
-                       hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
-
-                       /* check immediate repcode */
-                       while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-                               /* store sequence */
-                               size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4;
-                               {
-                                       U32 const tmpOff = offset_2;
-                                       offset_2 = offset_1;
-                                       offset_1 = tmpOff;
-                               } /* swap offset_2 <=> offset_1 */
-                               hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
-                               hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
-                               ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH);
-                               ip += rLength;
-                               anchor = ip;
-                               continue; /* faster when present ... (?) */
-                       }
-               }
-       }
-
-       /* save reps for next block */
-       cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved;
-       cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved;
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       const U32 mls = ctx->params.cParams.searchLength;
-       switch (mls) {
-       default: /* includes case 3 */
-       case 4: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
-       case 5: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
-       case 6: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
-       case 7: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
-       }
-}
-
-static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls)
-{
-       U32 *const hashLong = ctx->hashTable;
-       U32 const hBitsL = ctx->params.cParams.hashLog;
-       U32 *const hashSmall = ctx->chainTable;
-       U32 const hBitsS = ctx->params.cParams.chainLog;
-       seqStore_t *seqStorePtr = &(ctx->seqStore);
-       const BYTE *const base = ctx->base;
-       const BYTE *const dictBase = ctx->dictBase;
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const U32 lowestIndex = ctx->lowLimit;
-       const BYTE *const dictStart = dictBase + lowestIndex;
-       const U32 dictLimit = ctx->dictLimit;
-       const BYTE *const lowPrefixPtr = base + dictLimit;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - 8;
-       U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-       /* Search Loop */
-       while (ip < ilimit) { /* < instead of <=, because (ip+1) */
-               const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
-               const U32 matchIndex = hashSmall[hSmall];
-               const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base;
-               const BYTE *match = matchBase + matchIndex;
-
-               const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
-               const U32 matchLongIndex = hashLong[hLong];
-               const BYTE *matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
-               const BYTE *matchLong = matchLongBase + matchLongIndex;
-
-               const U32 curr = (U32)(ip - base);
-               const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
-               const BYTE *repBase = repIndex < dictLimit ? dictBase : base;
-               const BYTE *repMatch = repBase + repIndex;
-               size_t mLength;
-               hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
-
-               if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) &&
-                   (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) {
-                       const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
-                       mLength = ZSTD_count_2segments(ip + 1 + 4, repMatch + 4, iend, repMatchEnd, lowPrefixPtr) + 4;
-                       ip++;
-                       ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH);
-               } else {
-                       if ((matchLongIndex > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) {
-                               const BYTE *matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
-                               const BYTE *lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
-                               U32 offset;
-                               mLength = ZSTD_count_2segments(ip + 8, matchLong + 8, iend, matchEnd, lowPrefixPtr) + 8;
-                               offset = curr - matchLongIndex;
-                               while (((ip > anchor) & (matchLong > lowMatchPtr)) && (ip[-1] == matchLong[-1])) {
-                                       ip--;
-                                       matchLong--;
-                                       mLength++;
-                               } /* catch up */
-                               offset_2 = offset_1;
-                               offset_1 = offset;
-                               ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-
-                       } else if ((matchIndex > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) {
-                               size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8);
-                               U32 const matchIndex3 = hashLong[h3];
-                               const BYTE *const match3Base = matchIndex3 < dictLimit ? dictBase : base;
-                               const BYTE *match3 = match3Base + matchIndex3;
-                               U32 offset;
-                               hashLong[h3] = curr + 1;
-                               if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) {
-                                       const BYTE *matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
-                                       const BYTE *lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
-                                       mLength = ZSTD_count_2segments(ip + 9, match3 + 8, iend, matchEnd, lowPrefixPtr) + 8;
-                                       ip++;
-                                       offset = curr + 1 - matchIndex3;
-                                       while (((ip > anchor) & (match3 > lowMatchPtr)) && (ip[-1] == match3[-1])) {
-                                               ip--;
-                                               match3--;
-                                               mLength++;
-                                       } /* catch up */
-                               } else {
-                                       const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend;
-                                       const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
-                                       mLength = ZSTD_count_2segments(ip + 4, match + 4, iend, matchEnd, lowPrefixPtr) + 4;
-                                       offset = curr - matchIndex;
-                                       while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) {
-                                               ip--;
-                                               match--;
-                                               mLength++;
-                                       } /* catch up */
-                               }
-                               offset_2 = offset_1;
-                               offset_1 = offset;
-                               ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH);
-
-                       } else {
-                               ip += ((ip - anchor) >> g_searchStrength) + 1;
-                               continue;
-                       }
-               }
-
-               /* found a match : store it */
-               ip += mLength;
-               anchor = ip;
-
-               if (ip <= ilimit) {
-                       /* Fill Table */
-                       hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = curr + 2;
-                       hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = curr + 2;
-                       hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base);
-                       hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = (U32)(ip - 2 - base);
-                       /* check immediate repcode */
-                       while (ip <= ilimit) {
-                               U32 const curr2 = (U32)(ip - base);
-                               U32 const repIndex2 = curr2 - offset_2;
-                               const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
-                               if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
-                                   && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) {
-                                       const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
-                                       size_t const repLength2 =
-                                           ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
-                                       U32 tmpOffset = offset_2;
-                                       offset_2 = offset_1;
-                                       offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
-                                       ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH);
-                                       hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = curr2;
-                                       hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = curr2;
-                                       ip += repLength2;
-                                       anchor = ip;
-                                       continue;
-                               }
-                               break;
-                       }
-               }
-       }
-
-       /* save reps for next block */
-       ctx->repToConfirm[0] = offset_1;
-       ctx->repToConfirm[1] = offset_2;
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       U32 const mls = ctx->params.cParams.searchLength;
-       switch (mls) {
-       default: /* includes case 3 */
-       case 4: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
-       case 5: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
-       case 6: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
-       case 7: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
-       }
-}
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-/** ZSTD_insertBt1() : add one or multiple positions to tree.
-*   ip : assumed <= iend-8 .
-*   @return : nb of positions added */
-static U32 ZSTD_insertBt1(ZSTD_CCtx *zc, const BYTE *const ip, const U32 mls, const BYTE *const iend, U32 nbCompares, U32 extDict)
-{
-       U32 *const hashTable = zc->hashTable;
-       U32 const hashLog = zc->params.cParams.hashLog;
-       size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
-       U32 *const bt = zc->chainTable;
-       U32 const btLog = zc->params.cParams.chainLog - 1;
-       U32 const btMask = (1 << btLog) - 1;
-       U32 matchIndex = hashTable[h];
-       size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-       const BYTE *const base = zc->base;
-       const BYTE *const dictBase = zc->dictBase;
-       const U32 dictLimit = zc->dictLimit;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const BYTE *const prefixStart = base + dictLimit;
-       const BYTE *match;
-       const U32 curr = (U32)(ip - base);
-       const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-       U32 *smallerPtr = bt + 2 * (curr & btMask);
-       U32 *largerPtr = smallerPtr + 1;
-       U32 dummy32; /* to be nullified at the end */
-       U32 const windowLow = zc->lowLimit;
-       U32 matchEndIdx = curr + 8;
-       size_t bestLength = 8;
-
-       hashTable[h] = curr; /* Update Hash Table */
-
-       while (nbCompares-- && (matchIndex > windowLow)) {
-               U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
-               size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-
-               if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-                       match = base + matchIndex;
-                       if (match[matchLength] == ip[matchLength])
-                               matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
-               } else {
-                       match = dictBase + matchIndex;
-                       matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
-                       if (matchIndex + matchLength >= dictLimit)
-                               match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-               }
-
-               if (matchLength > bestLength) {
-                       bestLength = matchLength;
-                       if (matchLength > matchEndIdx - matchIndex)
-                               matchEndIdx = matchIndex + (U32)matchLength;
-               }
-
-               if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
-                       break;                /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */
-
-               if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
-                       /* match is smaller than curr */
-                       *smallerPtr = matchIndex;         /* update smaller idx */
-                       commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-                       if (matchIndex <= btLow) {
-                               smallerPtr = &dummy32;
-                               break;
-                       }                         /* beyond tree size, stop the search */
-                       smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-                       matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-               } else {
-                       /* match is larger than curr */
-                       *largerPtr = matchIndex;
-                       commonLengthLarger = matchLength;
-                       if (matchIndex <= btLow) {
-                               largerPtr = &dummy32;
-                               break;
-                       } /* beyond tree size, stop the search */
-                       largerPtr = nextPtr;
-                       matchIndex = nextPtr[0];
-               }
-       }
-
-       *smallerPtr = *largerPtr = 0;
-       if (bestLength > 384)
-               return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
-       if (matchEndIdx > curr + 8)
-               return matchEndIdx - curr - 8;
-       return 1;
-}
-
-static size_t ZSTD_insertBtAndFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, size_t *offsetPtr, U32 nbCompares, const U32 mls,
-                                           U32 extDict)
-{
-       U32 *const hashTable = zc->hashTable;
-       U32 const hashLog = zc->params.cParams.hashLog;
-       size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
-       U32 *const bt = zc->chainTable;
-       U32 const btLog = zc->params.cParams.chainLog - 1;
-       U32 const btMask = (1 << btLog) - 1;
-       U32 matchIndex = hashTable[h];
-       size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-       const BYTE *const base = zc->base;
-       const BYTE *const dictBase = zc->dictBase;
-       const U32 dictLimit = zc->dictLimit;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const BYTE *const prefixStart = base + dictLimit;
-       const U32 curr = (U32)(ip - base);
-       const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-       const U32 windowLow = zc->lowLimit;
-       U32 *smallerPtr = bt + 2 * (curr & btMask);
-       U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
-       U32 matchEndIdx = curr + 8;
-       U32 dummy32; /* to be nullified at the end */
-       size_t bestLength = 0;
-
-       hashTable[h] = curr; /* Update Hash Table */
-
-       while (nbCompares-- && (matchIndex > windowLow)) {
-               U32 *const nextPtr = bt + 2 * (matchIndex & btMask);
-               size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-               const BYTE *match;
-
-               if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-                       match = base + matchIndex;
-                       if (match[matchLength] == ip[matchLength])
-                               matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1;
-               } else {
-                       match = dictBase + matchIndex;
-                       matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart);
-                       if (matchIndex + matchLength >= dictLimit)
-                               match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-               }
-
-               if (matchLength > bestLength) {
-                       if (matchLength > matchEndIdx - matchIndex)
-                               matchEndIdx = matchIndex + (U32)matchLength;
-                       if ((4 * (int)(matchLength - bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)offsetPtr[0] + 1)))
-                               bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
-                       if (ip + matchLength == iend) /* equal : no way to know if inf or sup */
-                               break;                /* drop, to guarantee consistency (miss a little bit of compression) */
-               }
-
-               if (match[matchLength] < ip[matchLength]) {
-                       /* match is smaller than curr */
-                       *smallerPtr = matchIndex;         /* update smaller idx */
-                       commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-                       if (matchIndex <= btLow) {
-                               smallerPtr = &dummy32;
-                               break;
-                       }                         /* beyond tree size, stop the search */
-                       smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-                       matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-               } else {
-                       /* match is larger than curr */
-                       *largerPtr = matchIndex;
-                       commonLengthLarger = matchLength;
-                       if (matchIndex <= btLow) {
-                               largerPtr = &dummy32;
-                               break;
-                       } /* beyond tree size, stop the search */
-                       largerPtr = nextPtr;
-                       matchIndex = nextPtr[0];
-               }
-       }
-
-       *smallerPtr = *largerPtr = 0;
-
-       zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
-       return bestLength;
-}
-
-static void ZSTD_updateTree(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
-{
-       const BYTE *const base = zc->base;
-       const U32 target = (U32)(ip - base);
-       U32 idx = zc->nextToUpdate;
-
-       while (idx < target)
-               idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 0);
-}
-
-/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls)
-{
-       if (ip < zc->base + zc->nextToUpdate)
-               return 0; /* skipped area */
-       ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-       return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
-}
-
-static size_t ZSTD_BtFindBestMatch_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
-                                            const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch)
-{
-       switch (matchLengthSearch) {
-       default: /* includes case 3 */
-       case 4: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-       case 5: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-       case 7:
-       case 6: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-       }
-}
-
-static void ZSTD_updateTree_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls)
-{
-       const BYTE *const base = zc->base;
-       const U32 target = (U32)(ip - base);
-       U32 idx = zc->nextToUpdate;
-
-       while (idx < target)
-               idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 1);
-}
-
-/** Tree updater, providing best match */
-static size_t ZSTD_BtFindBestMatch_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-                                          const U32 mls)
-{
-       if (ip < zc->base + zc->nextToUpdate)
-               return 0; /* skipped area */
-       ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-       return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
-}
-
-static size_t ZSTD_BtFindBestMatch_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
-                                                    const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-                                                    const U32 matchLengthSearch)
-{
-       switch (matchLengthSearch) {
-       default: /* includes case 3 */
-       case 4: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
-       case 5: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
-       case 7:
-       case 6: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
-       }
-}
-
-/* *********************************
-*  Hash Chain
-***********************************/
-#define NEXT_IN_CHAIN(d, mask) chainTable[(d)&mask]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_CCtx *zc, const BYTE *ip, U32 mls)
-{
-       U32 *const hashTable = zc->hashTable;
-       const U32 hashLog = zc->params.cParams.hashLog;
-       U32 *const chainTable = zc->chainTable;
-       const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
-       const BYTE *const base = zc->base;
-       const U32 target = (U32)(ip - base);
-       U32 idx = zc->nextToUpdate;
-
-       while (idx < target) { /* catch up */
-               size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls);
-               NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-               hashTable[h] = idx;
-               idx++;
-       }
-
-       zc->nextToUpdate = target;
-       return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-/* inlining is important to hardwire a hot branch (template emulation) */
-FORCE_INLINE
-size_t ZSTD_HcFindBestMatch_generic(ZSTD_CCtx *zc, /* Index table will be updated */
-                                   const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls,
-                                   const U32 extDict)
-{
-       U32 *const chainTable = zc->chainTable;
-       const U32 chainSize = (1 << zc->params.cParams.chainLog);
-       const U32 chainMask = chainSize - 1;
-       const BYTE *const base = zc->base;
-       const BYTE *const dictBase = zc->dictBase;
-       const U32 dictLimit = zc->dictLimit;
-       const BYTE *const prefixStart = base + dictLimit;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const U32 lowLimit = zc->lowLimit;
-       const U32 curr = (U32)(ip - base);
-       const U32 minChain = curr > chainSize ? curr - chainSize : 0;
-       int nbAttempts = maxNbAttempts;
-       size_t ml = EQUAL_READ32 - 1;
-
-       /* HC4 match finder */
-       U32 matchIndex = ZSTD_insertAndFindFirstIndex(zc, ip, mls);
-
-       for (; (matchIndex > lowLimit) & (nbAttempts > 0); nbAttempts--) {
-               const BYTE *match;
-               size_t currMl = 0;
-               if ((!extDict) || matchIndex >= dictLimit) {
-                       match = base + matchIndex;
-                       if (match[ml] == ip[ml]) /* potentially better */
-                               currMl = ZSTD_count(ip, match, iLimit);
-               } else {
-                       match = dictBase + matchIndex;
-                       if (ZSTD_read32(match) == ZSTD_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                               currMl = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
-               }
-
-               /* save best solution */
-               if (currMl > ml) {
-                       ml = currMl;
-                       *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
-                       if (ip + currMl == iLimit)
-                               break; /* best possible, and avoid read overflow*/
-               }
-
-               if (matchIndex <= minChain)
-                       break;
-               matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
-       }
-
-       return ml;
-}
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-                                                  const U32 matchLengthSearch)
-{
-       switch (matchLengthSearch) {
-       default: /* includes case 3 */
-       case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
-       case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
-       case 7:
-       case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
-       }
-}
-
-FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts,
-                                                          const U32 matchLengthSearch)
-{
-       switch (matchLengthSearch) {
-       default: /* includes case 3 */
-       case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
-       case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
-       case 7:
-       case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
-       }
-}
-
-/* *******************************
-*  Common parser - lazy strategy
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
-{
-       seqStore_t *seqStorePtr = &(ctx->seqStore);
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - 8;
-       const BYTE *const base = ctx->base + ctx->dictLimit;
-
-       U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
-       U32 const mls = ctx->params.cParams.searchLength;
-
-       typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
-       searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
-       U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset = 0;
-
-       /* init */
-       ip += (ip == base);
-       ctx->nextToUpdate3 = ctx->nextToUpdate;
-       {
-               U32 const maxRep = (U32)(ip - base);
-               if (offset_2 > maxRep)
-                       savedOffset = offset_2, offset_2 = 0;
-               if (offset_1 > maxRep)
-                       savedOffset = offset_1, offset_1 = 0;
-       }
-
-       /* Match Loop */
-       while (ip < ilimit) {
-               size_t matchLength = 0;
-               size_t offset = 0;
-               const BYTE *start = ip + 1;
-
-               /* check repCode */
-               if ((offset_1 > 0) & (ZSTD_read32(ip + 1) == ZSTD_read32(ip + 1 - offset_1))) {
-                       /* repcode : we take it */
-                       matchLength = ZSTD_count(ip + 1 + EQUAL_READ32, ip + 1 + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-                       if (depth == 0)
-                               goto _storeSequence;
-               }
-
-               /* first search (depth 0) */
-               {
-                       size_t offsetFound = 99999999;
-                       size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
-                       if (ml2 > matchLength)
-                               matchLength = ml2, start = ip, offset = offsetFound;
-               }
-
-               if (matchLength < EQUAL_READ32) {
-                       ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
-                       continue;
-               }
-
-               /* let's try to find a better solution */
-               if (depth >= 1)
-                       while (ip < ilimit) {
-                               ip++;
-                               if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
-                                       size_t const mlRep = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-                                       int const gain2 = (int)(mlRep * 3);
-                                       int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
-                                       if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
-                                               matchLength = mlRep, offset = 0, start = ip;
-                               }
-                               {
-                                       size_t offset2 = 99999999;
-                                       size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                                       int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-                                       int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
-                                       if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-                                               matchLength = ml2, offset = offset2, start = ip;
-                                               continue; /* search a better one */
-                                       }
-                               }
-
-                               /* let's find an even better one */
-                               if ((depth == 2) && (ip < ilimit)) {
-                                       ip++;
-                                       if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) {
-                                               size_t const ml2 = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32;
-                                               int const gain2 = (int)(ml2 * 4);
-                                               int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
-                                               if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
-                                                       matchLength = ml2, offset = 0, start = ip;
-                                       }
-                                       {
-                                               size_t offset2 = 99999999;
-                                               size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                                               int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-                                               int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
-                                               if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-                                                       matchLength = ml2, offset = offset2, start = ip;
-                                                       continue;
-                                               }
-                                       }
-                               }
-                               break; /* nothing found : store previous solution */
-                       }
-
-               /* NOTE:
-                * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
-                * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
-                * overflows the pointer, which is undefined behavior.
-                */
-               /* catch up */
-               if (offset) {
-                       while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) &&
-                              (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */
-                       {
-                               start--;
-                               matchLength++;
-                       }
-                       offset_2 = offset_1;
-                       offset_1 = (U32)(offset - ZSTD_REP_MOVE);
-               }
-
-       /* store sequence */
-_storeSequence:
-               {
-                       size_t const litLength = start - anchor;
-                       ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
-                       anchor = ip = start + matchLength;
-               }
-
-               /* check immediate repcode */
-               while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) {
-                       /* store sequence */
-                       matchLength = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_2, iend) + EQUAL_READ32;
-                       offset = offset_2;
-                       offset_2 = offset_1;
-                       offset_1 = (U32)offset; /* swap repcodes */
-                       ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
-                       ip += matchLength;
-                       anchor = ip;
-                       continue; /* faster when present ... (?) */
-               }
-       }
-
-       /* Save reps for next block */
-       ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
-       ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); }
-
-static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); }
-
-static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); }
-
-static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); }
-
-FORCE_INLINE
-void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
-{
-       seqStore_t *seqStorePtr = &(ctx->seqStore);
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - 8;
-       const BYTE *const base = ctx->base;
-       const U32 dictLimit = ctx->dictLimit;
-       const U32 lowestIndex = ctx->lowLimit;
-       const BYTE *const prefixStart = base + dictLimit;
-       const BYTE *const dictBase = ctx->dictBase;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const BYTE *const dictStart = dictBase + ctx->lowLimit;
-
-       const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
-       const U32 mls = ctx->params.cParams.searchLength;
-
-       typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch);
-       searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
-
-       U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
-
-       /* init */
-       ctx->nextToUpdate3 = ctx->nextToUpdate;
-       ip += (ip == prefixStart);
-
-       /* Match Loop */
-       while (ip < ilimit) {
-               size_t matchLength = 0;
-               size_t offset = 0;
-               const BYTE *start = ip + 1;
-               U32 curr = (U32)(ip - base);
-
-               /* check repCode */
-               {
-                       const U32 repIndex = (U32)(curr + 1 - offset_1);
-                       const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-                       const BYTE *const repMatch = repBase + repIndex;
-                       if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-                               if (ZSTD_read32(ip + 1) == ZSTD_read32(repMatch)) {
-                                       /* repcode detected we should take it */
-                                       const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                                       matchLength =
-                                           ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-                                       if (depth == 0)
-                                               goto _storeSequence;
-                               }
-               }
-
-               /* first search (depth 0) */
-               {
-                       size_t offsetFound = 99999999;
-                       size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
-                       if (ml2 > matchLength)
-                               matchLength = ml2, start = ip, offset = offsetFound;
-               }
-
-               if (matchLength < EQUAL_READ32) {
-                       ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
-                       continue;
-               }
-
-               /* let's try to find a better solution */
-               if (depth >= 1)
-                       while (ip < ilimit) {
-                               ip++;
-                               curr++;
-                               /* check repCode */
-                               if (offset) {
-                                       const U32 repIndex = (U32)(curr - offset_1);
-                                       const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-                                       const BYTE *const repMatch = repBase + repIndex;
-                                       if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-                                               if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-                                                       /* repcode detected */
-                                                       const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                                                       size_t const repLength =
-                                                           ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) +
-                                                           EQUAL_READ32;
-                                                       int const gain2 = (int)(repLength * 3);
-                                                       int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1);
-                                                       if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
-                                                               matchLength = repLength, offset = 0, start = ip;
-                                               }
-                               }
-
-                               /* search match, depth 1 */
-                               {
-                                       size_t offset2 = 99999999;
-                                       size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                                       int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-                                       int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4);
-                                       if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-                                               matchLength = ml2, offset = offset2, start = ip;
-                                               continue; /* search a better one */
-                                       }
-                               }
-
-                               /* let's find an even better one */
-                               if ((depth == 2) && (ip < ilimit)) {
-                                       ip++;
-                                       curr++;
-                                       /* check repCode */
-                                       if (offset) {
-                                               const U32 repIndex = (U32)(curr - offset_1);
-                                               const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-                                               const BYTE *const repMatch = repBase + repIndex;
-                                               if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-                                                       if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-                                                               /* repcode detected */
-                                                               const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                                                               size_t repLength = ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend,
-                                                                                                       repEnd, prefixStart) +
-                                                                                  EQUAL_READ32;
-                                                               int gain2 = (int)(repLength * 4);
-                                                               int gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1);
-                                                               if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
-                                                                       matchLength = repLength, offset = 0, start = ip;
-                                                       }
-                                       }
-
-                                       /* search match, depth 2 */
-                                       {
-                                               size_t offset2 = 99999999;
-                                               size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
-                                               int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */
-                                               int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7);
-                                               if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
-                                                       matchLength = ml2, offset = offset2, start = ip;
-                                                       continue;
-                                               }
-                                       }
-                               }
-                               break; /* nothing found : store previous solution */
-                       }
-
-               /* catch up */
-               if (offset) {
-                       U32 const matchIndex = (U32)((start - base) - (offset - ZSTD_REP_MOVE));
-                       const BYTE *match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
-                       const BYTE *const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
-                       while ((start > anchor) && (match > mStart) && (start[-1] == match[-1])) {
-                               start--;
-                               match--;
-                               matchLength++;
-                       } /* catch up */
-                       offset_2 = offset_1;
-                       offset_1 = (U32)(offset - ZSTD_REP_MOVE);
-               }
-
-       /* store sequence */
-       _storeSequence : {
-               size_t const litLength = start - anchor;
-               ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH);
-               anchor = ip = start + matchLength;
-       }
-
-               /* check immediate repcode */
-               while (ip <= ilimit) {
-                       const U32 repIndex = (U32)((ip - base) - offset_2);
-                       const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-                       const BYTE *const repMatch = repBase + repIndex;
-                       if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-                               if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) {
-                                       /* repcode detected we should take it */
-                                       const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                                       matchLength =
-                                           ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
-                                       offset = offset_2;
-                                       offset_2 = offset_1;
-                                       offset_1 = (U32)offset; /* swap offset history */
-                                       ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH);
-                                       ip += matchLength;
-                                       anchor = ip;
-                                       continue; /* faster when present ... (?) */
-                               }
-                       break;
-               }
-       }
-
-       /* Save reps for next block */
-       ctx->repToConfirm[0] = offset_1;
-       ctx->repToConfirm[1] = offset_2;
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); }
-
-static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
-}
-
-static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
-}
-
-static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-       ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
-}
-
-/* The optimal parser */
-#include "zstd_opt.h"
-
-static void ZSTD_compressBlock_btopt(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-       ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
-#else
-       (void)ctx;
-       (void)src;
-       (void)srcSize;
-       return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt2(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-       ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
-#else
-       (void)ctx;
-       (void)src;
-       (void)srcSize;
-       return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-       ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
-#else
-       (void)ctx;
-       (void)src;
-       (void)srcSize;
-       return;
-#endif
-}
-
-static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize)
-{
-#ifdef ZSTD_OPT_H_91842398743
-       ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
-#else
-       (void)ctx;
-       (void)src;
-       (void)srcSize;
-       return;
-#endif
-}
-
-typedef void (*ZSTD_blockCompressor)(ZSTD_CCtx *ctx, const void *src, size_t srcSize);
-
-static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
-{
-       static const ZSTD_blockCompressor blockCompressor[2][8] = {
-           {ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2,
-            ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2},
-           {ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,
-            ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict}};
-
-       return blockCompressor[extDict][(U32)strat];
-}
-
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
-       const BYTE *const base = zc->base;
-       const BYTE *const istart = (const BYTE *)src;
-       const U32 curr = (U32)(istart - base);
-       if (srcSize < MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1)
-               return 0; /* don't even attempt compression below a certain srcSize */
-       ZSTD_resetSeqStore(&(zc->seqStore));
-       if (curr > zc->nextToUpdate + 384)
-               zc->nextToUpdate = curr - MIN(192, (U32)(curr - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */
-       blockCompressor(zc, src, srcSize);
-       return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
-}
-
-/*! ZSTD_compress_generic() :
-*   Compress a chunk of data into one or multiple blocks.
-*   All blocks will be terminated, all input will be consumed.
-*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
-*   Frame is supposed already started (header already produced)
-*   @return : compressed size, or an error code
-*/
-static size_t ZSTD_compress_generic(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastFrameChunk)
-{
-       size_t blockSize = cctx->blockSize;
-       size_t remaining = srcSize;
-       const BYTE *ip = (const BYTE *)src;
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *op = ostart;
-       U32 const maxDist = 1 << cctx->params.cParams.windowLog;
-
-       if (cctx->params.fParams.checksumFlag && srcSize)
-               xxh64_update(&cctx->xxhState, src, srcSize);
-
-       while (remaining) {
-               U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
-               size_t cSize;
-
-               if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE)
-                       return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
-               if (remaining < blockSize)
-                       blockSize = remaining;
-
-               /* preemptive overflow correction */
-               if (cctx->lowLimit > (3U << 29)) {
-                       U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1;
-                       U32 const curr = (U32)(ip - cctx->base);
-                       U32 const newCurr = (curr & cycleMask) + (1 << cctx->params.cParams.windowLog);
-                       U32 const correction = curr - newCurr;
-                       ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30);
-                       ZSTD_reduceIndex(cctx, correction);
-                       cctx->base += correction;
-                       cctx->dictBase += correction;
-                       cctx->lowLimit -= correction;
-                       cctx->dictLimit -= correction;
-                       if (cctx->nextToUpdate < correction)
-                               cctx->nextToUpdate = 0;
-                       else
-                               cctx->nextToUpdate -= correction;
-               }
-
-               if ((U32)(ip + blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
-                       /* enforce maxDist */
-                       U32 const newLowLimit = (U32)(ip + blockSize - cctx->base) - maxDist;
-                       if (cctx->lowLimit < newLowLimit)
-                               cctx->lowLimit = newLowLimit;
-                       if (cctx->dictLimit < cctx->lowLimit)
-                               cctx->dictLimit = cctx->lowLimit;
-               }
-
-               cSize = ZSTD_compressBlock_internal(cctx, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, ip, blockSize);
-               if (ZSTD_isError(cSize))
-                       return cSize;
-
-               if (cSize == 0) { /* block is not compressible */
-                       U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw) << 1) + (U32)(blockSize << 3);
-                       if (blockSize + ZSTD_blockHeaderSize > dstCapacity)
-                               return ERROR(dstSize_tooSmall);
-                       ZSTD_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
-                       memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
-                       cSize = ZSTD_blockHeaderSize + blockSize;
-               } else {
-                       U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed) << 1) + (U32)(cSize << 3);
-                       ZSTD_writeLE24(op, cBlockHeader24);
-                       cSize += ZSTD_blockHeaderSize;
-               }
-
-               remaining -= blockSize;
-               dstCapacity -= cSize;
-               ip += blockSize;
-               op += cSize;
-       }
-
-       if (lastFrameChunk && (op > ostart))
-               cctx->stage = ZSTDcs_ending;
-       return op - ostart;
-}
-
-static size_t ZSTD_writeFrameHeader(void *dst, size_t dstCapacity, ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
-{
-       BYTE *const op = (BYTE *)dst;
-       U32 const dictIDSizeCode = (dictID > 0) + (dictID >= 256) + (dictID >= 65536); /* 0-3 */
-       U32 const checksumFlag = params.fParams.checksumFlag > 0;
-       U32 const windowSize = 1U << params.cParams.windowLog;
-       U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
-       BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
-       U32 const fcsCode =
-           params.fParams.contentSizeFlag ? (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256) + (pledgedSrcSize >= 0xFFFFFFFFU) : 0; /* 0-3 */
-       BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag << 2) + (singleSegment << 5) + (fcsCode << 6));
-       size_t pos;
-
-       if (dstCapacity < ZSTD_frameHeaderSize_max)
-               return ERROR(dstSize_tooSmall);
-
-       ZSTD_writeLE32(dst, ZSTD_MAGICNUMBER);
-       op[4] = frameHeaderDecriptionByte;
-       pos = 5;
-       if (!singleSegment)
-               op[pos++] = windowLogByte;
-       switch (dictIDSizeCode) {
-       default: /* impossible */
-       case 0: break;
-       case 1:
-               op[pos] = (BYTE)(dictID);
-               pos++;
-               break;
-       case 2:
-               ZSTD_writeLE16(op + pos, (U16)dictID);
-               pos += 2;
-               break;
-       case 3:
-               ZSTD_writeLE32(op + pos, dictID);
-               pos += 4;
-               break;
-       }
-       switch (fcsCode) {
-       default: /* impossible */
-       case 0:
-               if (singleSegment)
-                       op[pos++] = (BYTE)(pledgedSrcSize);
-               break;
-       case 1:
-               ZSTD_writeLE16(op + pos, (U16)(pledgedSrcSize - 256));
-               pos += 2;
-               break;
-       case 2:
-               ZSTD_writeLE32(op + pos, (U32)(pledgedSrcSize));
-               pos += 4;
-               break;
-       case 3:
-               ZSTD_writeLE64(op + pos, (U64)(pledgedSrcSize));
-               pos += 8;
-               break;
-       }
-       return pos;
-}
-
-static size_t ZSTD_compressContinue_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 frame, U32 lastFrameChunk)
-{
-       const BYTE *const ip = (const BYTE *)src;
-       size_t fhSize = 0;
-
-       if (cctx->stage == ZSTDcs_created)
-               return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
-
-       if (frame && (cctx->stage == ZSTDcs_init)) {
-               fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
-               if (ZSTD_isError(fhSize))
-                       return fhSize;
-               dstCapacity -= fhSize;
-               dst = (char *)dst + fhSize;
-               cctx->stage = ZSTDcs_ongoing;
-       }
-
-       /* Check if blocks follow each other */
-       if (src != cctx->nextSrc) {
-               /* not contiguous */
-               ptrdiff_t const delta = cctx->nextSrc - ip;
-               cctx->lowLimit = cctx->dictLimit;
-               cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
-               cctx->dictBase = cctx->base;
-               cctx->base -= delta;
-               cctx->nextToUpdate = cctx->dictLimit;
-               if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE)
-                       cctx->lowLimit = cctx->dictLimit; /* too small extDict */
-       }
-
-       /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
-       if ((ip + srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
-               ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
-               U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
-               cctx->lowLimit = lowLimitMax;
-       }
-
-       cctx->nextSrc = ip + srcSize;
-
-       if (srcSize) {
-               size_t const cSize = frame ? ZSTD_compress_generic(cctx, dst, dstCapacity, src, srcSize, lastFrameChunk)
-                                          : ZSTD_compressBlock_internal(cctx, dst, dstCapacity, src, srcSize);
-               if (ZSTD_isError(cSize))
-                       return cSize;
-               return cSize + fhSize;
-       } else
-               return fhSize;
-}
-
-size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
-}
-
-size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx) { return MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); }
-
-size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
-       if (srcSize > blockSizeMax)
-               return ERROR(srcSize_wrong);
-       return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
-}
-
-/*! ZSTD_loadDictionaryContent() :
- *  @return : 0, or an error code
- */
-static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx *zc, const void *src, size_t srcSize)
-{
-       const BYTE *const ip = (const BYTE *)src;
-       const BYTE *const iend = ip + srcSize;
-
-       /* input becomes curr prefix */
-       zc->lowLimit = zc->dictLimit;
-       zc->dictLimit = (U32)(zc->nextSrc - zc->base);
-       zc->dictBase = zc->base;
-       zc->base += ip - zc->nextSrc;
-       zc->nextToUpdate = zc->dictLimit;
-       zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base);
-
-       zc->nextSrc = iend;
-       if (srcSize <= HASH_READ_SIZE)
-               return 0;
-
-       switch (zc->params.cParams.strategy) {
-       case ZSTD_fast: ZSTD_fillHashTable(zc, iend, zc->params.cParams.searchLength); break;
-
-       case ZSTD_dfast: ZSTD_fillDoubleHashTable(zc, iend, zc->params.cParams.searchLength); break;
-
-       case ZSTD_greedy:
-       case ZSTD_lazy:
-       case ZSTD_lazy2:
-               if (srcSize >= HASH_READ_SIZE)
-                       ZSTD_insertAndFindFirstIndex(zc, iend - HASH_READ_SIZE, zc->params.cParams.searchLength);
-               break;
-
-       case ZSTD_btlazy2:
-       case ZSTD_btopt:
-       case ZSTD_btopt2:
-               if (srcSize >= HASH_READ_SIZE)
-                       ZSTD_updateTree(zc, iend - HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
-               break;
-
-       default:
-               return ERROR(GENERIC); /* strategy doesn't exist; impossible */
-       }
-
-       zc->nextToUpdate = (U32)(iend - zc->base);
-       return 0;
-}
-
-/* Dictionaries that assign zero probability to symbols that show up causes problems
-   when FSE encoding.  Refuse dictionaries that assign zero probability to symbols
-   that we may encounter during compression.
-   NOTE: This behavior is not standard and could be improved in the future. */
-static size_t ZSTD_checkDictNCount(short *normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
-{
-       U32 s;
-       if (dictMaxSymbolValue < maxSymbolValue)
-               return ERROR(dictionary_corrupted);
-       for (s = 0; s <= maxSymbolValue; ++s) {
-               if (normalizedCounter[s] == 0)
-                       return ERROR(dictionary_corrupted);
-       }
-       return 0;
-}
-
-/* Dictionary format :
- * See :
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
- */
-/*! ZSTD_loadZstdDictionary() :
- * @return : 0, or an error code
- *  assumptions : magic number supposed already checked
- *                dictSize supposed > 8
- */
-static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
-{
-       const BYTE *dictPtr = (const BYTE *)dict;
-       const BYTE *const dictEnd = dictPtr + dictSize;
-       short offcodeNCount[MaxOff + 1];
-       unsigned offcodeMaxValue = MaxOff;
-
-       dictPtr += 4; /* skip magic number */
-       cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : ZSTD_readLE32(dictPtr);
-       dictPtr += 4;
-
-       {
-               size_t const hufHeaderSize = HUF_readCTable_wksp(cctx->hufTable, 255, dictPtr, dictEnd - dictPtr, cctx->tmpCounters, sizeof(cctx->tmpCounters));
-               if (HUF_isError(hufHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               dictPtr += hufHeaderSize;
-       }
-
-       {
-               unsigned offcodeLog;
-               size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
-               if (FSE_isError(offcodeHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               if (offcodeLog > OffFSELog)
-                       return ERROR(dictionary_corrupted);
-               /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
-               CHECK_E(FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-                       dictionary_corrupted);
-               dictPtr += offcodeHeaderSize;
-       }
-
-       {
-               short matchlengthNCount[MaxML + 1];
-               unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-               size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
-               if (FSE_isError(matchlengthHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               if (matchlengthLog > MLFSELog)
-                       return ERROR(dictionary_corrupted);
-               /* Every match length code must have non-zero probability */
-               CHECK_F(ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
-               CHECK_E(
-                   FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-                   dictionary_corrupted);
-               dictPtr += matchlengthHeaderSize;
-       }
-
-       {
-               short litlengthNCount[MaxLL + 1];
-               unsigned litlengthMaxValue = MaxLL, litlengthLog;
-               size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
-               if (FSE_isError(litlengthHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               if (litlengthLog > LLFSELog)
-                       return ERROR(dictionary_corrupted);
-               /* Every literal length code must have non-zero probability */
-               CHECK_F(ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
-               CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)),
-                       dictionary_corrupted);
-               dictPtr += litlengthHeaderSize;
-       }
-
-       if (dictPtr + 12 > dictEnd)
-               return ERROR(dictionary_corrupted);
-       cctx->rep[0] = ZSTD_readLE32(dictPtr + 0);
-       cctx->rep[1] = ZSTD_readLE32(dictPtr + 4);
-       cctx->rep[2] = ZSTD_readLE32(dictPtr + 8);
-       dictPtr += 12;
-
-       {
-               size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
-               U32 offcodeMax = MaxOff;
-               if (dictContentSize <= ((U32)-1) - 128 KB) {
-                       U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
-                       offcodeMax = ZSTD_highbit32(maxOffset);              /* Calculate minimum offset code required to represent maxOffset */
-               }
-               /* All offset values <= dictContentSize + 128 KB must be representable */
-               CHECK_F(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
-               /* All repCodes must be <= dictContentSize and != 0*/
-               {
-                       U32 u;
-                       for (u = 0; u < 3; u++) {
-                               if (cctx->rep[u] == 0)
-                                       return ERROR(dictionary_corrupted);
-                               if (cctx->rep[u] > dictContentSize)
-                                       return ERROR(dictionary_corrupted);
-                       }
-               }
-
-               cctx->flagStaticTables = 1;
-               cctx->flagStaticHufTable = HUF_repeat_valid;
-               return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize);
-       }
-}
-
-/** ZSTD_compress_insertDictionary() :
-*   @return : 0, or an error code */
-static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize)
-{
-       if ((dict == NULL) || (dictSize <= 8))
-               return 0;
-
-       /* dict as pure content */
-       if ((ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict))
-               return ZSTD_loadDictionaryContent(cctx, dict, dictSize);
-
-       /* dict as zstd dictionary */
-       return ZSTD_loadZstdDictionary(cctx, dict, dictSize);
-}
-
-/*! ZSTD_compressBegin_internal() :
-*   @return : 0, or an error code */
-static size_t ZSTD_compressBegin_internal(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize)
-{
-       ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
-       CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
-       return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
-}
-
-/*! ZSTD_compressBegin_advanced() :
-*   @return : 0, or an error code */
-size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-       /* compression parameters verification and optimization */
-       CHECK_F(ZSTD_checkCParams(params.cParams));
-       return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
-}
-
-size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, int compressionLevel)
-{
-       ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
-       return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
-}
-
-size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel) { return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); }
-
-/*! ZSTD_writeEpilogue() :
-*   Ends a frame.
-*   @return : nb of bytes written into dst (or an error code) */
-static size_t ZSTD_writeEpilogue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity)
-{
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *op = ostart;
-       size_t fhSize = 0;
-
-       if (cctx->stage == ZSTDcs_created)
-               return ERROR(stage_wrong); /* init missing */
-
-       /* special case : empty frame */
-       if (cctx->stage == ZSTDcs_init) {
-               fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
-               if (ZSTD_isError(fhSize))
-                       return fhSize;
-               dstCapacity -= fhSize;
-               op += fhSize;
-               cctx->stage = ZSTDcs_ongoing;
-       }
-
-       if (cctx->stage != ZSTDcs_ending) {
-               /* write one last empty block, make it the "last" block */
-               U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw) << 1) + 0;
-               if (dstCapacity < 4)
-                       return ERROR(dstSize_tooSmall);
-               ZSTD_writeLE32(op, cBlockHeader24);
-               op += ZSTD_blockHeaderSize;
-               dstCapacity -= ZSTD_blockHeaderSize;
-       }
-
-       if (cctx->params.fParams.checksumFlag) {
-               U32 const checksum = (U32)xxh64_digest(&cctx->xxhState);
-               if (dstCapacity < 4)
-                       return ERROR(dstSize_tooSmall);
-               ZSTD_writeLE32(op, checksum);
-               op += 4;
-       }
-
-       cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
-       return op - ostart;
-}
-
-size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       size_t endResult;
-       size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
-       if (ZSTD_isError(cSize))
-               return cSize;
-       endResult = ZSTD_writeEpilogue(cctx, (char *)dst + cSize, dstCapacity - cSize);
-       if (ZSTD_isError(endResult))
-               return endResult;
-       return cSize + endResult;
-}
-
-static size_t ZSTD_compress_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-                                    ZSTD_parameters params)
-{
-       CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
-       return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-                              ZSTD_parameters params)
-{
-       return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
-}
-
-size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, ZSTD_parameters params)
-{
-       return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, NULL, 0, params);
-}
-
-/* =====  Dictionary API  ===== */
-
-struct ZSTD_CDict_s {
-       void *dictBuffer;
-       const void *dictContent;
-       size_t dictContentSize;
-       ZSTD_CCtx *refContext;
-}; /* typedef'd tp ZSTD_CDict within "zstd.h" */
-
-size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams) { return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CDict)); }
-
-static ZSTD_CDict *ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem)
-{
-       if (!customMem.customAlloc || !customMem.customFree)
-               return NULL;
-
-       {
-               ZSTD_CDict *const cdict = (ZSTD_CDict *)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
-               ZSTD_CCtx *const cctx = ZSTD_createCCtx_advanced(customMem);
-
-               if (!cdict || !cctx) {
-                       ZSTD_free(cdict, customMem);
-                       ZSTD_freeCCtx(cctx);
-                       return NULL;
-               }
-
-               if ((byReference) || (!dictBuffer) || (!dictSize)) {
-                       cdict->dictBuffer = NULL;
-                       cdict->dictContent = dictBuffer;
-               } else {
-                       void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
-                       if (!internalBuffer) {
-                               ZSTD_free(cctx, customMem);
-                               ZSTD_free(cdict, customMem);
-                               return NULL;
-                       }
-                       memcpy(internalBuffer, dictBuffer, dictSize);
-                       cdict->dictBuffer = internalBuffer;
-                       cdict->dictContent = internalBuffer;
-               }
-
-               {
-                       size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0);
-                       if (ZSTD_isError(errorCode)) {
-                               ZSTD_free(cdict->dictBuffer, customMem);
-                               ZSTD_free(cdict, customMem);
-                               ZSTD_freeCCtx(cctx);
-                               return NULL;
-                       }
-               }
-
-               cdict->refContext = cctx;
-               cdict->dictContentSize = dictSize;
-               return cdict;
-       }
-}
-
-ZSTD_CDict *ZSTD_initCDict(const void *dict, size_t dictSize, ZSTD_parameters params, void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-       return ZSTD_createCDict_advanced(dict, dictSize, 1, params, stackMem);
-}
-
-size_t ZSTD_freeCDict(ZSTD_CDict *cdict)
-{
-       if (cdict == NULL)
-               return 0; /* support free on NULL */
-       {
-               ZSTD_customMem const cMem = cdict->refContext->customMem;
-               ZSTD_freeCCtx(cdict->refContext);
-               ZSTD_free(cdict->dictBuffer, cMem);
-               ZSTD_free(cdict, cMem);
-               return 0;
-       }
-}
-
-static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict *cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); }
-
-size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize)
-{
-       if (cdict->dictContentSize)
-               CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
-       else {
-               ZSTD_parameters params = cdict->refContext->params;
-               params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-               CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize));
-       }
-       return 0;
-}
-
-/*! ZSTD_compress_usingCDict() :
-*   Compression using a digested Dictionary.
-*   Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
-*   Note that compression level is decided during dictionary creation */
-size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_CDict *cdict)
-{
-       CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
-
-       if (cdict->refContext->params.fParams.contentSizeFlag == 1) {
-               cctx->params.fParams.contentSizeFlag = 1;
-               cctx->frameContentSize = srcSize;
-       } else {
-               cctx->params.fParams.contentSizeFlag = 0;
-       }
-
-       return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
-}
-
-/* ******************************************************************
-*  Streaming
-********************************************************************/
-
-typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
-
-struct ZSTD_CStream_s {
-       ZSTD_CCtx *cctx;
-       ZSTD_CDict *cdictLocal;
-       const ZSTD_CDict *cdict;
-       char *inBuff;
-       size_t inBuffSize;
-       size_t inToCompress;
-       size_t inBuffPos;
-       size_t inBuffTarget;
-       size_t blockSize;
-       char *outBuff;
-       size_t outBuffSize;
-       size_t outBuffContentSize;
-       size_t outBuffFlushedSize;
-       ZSTD_cStreamStage stage;
-       U32 checksum;
-       U32 frameEnded;
-       U64 pledgedSrcSize;
-       U64 inputProcessed;
-       ZSTD_parameters params;
-       ZSTD_customMem customMem;
-}; /* typedef'd to ZSTD_CStream within "zstd.h" */
-
-size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams)
-{
-       size_t const inBuffSize = (size_t)1 << cParams.windowLog;
-       size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, inBuffSize);
-       size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
-
-       return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
-}
-
-ZSTD_CStream *ZSTD_createCStream_advanced(ZSTD_customMem customMem)
-{
-       ZSTD_CStream *zcs;
-
-       if (!customMem.customAlloc || !customMem.customFree)
-               return NULL;
-
-       zcs = (ZSTD_CStream *)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
-       if (zcs == NULL)
-               return NULL;
-       memset(zcs, 0, sizeof(ZSTD_CStream));
-       memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
-       zcs->cctx = ZSTD_createCCtx_advanced(customMem);
-       if (zcs->cctx == NULL) {
-               ZSTD_freeCStream(zcs);
-               return NULL;
-       }
-       return zcs;
-}
-
-size_t ZSTD_freeCStream(ZSTD_CStream *zcs)
-{
-       if (zcs == NULL)
-               return 0; /* support free on NULL */
-       {
-               ZSTD_customMem const cMem = zcs->customMem;
-               ZSTD_freeCCtx(zcs->cctx);
-               zcs->cctx = NULL;
-               ZSTD_freeCDict(zcs->cdictLocal);
-               zcs->cdictLocal = NULL;
-               ZSTD_free(zcs->inBuff, cMem);
-               zcs->inBuff = NULL;
-               ZSTD_free(zcs->outBuff, cMem);
-               zcs->outBuff = NULL;
-               ZSTD_free(zcs, cMem);
-               return 0;
-       }
-}
-
-/*======   Initialization   ======*/
-
-size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */; }
-
-static size_t ZSTD_resetCStream_internal(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
-{
-       if (zcs->inBuffSize == 0)
-               return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */
-
-       if (zcs->cdict)
-               CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
-       else
-               CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
-
-       zcs->inToCompress = 0;
-       zcs->inBuffPos = 0;
-       zcs->inBuffTarget = zcs->blockSize;
-       zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-       zcs->stage = zcss_load;
-       zcs->frameEnded = 0;
-       zcs->pledgedSrcSize = pledgedSrcSize;
-       zcs->inputProcessed = 0;
-       return 0; /* ready to go */
-}
-
-size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize)
-{
-
-       zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0);
-
-       return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
-}
-
-static size_t ZSTD_initCStream_advanced(ZSTD_CStream *zcs, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize)
-{
-       /* allocate buffers */
-       {
-               size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
-               if (zcs->inBuffSize < neededInBuffSize) {
-                       zcs->inBuffSize = neededInBuffSize;
-                       ZSTD_free(zcs->inBuff, zcs->customMem);
-                       zcs->inBuff = (char *)ZSTD_malloc(neededInBuffSize, zcs->customMem);
-                       if (zcs->inBuff == NULL)
-                               return ERROR(memory_allocation);
-               }
-               zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
-       }
-       if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize) + 1) {
-               zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize) + 1;
-               ZSTD_free(zcs->outBuff, zcs->customMem);
-               zcs->outBuff = (char *)ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
-               if (zcs->outBuff == NULL)
-                       return ERROR(memory_allocation);
-       }
-
-       if (dict && dictSize >= 8) {
-               ZSTD_freeCDict(zcs->cdictLocal);
-               zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem);
-               if (zcs->cdictLocal == NULL)
-                       return ERROR(memory_allocation);
-               zcs->cdict = zcs->cdictLocal;
-       } else
-               zcs->cdict = NULL;
-
-       zcs->checksum = params.fParams.checksumFlag > 0;
-       zcs->params = params;
-
-       return ZSTD_resetCStream_internal(zcs, pledgedSrcSize);
-}
-
-ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-       ZSTD_CStream *const zcs = ZSTD_createCStream_advanced(stackMem);
-       if (zcs) {
-               size_t const code = ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize);
-               if (ZSTD_isError(code)) {
-                       return NULL;
-               }
-       }
-       return zcs;
-}
-
-ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize)
-{
-       ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
-       ZSTD_CStream *const zcs = ZSTD_initCStream(params, pledgedSrcSize, workspace, workspaceSize);
-       if (zcs) {
-               zcs->cdict = cdict;
-               if (ZSTD_isError(ZSTD_resetCStream_internal(zcs, pledgedSrcSize))) {
-                       return NULL;
-               }
-       }
-       return zcs;
-}
-
-/*======   Compression   ======*/
-
-typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
-
-ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       size_t const length = MIN(dstCapacity, srcSize);
-       memcpy(dst, src, length);
-       return length;
-}
-
-static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t *dstCapacityPtr, const void *src, size_t *srcSizePtr, ZSTD_flush_e const flush)
-{
-       U32 someMoreWork = 1;
-       const char *const istart = (const char *)src;
-       const char *const iend = istart + *srcSizePtr;
-       const char *ip = istart;
-       char *const ostart = (char *)dst;
-       char *const oend = ostart + *dstCapacityPtr;
-       char *op = ostart;
-
-       while (someMoreWork) {
-               switch (zcs->stage) {
-               case zcss_init:
-                       return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
-
-               case zcss_load:
-                       /* complete inBuffer */
-                       {
-                               size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
-                               size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend - ip);
-                               zcs->inBuffPos += loaded;
-                               ip += loaded;
-                               if ((zcs->inBuffPos == zcs->inToCompress) || (!flush && (toLoad != loaded))) {
-                                       someMoreWork = 0;
-                                       break; /* not enough input to get a full block : stop there, wait for more */
-                               }
-                       }
-                       /* compress curr block (note : this stage cannot be stopped in the middle) */
-                       {
-                               void *cDst;
-                               size_t cSize;
-                               size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
-                               size_t oSize = oend - op;
-                               if (oSize >= ZSTD_compressBound(iSize))
-                                       cDst = op; /* compress directly into output buffer (avoid flush stage) */
-                               else
-                                       cDst = zcs->outBuff, oSize = zcs->outBuffSize;
-                               cSize = (flush == zsf_end) ? ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize)
-                                                          : ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
-                               if (ZSTD_isError(cSize))
-                                       return cSize;
-                               if (flush == zsf_end)
-                                       zcs->frameEnded = 1;
-                               /* prepare next block */
-                               zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
-                               if (zcs->inBuffTarget > zcs->inBuffSize)
-                                       zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
-                               zcs->inToCompress = zcs->inBuffPos;
-                               if (cDst == op) {
-                                       op += cSize;
-                                       break;
-                               } /* no need to flush */
-                               zcs->outBuffContentSize = cSize;
-                               zcs->outBuffFlushedSize = 0;
-                               zcs->stage = zcss_flush; /* pass-through to flush stage */
-                       }
-                       fallthrough;
-
-               case zcss_flush: {
-                       size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-                       size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-                       op += flushed;
-                       zcs->outBuffFlushedSize += flushed;
-                       if (toFlush != flushed) {
-                               someMoreWork = 0;
-                               break;
-                       } /* dst too small to store flushed data : stop there */
-                       zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
-                       zcs->stage = zcss_load;
-                       break;
-               }
-
-               case zcss_final:
-                       someMoreWork = 0; /* do nothing */
-                       break;
-
-               default:
-                       return ERROR(GENERIC); /* impossible */
-               }
-       }
-
-       *srcSizePtr = ip - istart;
-       *dstCapacityPtr = op - ostart;
-       zcs->inputProcessed += *srcSizePtr;
-       if (zcs->frameEnded)
-               return 0;
-       {
-               size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
-               if (hintInSize == 0)
-                       hintInSize = zcs->blockSize;
-               return hintInSize;
-       }
-}
-
-size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
-{
-       size_t sizeRead = input->size - input->pos;
-       size_t sizeWritten = output->size - output->pos;
-       size_t const result =
-           ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, (const char *)(input->src) + input->pos, &sizeRead, zsf_gather);
-       input->pos += sizeRead;
-       output->pos += sizeWritten;
-       return result;
-}
-
-/*======   Finalize   ======*/
-
-/*! ZSTD_flushStream() :
-*   @return : amount of data remaining to flush */
-size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
-{
-       size_t srcSize = 0;
-       size_t sizeWritten = output->size - output->pos;
-       size_t const result = ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, &srcSize,
-                                                         &srcSize, /* use a valid src address instead of NULL */
-                                                         zsf_flush);
-       output->pos += sizeWritten;
-       if (ZSTD_isError(result))
-               return result;
-       return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
-}
-
-size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output)
-{
-       BYTE *const ostart = (BYTE *)(output->dst) + output->pos;
-       BYTE *const oend = (BYTE *)(output->dst) + output->size;
-       BYTE *op = ostart;
-
-       if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize))
-               return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */
-
-       if (zcs->stage != zcss_final) {
-               /* flush whatever remains */
-               size_t srcSize = 0;
-               size_t sizeWritten = output->size - output->pos;
-               size_t const notEnded =
-                   ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
-               size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-               op += sizeWritten;
-               if (remainingToFlush) {
-                       output->pos += sizeWritten;
-                       return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
-               }
-               /* create epilogue */
-               zcs->stage = zcss_final;
-               zcs->outBuffContentSize = !notEnded ? 0 : ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL,
-                                                                          0); /* write epilogue, including final empty block, into outBuff */
-       }
-
-       /* flush epilogue */
-       {
-               size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
-               size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
-               op += flushed;
-               zcs->outBuffFlushedSize += flushed;
-               output->pos += op - ostart;
-               if (toFlush == flushed)
-                       zcs->stage = zcss_init; /* end reached */
-               return toFlush - flushed;
-       }
-}
-
-/*-=====  Pre-defined compression levels  =====-*/
-
-#define ZSTD_DEFAULT_CLEVEL 1
-#define ZSTD_MAX_CLEVEL 22
-int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
-
-static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL + 1] = {
-    {
-       /* "default" */
-       /* W,  C,  H,  S,  L, TL, strat */
-       {18, 12, 12, 1, 7, 16, ZSTD_fast},    /* level  0 - never used */
-       {19, 13, 14, 1, 7, 16, ZSTD_fast},    /* level  1 */
-       {19, 15, 16, 1, 6, 16, ZSTD_fast},    /* level  2 */
-       {20, 16, 17, 1, 5, 16, ZSTD_dfast},   /* level  3.*/
-       {20, 18, 18, 1, 5, 16, ZSTD_dfast},   /* level  4.*/
-       {20, 15, 18, 3, 5, 16, ZSTD_greedy},  /* level  5 */
-       {21, 16, 19, 2, 5, 16, ZSTD_lazy},    /* level  6 */
-       {21, 17, 20, 3, 5, 16, ZSTD_lazy},    /* level  7 */
-       {21, 18, 20, 3, 5, 16, ZSTD_lazy2},   /* level  8 */
-       {21, 20, 20, 3, 5, 16, ZSTD_lazy2},   /* level  9 */
-       {21, 19, 21, 4, 5, 16, ZSTD_lazy2},   /* level 10 */
-       {22, 20, 22, 4, 5, 16, ZSTD_lazy2},   /* level 11 */
-       {22, 20, 22, 5, 5, 16, ZSTD_lazy2},   /* level 12 */
-       {22, 21, 22, 5, 5, 16, ZSTD_lazy2},   /* level 13 */
-       {22, 21, 22, 6, 5, 16, ZSTD_lazy2},   /* level 14 */
-       {22, 21, 21, 5, 5, 16, ZSTD_btlazy2}, /* level 15 */
-       {23, 22, 22, 5, 5, 16, ZSTD_btlazy2}, /* level 16 */
-       {23, 21, 22, 4, 5, 24, ZSTD_btopt},   /* level 17 */
-       {23, 23, 22, 6, 5, 32, ZSTD_btopt},   /* level 18 */
-       {23, 23, 22, 6, 3, 48, ZSTD_btopt},   /* level 19 */
-       {25, 25, 23, 7, 3, 64, ZSTD_btopt2},  /* level 20 */
-       {26, 26, 23, 7, 3, 256, ZSTD_btopt2}, /* level 21 */
-       {27, 27, 25, 9, 3, 512, ZSTD_btopt2}, /* level 22 */
-    },
-    {
-       /* for srcSize <= 256 KB */
-       /* W,  C,  H,  S,  L,  T, strat */
-       {0, 0, 0, 0, 0, 0, ZSTD_fast},   /* level  0 - not used */
-       {18, 13, 14, 1, 6, 8, ZSTD_fast},      /* level  1 */
-       {18, 14, 13, 1, 5, 8, ZSTD_dfast},     /* level  2 */
-       {18, 16, 15, 1, 5, 8, ZSTD_dfast},     /* level  3 */
-       {18, 15, 17, 1, 5, 8, ZSTD_greedy},    /* level  4.*/
-       {18, 16, 17, 4, 5, 8, ZSTD_greedy},    /* level  5.*/
-       {18, 16, 17, 3, 5, 8, ZSTD_lazy},      /* level  6.*/
-       {18, 17, 17, 4, 4, 8, ZSTD_lazy},      /* level  7 */
-       {18, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
-       {18, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
-       {18, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
-       {18, 18, 17, 6, 4, 8, ZSTD_lazy2},     /* level 11.*/
-       {18, 18, 17, 7, 4, 8, ZSTD_lazy2},     /* level 12.*/
-       {18, 19, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13 */
-       {18, 18, 18, 4, 4, 16, ZSTD_btopt},    /* level 14.*/
-       {18, 18, 18, 4, 3, 16, ZSTD_btopt},    /* level 15.*/
-       {18, 19, 18, 6, 3, 32, ZSTD_btopt},    /* level 16.*/
-       {18, 19, 18, 8, 3, 64, ZSTD_btopt},    /* level 17.*/
-       {18, 19, 18, 9, 3, 128, ZSTD_btopt},   /* level 18.*/
-       {18, 19, 18, 10, 3, 256, ZSTD_btopt},  /* level 19.*/
-       {18, 19, 18, 11, 3, 512, ZSTD_btopt2}, /* level 20.*/
-       {18, 19, 18, 12, 3, 512, ZSTD_btopt2}, /* level 21.*/
-       {18, 19, 18, 13, 3, 512, ZSTD_btopt2}, /* level 22.*/
-    },
-    {
-       /* for srcSize <= 128 KB */
-       /* W,  C,  H,  S,  L,  T, strat */
-       {17, 12, 12, 1, 7, 8, ZSTD_fast},      /* level  0 - not used */
-       {17, 12, 13, 1, 6, 8, ZSTD_fast},      /* level  1 */
-       {17, 13, 16, 1, 5, 8, ZSTD_fast},      /* level  2 */
-       {17, 16, 16, 2, 5, 8, ZSTD_dfast},     /* level  3 */
-       {17, 13, 15, 3, 4, 8, ZSTD_greedy},    /* level  4 */
-       {17, 15, 17, 4, 4, 8, ZSTD_greedy},    /* level  5 */
-       {17, 16, 17, 3, 4, 8, ZSTD_lazy},      /* level  6 */
-       {17, 15, 17, 4, 4, 8, ZSTD_lazy2},     /* level  7 */
-       {17, 17, 17, 4, 4, 8, ZSTD_lazy2},     /* level  8 */
-       {17, 17, 17, 5, 4, 8, ZSTD_lazy2},     /* level  9 */
-       {17, 17, 17, 6, 4, 8, ZSTD_lazy2},     /* level 10 */
-       {17, 17, 17, 7, 4, 8, ZSTD_lazy2},     /* level 11 */
-       {17, 17, 17, 8, 4, 8, ZSTD_lazy2},     /* level 12 */
-       {17, 18, 17, 6, 4, 8, ZSTD_btlazy2},   /* level 13.*/
-       {17, 17, 17, 7, 3, 8, ZSTD_btopt},     /* level 14.*/
-       {17, 17, 17, 7, 3, 16, ZSTD_btopt},    /* level 15.*/
-       {17, 18, 17, 7, 3, 32, ZSTD_btopt},    /* level 16.*/
-       {17, 18, 17, 7, 3, 64, ZSTD_btopt},    /* level 17.*/
-       {17, 18, 17, 7, 3, 256, ZSTD_btopt},   /* level 18.*/
-       {17, 18, 17, 8, 3, 256, ZSTD_btopt},   /* level 19.*/
-       {17, 18, 17, 9, 3, 256, ZSTD_btopt2},  /* level 20.*/
-       {17, 18, 17, 10, 3, 256, ZSTD_btopt2}, /* level 21.*/
-       {17, 18, 17, 11, 3, 512, ZSTD_btopt2}, /* level 22.*/
-    },
-    {
-       /* for srcSize <= 16 KB */
-       /* W,  C,  H,  S,  L,  T, strat */
-       {14, 12, 12, 1, 7, 6, ZSTD_fast},      /* level  0 - not used */
-       {14, 14, 14, 1, 6, 6, ZSTD_fast},      /* level  1 */
-       {14, 14, 14, 1, 4, 6, ZSTD_fast},      /* level  2 */
-       {14, 14, 14, 1, 4, 6, ZSTD_dfast},     /* level  3.*/
-       {14, 14, 14, 4, 4, 6, ZSTD_greedy},    /* level  4.*/
-       {14, 14, 14, 3, 4, 6, ZSTD_lazy},      /* level  5.*/
-       {14, 14, 14, 4, 4, 6, ZSTD_lazy2},     /* level  6 */
-       {14, 14, 14, 5, 4, 6, ZSTD_lazy2},     /* level  7 */
-       {14, 14, 14, 6, 4, 6, ZSTD_lazy2},     /* level  8.*/
-       {14, 15, 14, 6, 4, 6, ZSTD_btlazy2},   /* level  9.*/
-       {14, 15, 14, 3, 3, 6, ZSTD_btopt},     /* level 10.*/
-       {14, 15, 14, 6, 3, 8, ZSTD_btopt},     /* level 11.*/
-       {14, 15, 14, 6, 3, 16, ZSTD_btopt},    /* level 12.*/
-       {14, 15, 14, 6, 3, 24, ZSTD_btopt},    /* level 13.*/
-       {14, 15, 15, 6, 3, 48, ZSTD_btopt},    /* level 14.*/
-       {14, 15, 15, 6, 3, 64, ZSTD_btopt},    /* level 15.*/
-       {14, 15, 15, 6, 3, 96, ZSTD_btopt},    /* level 16.*/
-       {14, 15, 15, 6, 3, 128, ZSTD_btopt},   /* level 17.*/
-       {14, 15, 15, 6, 3, 256, ZSTD_btopt},   /* level 18.*/
-       {14, 15, 15, 7, 3, 256, ZSTD_btopt},   /* level 19.*/
-       {14, 15, 15, 8, 3, 256, ZSTD_btopt2},  /* level 20.*/
-       {14, 15, 15, 9, 3, 256, ZSTD_btopt2},  /* level 21.*/
-       {14, 15, 15, 10, 3, 256, ZSTD_btopt2}, /* level 22.*/
-    },
-};
-
-/*! ZSTD_getCParams() :
-*   @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
-*   Size values are optional, provide 0 if not known or unused */
-ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-       ZSTD_compressionParameters cp;
-       size_t const addedSize = srcSize ? 0 : 500;
-       U64 const rSize = srcSize + dictSize ? srcSize + dictSize + addedSize : (U64)-1;
-       U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
-       if (compressionLevel <= 0)
-               compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
-       if (compressionLevel > ZSTD_MAX_CLEVEL)
-               compressionLevel = ZSTD_MAX_CLEVEL;
-       cp = ZSTD_defaultCParameters[tableID][compressionLevel];
-       if (ZSTD_32bits()) { /* auto-correction, for 32-bits mode */
-               if (cp.windowLog > ZSTD_WINDOWLOG_MAX)
-                       cp.windowLog = ZSTD_WINDOWLOG_MAX;
-               if (cp.chainLog > ZSTD_CHAINLOG_MAX)
-                       cp.chainLog = ZSTD_CHAINLOG_MAX;
-               if (cp.hashLog > ZSTD_HASHLOG_MAX)
-                       cp.hashLog = ZSTD_HASHLOG_MAX;
-       }
-       cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
-       return cp;
-}
-
-/*! ZSTD_getParams() :
-*   same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
-*   All fields of `ZSTD_frameParameters` are set to default (0) */
-ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
-{
-       ZSTD_parameters params;
-       ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
-       memset(&params, 0, sizeof(params));
-       params.cParams = cParams;
-       return params;
-}
-
-EXPORT_SYMBOL(ZSTD_maxCLevel);
-EXPORT_SYMBOL(ZSTD_compressBound);
-
-EXPORT_SYMBOL(ZSTD_CCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCCtx);
-EXPORT_SYMBOL(ZSTD_compressCCtx);
-EXPORT_SYMBOL(ZSTD_compress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_CDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCDict);
-EXPORT_SYMBOL(ZSTD_compress_usingCDict);
-
-EXPORT_SYMBOL(ZSTD_CStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initCStream);
-EXPORT_SYMBOL(ZSTD_initCStream_usingCDict);
-EXPORT_SYMBOL(ZSTD_resetCStream);
-EXPORT_SYMBOL(ZSTD_compressStream);
-EXPORT_SYMBOL(ZSTD_flushStream);
-EXPORT_SYMBOL(ZSTD_endStream);
-EXPORT_SYMBOL(ZSTD_CStreamInSize);
-EXPORT_SYMBOL(ZSTD_CStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_getCParams);
-EXPORT_SYMBOL(ZSTD_getParams);
-EXPORT_SYMBOL(ZSTD_checkCParams);
-EXPORT_SYMBOL(ZSTD_adjustCParams);
-
-EXPORT_SYMBOL(ZSTD_compressBegin);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_compressBegin_advanced);
-EXPORT_SYMBOL(ZSTD_copyCCtx);
-EXPORT_SYMBOL(ZSTD_compressBegin_usingCDict);
-EXPORT_SYMBOL(ZSTD_compressContinue);
-EXPORT_SYMBOL(ZSTD_compressEnd);
-
-EXPORT_SYMBOL(ZSTD_getBlockSizeMax);
-EXPORT_SYMBOL(ZSTD_compressBlock);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
new file mode 100644 (file)
index 0000000..436985b
--- /dev/null
@@ -0,0 +1,625 @@
+/* ******************************************************************
+ * FSE : Finite State Entropy encoder
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/compiler.h"
+#include "../common/mem.h"        /* U32, U16, etc. */
+#include "../common/debug.h"      /* assert, DEBUGLOG */
+#include "hist.h"       /* HIST_count_wksp */
+#include "../common/bitstream.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#include "../common/error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#define ZSTD_DEPS_NEED_MATH64
+#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                            void* workSpace, size_t wkspSize)
+{
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    U32 const step = FSE_TABLESTEP(tableSize);
+
+    U32* cumul = (U32*)workSpace;
+    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+
+    U32 highThreshold = tableSize-1;
+
+    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
+    /* CTable header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+    assert(tableLog < 16);   /* required for threshold strategy to work */
+
+    /* For explanations on how to distribute symbol values over the table :
+     * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+     #ifdef __clang_analyzer__
+     ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+     #endif
+
+    /* symbol start positions */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u <= maxSymbolValue+1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+        }   }
+        cumul[maxSymbolValue+1] = tableSize+1;
+    }
+
+    /* Spread symbols */
+    {   U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            int nbOccurrences;
+            int const freq = normalizedCounter[symbol];
+            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold)
+                    position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+
+        assert(position==0);  /* Must have initialized all positions */
+    }
+
+    /* Build table */
+    {   U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }   }
+
+    /* Build Symbol Transformation Table */
+    {   unsigned total = 0;
+        unsigned s;
+        for (s=0; s<=maxSymbolValue; s++) {
+            switch (normalizedCounter[s])
+            {
+            case  0:
+                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
+                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
+                break;
+
+            case -1:
+            case  1:
+                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
+                symbolTT[s].deltaFindState = total - 1;
+                total ++;
+                break;
+            default :
+                {
+                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
+                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
+                    total +=  normalizedCounter[s];
+    }   }   }   }
+
+#if 0  /* debug : symbol costs */
+    DEBUGLOG(5, "\n --- table statistics : ");
+    {   U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            DEBUGLOG(5, "%3u: w=%3i,   maxBits=%u, fracBits=%.2f",
+                symbol, normalizedCounter[symbol],
+                FSE_getMaxNbBits(symbolTT, symbol),
+                (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
+        }
+    }
+#endif
+
+    return 0;
+}
+
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+
+/*-**************************************************************
+*  FSE NCount encoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
+}
+
+static size_t
+FSE_writeNCount_generic (void* header, size_t headerBufferSize,
+                   const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                         unsigned writeIsSafe)
+{
+    BYTE* const ostart = (BYTE*) header;
+    BYTE* out = ostart;
+    BYTE* const oend = ostart + headerBufferSize;
+    int nbBits;
+    const int tableSize = 1 << tableLog;
+    int remaining;
+    int threshold;
+    U32 bitStream = 0;
+    int bitCount = 0;
+    unsigned symbol = 0;
+    unsigned const alphabetSize = maxSymbolValue + 1;
+    int previousIs0 = 0;
+
+    /* Table Size */
+    bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
+    bitCount  += 4;
+
+    /* Init */
+    remaining = tableSize+1;   /* +1 for extra accuracy */
+    threshold = tableSize;
+    nbBits = tableLog+1;
+
+    while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
+        if (previousIs0) {
+            unsigned start = symbol;
+            while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
+            if (symbol == alphabetSize) break;   /* incorrect distribution */
+            while (symbol >= start+24) {
+                start+=24;
+                bitStream += 0xFFFFU << bitCount;
+                if ((!writeIsSafe) && (out > oend-2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE) bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out+=2;
+                bitStream>>=16;
+            }
+            while (symbol >= start+3) {
+                start+=3;
+                bitStream += 3 << bitCount;
+                bitCount += 2;
+            }
+            bitStream += (symbol-start) << bitCount;
+            bitCount += 2;
+            if (bitCount>16) {
+                if ((!writeIsSafe) && (out > oend - 2))
+                    return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+                out[0] = (BYTE)bitStream;
+                out[1] = (BYTE)(bitStream>>8);
+                out += 2;
+                bitStream >>= 16;
+                bitCount -= 16;
+        }   }
+        {   int count = normalizedCounter[symbol++];
+            int const max = (2*threshold-1) - remaining;
+            remaining -= count < 0 ? -count : count;
+            count++;   /* +1 for extra accuracy */
+            if (count>=threshold)
+                count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
+            bitStream += count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);
+            previousIs0  = (count==1);
+            if (remaining<1) return ERROR(GENERIC);
+            while (remaining<threshold) { nbBits--; threshold>>=1; }
+        }
+        if (bitCount>16) {
+            if ((!writeIsSafe) && (out > oend - 2))
+                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+    }   }
+
+    if (remaining != 1)
+        return ERROR(GENERIC);  /* incorrect normalized distribution */
+    assert(symbol <= alphabetSize);
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2))
+        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;
+
+    return (out-ostart);
+}
+
+
+size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+
+    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
+        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
+}
+
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+
+FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t size;
+    if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
+    size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
+    return (FSE_CTable*)ZSTD_malloc(size);
+}
+
+void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); }
+
+/* provides the minimum logSize to safely represent a distribution */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
+    U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
+    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    return minBits;
+}
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+    U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
+    U32 tableLog = maxTableLog;
+    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
+    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
+    return tableLog;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
+}
+
+/* Secondary normalization method.
+   To be used when primary method fails. */
+
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
+{
+    short const NOT_YET_ASSIGNED = -2;
+    U32 s;
+    U32 distributed = 0;
+    U32 ToDistribute;
+
+    /* Init */
+    U32 const lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (count[s] == 0) {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold) {
+            norm[s] = lowProbCount;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne) {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+
+        norm[s]=NOT_YET_ASSIGNED;
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+
+    if (ToDistribute == 0)
+        return 0;
+
+    if ((total / ToDistribute) > lowOne) {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+        }   }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if (distributed == maxSymbolValue+1) {
+        /* all values are pretty poor;
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
+            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+    }   }   }
+
+    return 0;
+}
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue, unsigned useLowProbCount)
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        short const lowProbCount = useLowProbCount ? -1 : 1;
+        U64 const scale = 62 - tableLog;
+        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (count[s] == total) return 0;   /* rle special case */
+            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
+            if (count[s] <= lowThreshold) {
+                normalizedCounter[s] = lowProbCount;
+                stillToDistribute--;
+            } else {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8) {
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
+                }
+                if (proba > largestP) { largestP=proba; largest=s; }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+        }   }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+            /* corner case, need another normalization method */
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U<<tableLog))
+            RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
+        getchar();
+    }
+#endif
+
+    return tableLog;
+}
+
+
+/* fake FSE_CTable, for raw (uncompressed) input */
+size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
+{
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);             /* min size */
+
+    /* header */
+    tableU16[-2] = (U16) nbBits;
+    tableU16[-1] = (U16) maxSymbolValue;
+
+    /* Build table */
+    for (s=0; s<tableSize; s++)
+        tableU16[s] = (U16)(tableSize + s);
+
+    /* Build Symbol Transformation Table */
+    {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
+        for (s=0; s<=maxSymbolValue; s++) {
+            symbolTT[s].deltaNbBits = deltaNbBits;
+            symbolTT[s].deltaFindState = s-1;
+    }   }
+
+    return 0;
+}
+
+/* fake FSE_CTable, for rle input (always same symbol) */
+size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
+{
+    void* ptr = ct;
+    U16* tableU16 = ( (U16*) ptr) + 2;
+    void* FSCTptr = (U32*)ptr + 2;
+    FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
+
+    /* header */
+    tableU16[-2] = (U16) 0;
+    tableU16[-1] = (U16) symbolValue;
+
+    /* Build table */
+    tableU16[0] = 0;
+    tableU16[1] = 0;   /* just in case */
+
+    /* Build Symbol Transformation Table */
+    symbolTT[symbolValue].deltaNbBits = 0;
+    symbolTT[symbolValue].deltaFindState = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct, const unsigned fast)
+{
+    const BYTE* const istart = (const BYTE*) src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip=iend;
+
+    BIT_CStream_t bitC;
+    FSE_CState_t CState1, CState2;
+
+    /* init */
+    if (srcSize <= 2) return 0;
+    { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
+      if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
+
+#define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
+
+    if (srcSize & 1) {
+        FSE_initCState2(&CState1, ct, *--ip);
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    } else {
+        FSE_initCState2(&CState2, ct, *--ip);
+        FSE_initCState2(&CState1, ct, *--ip);
+    }
+
+    /* join to mod 4 */
+    srcSize -= 2;
+    if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    while ( ip>istart ) {
+
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+
+    if (fast)
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+    else
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
+
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
+
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c
new file mode 100644 (file)
index 0000000..3ddc6df
--- /dev/null
@@ -0,0 +1,165 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/mem.h"             /* U32, BYTE, etc. */
+#include "../common/debug.h"           /* assert, DEBUGLOG */
+#include "../common/error_private.h"   /* ERROR */
+#include "hist.h"
+
+
+/* --- Error management --- */
+unsigned HIST_isError(size_t code) { return ERR_isError(code); }
+
+/*-**************************************************************
+ *  Histogram functions
+ ****************************************************************/
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned largestCount=0;
+
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    while (ip<end) {
+        assert(*ip <= maxSymbolValue);
+        count[*ip++]++;
+    }
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+
+    {   U32 s;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > largestCount) largestCount = count[s];
+    }
+
+    return largestCount;
+}
+
+typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
+
+/* HIST_count_parallel_wksp() :
+ * store histogram into 4 intermediate tables, recombined at the end.
+ * this design makes better use of OoO cpus,
+ * and is noticeably faster when some values are heavily repeated.
+ * But it needs some additional workspace for intermediate tables.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
+ * @return : largest histogram frequency,
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
+static size_t HIST_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                HIST_checkInput_e check,
+                                U32* const workSpace)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
+    unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
+
+    /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
+    if (!sourceSize) {
+        ZSTD_memset(count, 0, countSize);
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
+
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    {   U32 s;
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
+    }   }
+
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
+    return (size_t)max;
+}
+
+/* HIST_countFast_wksp() :
+ * Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                          const void* source, size_t sourceSize,
+                          void* workSpace, size_t workSpaceSize)
+{
+    if (sourceSize < 1500) /* heuristic threshold */
+        return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
+}
+
+/* HIST_count_wksp() :
+ * Same as HIST_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* source, size_t sourceSize,
+                       void* workSpace, size_t workSpaceSize)
+{
+    if ((size_t)workSpace & 3) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
+    if (*maxSymbolValuePtr < 255)
+        return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
+    *maxSymbolValuePtr = 255;
+    return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
+}
+
diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h
new file mode 100644 (file)
index 0000000..fc1830a
--- /dev/null
@@ -0,0 +1,75 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/zstd_deps.h"   /* size_t */
+
+
+/* --- simple histogram functions --- */
+
+/*! HIST_count():
+ *  Provides the precise count of each byte within a table 'count'.
+ * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
+ *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
+ * @return : count of the most frequent symbol (which isn't identified).
+ *           or an error code, which can be tested using HIST_isError().
+ *           note : if return == srcSize, there is only one symbol.
+ */
+size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
+                  const void* src, size_t srcSize);
+
+unsigned HIST_isError(size_t code);  /*< tells if a return value is an error code */
+
+
+/* --- advanced histogram functions --- */
+
+#define HIST_WKSP_SIZE_U32 1024
+#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
+/* HIST_count_wksp() :
+ *  Same as HIST_count(), but using an externally provided scratch buffer.
+ *  Benefit is this function will use very little stack space.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t workSpaceSize);
+
+/* HIST_countFast() :
+ *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
+ *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
+ */
+size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* src, size_t srcSize);
+
+/* HIST_countFast_wksp() :
+ *  Same as HIST_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` is a writable buffer which must be 4-bytes aligned,
+ * `workSpaceSize` must be >= HIST_WKSP_SIZE
+ */
+size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize,
+                           void* workSpace, size_t workSpaceSize);
+
+/*! HIST_count_simple() :
+ *  Same as HIST_countFast(), this function is unsafe,
+ *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
+ *  It is also a bit slower for large inputs.
+ *  However, it does not need any additional memory (not even on stack).
+ * @return : count of the most frequent symbol.
+ *  Note this function doesn't produce any error (i.e. it must succeed).
+ */
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize);
diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
new file mode 100644 (file)
index 0000000..f76a526
--- /dev/null
@@ -0,0 +1,905 @@
+/* ******************************************************************
+ * Huffman encoder, part of New Generation Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Compiler specifics
+****************************************************************/
+
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/zstd_deps.h"     /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"
+#include "hist.h"
+#define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
+#include "../common/fse.h"        /* header compression */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
+
+
+/* **************************************************************
+*  Utils
+****************************************************************/
+unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
+}
+
+
+/* *******************************************************
+*  HUF : Huffman block compression
+*********************************************************/
+/* HUF_compressWeights() :
+ * Same as FSE_compress(), but dedicated to huff0's weights compression.
+ * The use case needs much less stack memory.
+ * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
+ */
+#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
+
+typedef struct {
+    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
+    U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
+    unsigned count[HUF_TABLELOG_MAX+1];
+    S16 norm[HUF_TABLELOG_MAX+1];
+} HUF_CompressWeightsWksp;
+
+static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + dstSize;
+
+    unsigned maxSymbolValue = HUF_TABLELOG_MAX;
+    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+
+    if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
+
+    /* init conditions */
+    if (wtSize <= 1) return 0;  /* Not compressible */
+
+    /* Scan input and build symbol stats */
+    {   unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
+        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
+        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
+    }
+
+    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
+    CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
+        op += hSize;
+    }
+
+    /* Compress */
+    CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
+    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
+        if (cSize == 0) return 0;   /* not enough space for compressed data */
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+
+typedef struct {
+    HUF_CompressWeightsWksp wksp;
+    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
+} HUF_WriteCTableWksp;
+
+size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
+                            const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+
+    /* convert to weight */
+    wksp->bitsToWeight[0] = 0;
+    for (n=1; n<huffLog+1; n++)
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
+
+    /* attempt weights compression by FSE */
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
+
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;
+}
+
+/*! HUF_writeCTable() :
+    `CTable` : Huffman tree to save, using huf representation.
+    @return : size of saved CTable */
+size_t HUF_writeCTable (void* dst, size_t maxDstSize,
+                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
+{
+    HUF_WriteCTableWksp wksp;
+    return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp));
+}
+
+
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
+{
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+
+    /* get symbol weights */
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
+    *hasZeroWeights = (rankVal[0] > 0);
+
+    /* check result */
+    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
+
+    /* Prepare base value per rank */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 curr = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = curr;
+    }   }
+
+    /* fill nbBits */
+    {   U32 n; for (n=0; n<nbSymbols; n++) {
+            const U32 w = huffWeight[n];
+            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
+    }   }
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        /* determine stating value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
+        {   U16 min = 0;
+            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;     /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+    }
+
+    *maxSymbolValuePtr = nbSymbols - 1;
+    return readSize;
+}
+
+U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
+{
+    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
+    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+    return table[symbolValue].nbBits;
+}
+
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+/*
+ * HUF_setMaxHeight():
+ * Enforces maxNbBits on the Huffman tree described in huffNode.
+ *
+ * It sets all nodes with nbBits > maxNbBits to be maxNbBits. Then it adjusts
+ * the tree to so that it is a valid canonical Huffman tree.
+ *
+ * @pre               The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits == huffNode[lastNonNull].nbBits.
+ * @post              The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits is the return value <= maxNbBits.
+ *
+ * @param huffNode    The Huffman tree modified in place to enforce maxNbBits.
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
+ * @param maxNbBits   The maximum allowed number of bits, which the Huffman tree
+ *                    may not respect. After this function the Huffman tree will
+ *                    respect maxNbBits.
+ * @return            The maximum number of bits of the Huffman tree after adjustment,
+ *                    necessarily no more than maxNbBits.
+ */
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+    /* early exit : no elt > maxNbBits, so the tree is already valid. */
+    if (largestBits <= maxNbBits) return largestBits;
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - maxNbBits);
+        int n = (int)lastNonNull;
+
+        /* Adjust any ranks > maxNbBits to maxNbBits.
+         * Compute totalCost, which is how far the sum of the ranks is
+         * we are over 2^largestBits after adjust the offending ranks.
+         */
+        while (huffNode[n].nbBits > maxNbBits) {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)maxNbBits;
+            n--;
+        }
+        /* n stops at huffNode[n].nbBits <= maxNbBits */
+        assert(huffNode[n].nbBits <= maxNbBits);
+        /* n end at index of smallest symbol using < maxNbBits */
+        while (huffNode[n].nbBits == maxNbBits) --n;
+
+        /* renorm totalCost from 2^largestBits to 2^maxNbBits
+         * note : totalCost is necessarily a multiple of baseCost */
+        assert((totalCost & (baseCost - 1)) == 0);
+        totalCost >>= (largestBits - maxNbBits);
+        assert(totalCost > 0);
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_TABLELOG_MAX+2];
+
+            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
+            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
+            {   U32 currentNbBits = maxNbBits;
+                int pos;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
+                    rankLast[maxNbBits-currentNbBits] = (U32)pos;
+            }   }
+
+            while (totalCost > 0) {
+                /* Try to reduce the next power of 2 above totalCost because we
+                 * gain back half the rank.
+                 */
+                U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 const highPos = rankLast[nBitsToDecrease];
+                    U32 const lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    /* Decrease highPos if no symbols of lowPos or if it is
+                     * not cheaper to remove 2 lowPos than highPos.
+                     */
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
+                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+                    nBitsToDecrease++;
+                assert(rankLast[nBitsToDecrease] != noSymbol);
+                /* Increase the number of bits to gain back half the rank cost. */
+                totalCost -= 1 << (nBitsToDecrease-1);
+                huffNode[rankLast[nBitsToDecrease]].nbBits++;
+
+                /* Fix up the new rank.
+                 * If the new rank was empty, this symbol is now its smallest.
+                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
+                 */
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
+                /* Fix up the old rank.
+                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
+                 * it must be the only symbol in its rank, so the old rank now has no symbols.
+                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
+                 * the smallest node in the rank. If the previous position belongs to a different rank,
+                 * then the rank is now empty.
+                 */
+                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+                }
+            }   /* while (totalCost > 0) */
+
+            /* If we've removed too much weight, then we have to add it back.
+             * To avoid overshooting again, we only adjust the smallest rank.
+             * We take the largest nodes from the lowest rank 0 and move them
+             * to rank 1. There's guaranteed to be enough rank 0 symbols because
+             * TODO.
+             */
+            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
+                /* special case : no rank 1 symbol (using maxNbBits-1);
+                 * let's create one from largest rank 0 (using maxNbBits).
+                 */
+                if (rankLast[1] == noSymbol) {
+                    while (huffNode[n].nbBits == maxNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    assert(n >= 0);
+                    rankLast[1] = (U32)(n+1);
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+            }
+        }   /* repay normalized cost */
+    }   /* there are several too large elements (at least >= 2) */
+
+    return maxNbBits;
+}
+
+typedef struct {
+    U32 base;
+    U32 curr;
+} rankPos;
+
+typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+#define RANK_POSITION_TABLE_SIZE 32
+
+typedef struct {
+  huffNodeTable huffNodeTbl;
+  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
+
+/*
+ * HUF_sort():
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ *
+ * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
+ *                            Must have (maxSymbolValue + 1) entries.
+ * @param[in]  count          Histogram of the symbols.
+ * @param[in]  maxSymbolValue Maximum symbol value.
+ * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
+ */
+static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
+{
+    int n;
+    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+
+    /* Compute base and set curr to base.
+     * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
+     * Then 2^lowerRank <= count[n]+1 <= 2^rank.
+     * We attribute each symbol to lowerRank's base value, because we want to know where
+     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
+     */
+    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 lowerRank = BIT_highbit32(count[n] + 1);
+        rankPosition[lowerRank].base++;
+    }
+    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
+        rankPosition[n-1].base += rankPosition[n].base;
+        rankPosition[n-1].curr = rankPosition[n-1].base;
+    }
+    /* Sort */
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 const c = count[n];
+        U32 const r = BIT_highbit32(c+1) + 1;
+        U32 pos = rankPosition[r].curr++;
+        /* Insert into the correct position in the rank.
+         * We have at most 256 symbols, so this insertion should be fine.
+         */
+        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
+            huffNode[pos] = huffNode[pos-1];
+            pos--;
+        }
+        huffNode[pos].count = c;
+        huffNode[pos].byte  = (BYTE)n;
+    }
+}
+
+
+/* HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
+
+/* HUF_buildTree():
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
+ *
+ * @param huffNode        The array sorted by HUF_sort(). Builds the Huffman tree in this array.
+ * @param maxSymbolValue  The maximum symbol value.
+ * @return                The smallest node in the Huffman tree (by count).
+ */
+static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+{
+    nodeElt* const huffNode0 = huffNode - 1;
+    int nonNullRank;
+    int lowS, lowN;
+    int nodeNb = STARTNODE;
+    int n, nodeRoot;
+    /* init for parents */
+    nonNullRank = (int)maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {
+        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    return nonNullRank;
+}
+
+/*
+ * HUF_buildCTableFromTree():
+ * Build the CTable given the Huffman tree in huffNode.
+ *
+ * @param[out] CTable         The output Huffman CTable.
+ * @param      huffNode       The Huffman tree.
+ * @param      nonNullRank    The last and smallest node in the Huffman tree.
+ * @param      maxSymbolValue The maximum symbol value.
+ * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
+ */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+}
+
+size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    /* safety checks */
+    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+      return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+      return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
+    /* enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+
+    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+
+    return maxNbBits;
+}
+
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += CTable[s].nbBits * count[s];
+    }
+    return nbBits >> 3;
+}
+
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  int bad = 0;
+  int s;
+  for (s = 0; s <= (int)maxSymbolValue; ++s) {
+    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+  }
+  return !bad;
+}
+
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+{
+    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+}
+
+#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+
+#define HUF_FLUSHBITS_1(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+
+#define HUF_FLUSHBITS_2(stream) \
+    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    const BYTE* ip = (const BYTE*) src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+    size_t n;
+    BIT_CStream_t bitC;
+
+    /* init */
+    if (dstSize < 8) return 0;   /* not enough space to compress */
+    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
+      if (HUF_isError(initErr)) return 0; }
+
+    n = srcSize & ~3;  /* join to mod 4 */
+    switch (srcSize & 3)
+    {
+        case 3:
+            HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
+            HUF_FLUSHBITS_2(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 2:
+            HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
+            HUF_FLUSHBITS_1(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 1:
+            HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
+            HUF_FLUSHBITS(&bitC);
+            ZSTD_FALLTHROUGH;
+        case 0: ZSTD_FALLTHROUGH;
+        default: break;
+    }
+
+    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
+        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
+        HUF_FLUSHBITS_2(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
+        HUF_FLUSHBITS_1(&bitC);
+        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
+        HUF_FLUSHBITS(&bitC);
+    }
+
+    return BIT_closeCStream(&bitC);
+}
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
+                                   const void* src, size_t srcSize,
+                                   const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
+                                      const void* src, size_t srcSize,
+                                      const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    if (bmi2) {
+        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
+    }
+    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
+}
+
+#else
+
+static size_t
+HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, const int bmi2)
+{
+    (void)bmi2;
+    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
+}
+
+#endif
+
+size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+
+static size_t
+HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
+                              const void* src, size_t srcSize,
+                              const HUF_CElt* CTable, int bmi2)
+{
+    size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
+    if (srcSize < 12) return 0;   /* no saving possible : too small input */
+    op += 6;   /* jumpTable */
+
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart+2, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
+        if (cSize==0) return 0;
+        assert(cSize <= 65535);
+        MEM_writeLE16(ostart+4, (U16)cSize);
+        op += cSize;
+    }
+
+    ip += segmentSize;
+    assert(op <= oend);
+    assert(ip <= iend);
+    {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
+        if (cSize==0) return 0;
+        op += cSize;
+    }
+
+    return (size_t)(op-ostart);
+}
+
+size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
+
+static size_t HUF_compressCTable_internal(
+                BYTE* const ostart, BYTE* op, BYTE* const oend,
+                const void* src, size_t srcSize,
+                HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
+{
+    size_t const cSize = (nbStreams==HUF_singleStream) ?
+                         HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
+                         HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
+    if (HUF_isError(cSize)) { return cSize; }
+    if (cSize==0) { return 0; }   /* uncompressible */
+    op += cSize;
+    /* check compressibility */
+    assert(op >= ostart);
+    if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
+    return (size_t)(op-ostart);
+}
+
+typedef struct {
+    unsigned count[HUF_SYMBOLVALUE_MAX + 1];
+    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+    union {
+        HUF_buildCTable_wksp_tables buildCTable_wksp;
+        HUF_WriteCTableWksp writeCTable_wksp;
+    } wksps;
+} HUF_compress_tables_t;
+
+/* HUF_compress_internal() :
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
+static size_t
+HUF_compress_internal (void* dst, size_t dstSize,
+                 const void* src, size_t srcSize,
+                       unsigned maxSymbolValue, unsigned huffLog,
+                       HUF_nbStreams_e nbStreams,
+                       void* workSpace_align4, size_t wkspSize,
+                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
+                 const int bmi2)
+{
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart;
+
+    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
+    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
+
+    /* checks & inits */
+    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
+    if (!srcSize) return 0;  /* Uncompressed */
+    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
+    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
+    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
+
+    /* Heuristic : If old table is valid, use it for small inputs */
+    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Scan input and build symbol stats */
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
+        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
+        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
+    }
+
+    /* Check validity of previous table */
+    if ( repeat
+      && *repeat == HUF_repeat_check
+      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
+        *repeat = HUF_repeat_none;
+    }
+    /* Heuristic : use existing table for small inputs */
+    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
+        return HUF_compressCTable_internal(ostart, op, oend,
+                                           src, srcSize,
+                                           nbStreams, oldHufTable, bmi2);
+    }
+
+    /* Build Huffman Tree */
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
+                                            maxSymbolValue, huffLog,
+                                            &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
+        CHECK_F(maxBits);
+        huffLog = (U32)maxBits;
+        /* Zero unused symbols in CTable, so we can check it for validity */
+        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
+               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
+    }
+
+    /* Write table description header */
+    {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
+                                              &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
+        /* Check if using previous huffman table is beneficial */
+        if (repeat && *repeat != HUF_repeat_none) {
+            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
+            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
+            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
+                return HUF_compressCTable_internal(ostart, op, oend,
+                                                   src, srcSize,
+                                                   nbStreams, oldHufTable, bmi2);
+        }   }
+
+        /* Use the new huffman table */
+        if (hSize + 12ul >= srcSize) { return 0; }
+        op += hSize;
+        if (repeat) { *repeat = HUF_repeat_none; }
+        if (oldHufTable)
+            ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
+    }
+    return HUF_compressCTable_internal(ostart, op, oend,
+                                       src, srcSize,
+                                       nbStreams, table->CTable, bmi2);
+}
+
+
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
+}
+
+size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_singleStream,
+                                 workSpace, wkspSize, hufTable,
+                                 repeat, preferRepeat, bmi2);
+}
+
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * provide workspace to generate compression tables */
+size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 NULL, NULL, 0, 0 /*bmi2*/);
+}
+
+/* HUF_compress4X_repeat():
+ * compress input using 4 streams.
+ * re-use an existing huffman compression table */
+size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
+                      const void* src, size_t srcSize,
+                      unsigned maxSymbolValue, unsigned huffLog,
+                      void* workSpace, size_t wkspSize,
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+{
+    return HUF_compress_internal(dst, dstSize, src, srcSize,
+                                 maxSymbolValue, huffLog, HUF_fourStreams,
+                                 workSpace, wkspSize,
+                                 hufTable, repeat, preferRepeat, bmi2);
+}
+
diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
new file mode 100644 (file)
index 0000000..a4e9160
--- /dev/null
@@ -0,0 +1,5109 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
+#include "../common/cpu.h"
+#include "../common/mem.h"
+#include "hist.h"           /* HIST_countFast_wksp */
+#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+#include "zstd_fast.h"
+#include "zstd_double_fast.h"
+#include "zstd_lazy.h"
+#include "zstd_opt.h"
+#include "zstd_ldm.h"
+#include "zstd_compress_superblock.h"
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * COMPRESS_HEAPMODE :
+ * Select how default decompression function ZSTD_compress() allocates its context,
+ * on stack (0, default), or into heap (1).
+ * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
+ */
+
+
+/*-*************************************
+*  Helper functions
+***************************************/
+/* ZSTD_compressBound()
+ * Note that the result from this function is only compatible with the "normal"
+ * full-block strategy.
+ * When there are a lot of small blocks due to frequent flush in streaming mode
+ * the overhead of headers can make the compressed data to be larger than the
+ * return value of ZSTD_compressBound().
+ */
+size_t ZSTD_compressBound(size_t srcSize) {
+    return ZSTD_COMPRESSBOUND(srcSize);
+}
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CDict_s {
+    const void* dictContent;
+    size_t dictContentSize;
+    ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */
+    U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+    ZSTD_cwksp workspace;
+    ZSTD_matchState_t matchState;
+    ZSTD_compressedBlockState_t cBlockState;
+    ZSTD_customMem customMem;
+    U32 dictID;
+    int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+};  /* typedef'd to ZSTD_CDict within "zstd.h" */
+
+ZSTD_CCtx* ZSTD_createCCtx(void)
+{
+    return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
+{
+    assert(cctx != NULL);
+    ZSTD_memset(cctx, 0, sizeof(*cctx));
+    cctx->customMem = memManager;
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
+        assert(!ZSTD_isError(err));
+        (void)err;
+    }
+}
+
+ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
+{
+    ZSTD_STATIC_ASSERT(zcss_init==0);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    {   ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem);
+        if (!cctx) return NULL;
+        ZSTD_initCCtx(cctx, customMem);
+        return cctx;
+    }
+}
+
+ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize)
+{
+    ZSTD_cwksp ws;
+    ZSTD_CCtx* cctx;
+    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
+    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
+    ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+
+    cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx));
+    if (cctx == NULL) return NULL;
+
+    ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx));
+    ZSTD_cwksp_move(&cctx->workspace, &ws);
+    cctx->staticSize = workspaceSize;
+
+    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+    if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL;
+    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t));
+    cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE);
+    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    return cctx;
+}
+
+/*
+ * Clears and frees all of the dictionaries in the CCtx.
+ */
+static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
+{
+    ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem);
+    ZSTD_freeCDict(cctx->localDict.cdict);
+    ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict));
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
+    cctx->cdict = NULL;
+}
+
+static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
+{
+    size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
+    size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
+    return bufferSize + cdictSize;
+}
+
+static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
+{
+    assert(cctx != NULL);
+    assert(cctx->staticSize == 0);
+    ZSTD_clearAllDicts(cctx);
+    ZSTD_cwksp_free(&cctx->workspace, cctx->customMem);
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                    "not compatible with static CCtx");
+    {
+        int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx);
+        ZSTD_freeCCtxContent(cctx);
+        if (!cctxInWorkspace) {
+            ZSTD_customFree(cctx, cctx->customMem);
+        }
+    }
+    return 0;
+}
+
+
+static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
+{
+    (void)cctx;
+    return 0;
+}
+
+
+size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support sizeof on NULL */
+    /* cctx may be in the workspace */
+    return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx))
+           + ZSTD_cwksp_sizeof(&cctx->workspace)
+           + ZSTD_sizeof_localDict(cctx->localDict)
+           + ZSTD_sizeof_mtctx(cctx);
+}
+
+size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
+{
+    return ZSTD_sizeof_CCtx(zcs);  /* same object */
+}
+
+/* private API call, for dictBuilder only */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
+
+/* Returns 1 if compression parameters are such that we should
+ * enable long distance matching (wlog >= 27, strategy >= btopt).
+ * Returns 0 otherwise.
+ */
+static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
+    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
+}
+
+static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
+        ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params cctxParams;
+    /* should not matter, as all cParams are presumed properly defined */
+    ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
+    cctxParams.cParams = cParams;
+
+    if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
+        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
+        cctxParams.ldmParams.enableLdm = 1;
+        /* LDM is enabled by default for optimal parser and window size >= 128MB */
+        ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
+        assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
+        assert(cctxParams.ldmParams.hashRateLog < 32);
+    }
+
+    assert(!ZSTD_checkCParams(cParams));
+    return cctxParams;
+}
+
+static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params* params;
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+    params = (ZSTD_CCtx_params*)ZSTD_customCalloc(
+            sizeof(ZSTD_CCtx_params), customMem);
+    if (!params) { return NULL; }
+    ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+    params->customMem = customMem;
+    return params;
+}
+
+ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
+{
+    return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
+}
+
+size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
+{
+    if (params == NULL) { return 0; }
+    ZSTD_customFree(params, params->customMem);
+    return 0;
+}
+
+size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
+{
+    return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
+}
+
+size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->compressionLevel = compressionLevel;
+    cctxParams->fParams.contentSizeFlag = 1;
+    return 0;
+}
+
+#define ZSTD_NO_CLEVEL 0
+
+/*
+ * Initializes the cctxParams from params and compressionLevel.
+ * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL.
+ */
+static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    ZSTD_memset(cctxParams, 0, sizeof(*cctxParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = compressionLevel;
+}
+
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+{
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
+    return 0;
+}
+
+/*
+ * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
+ * @param param Validated zstd parameters.
+ */
+static void ZSTD_CCtxParams_setZstdParams(
+        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
+}
+
+ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+        bounds.lowerBound = ZSTD_minCLevel();
+        bounds.upperBound = ZSTD_maxCLevel();
+        return bounds;
+
+    case ZSTD_c_windowLog:
+        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
+        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_hashLog:
+        bounds.lowerBound = ZSTD_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_chainLog:
+        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
+        bounds.upperBound = ZSTD_CHAINLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_searchLog:
+        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
+        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_minMatch:
+        bounds.lowerBound = ZSTD_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_targetLength:
+        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
+        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
+        return bounds;
+
+    case ZSTD_c_strategy:
+        bounds.lowerBound = ZSTD_STRATEGY_MIN;
+        bounds.upperBound = ZSTD_STRATEGY_MAX;
+        return bounds;
+
+    case ZSTD_c_contentSizeFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_checksumFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_dictIDFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_nbWorkers:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_jobSize:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_overlapLog:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 0;
+        return bounds;
+
+    case ZSTD_c_enableDedicatedDictSearch:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_enableLongDistanceMatching:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_ldmHashLog:
+        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmMinMatch:
+        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmBucketSizeLog:
+        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
+        return bounds;
+
+    case ZSTD_c_ldmHashRateLog:
+        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
+        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
+        return bounds;
+
+    /* experimental parameters */
+    case ZSTD_c_rsyncable:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_forceMaxWindow :
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_format:
+        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+        bounds.lowerBound = ZSTD_f_zstd1;
+        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_forceAttachDict:
+        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad);
+        bounds.lowerBound = ZSTD_dictDefaultAttach;
+        bounds.upperBound = ZSTD_dictForceLoad;       /* note : how to ensure at compile time that this is the highest value enum ? */
+        return bounds;
+
+    case ZSTD_c_literalCompressionMode:
+        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
+        bounds.lowerBound = ZSTD_lcm_auto;
+        bounds.upperBound = ZSTD_lcm_uncompressed;
+        return bounds;
+
+    case ZSTD_c_targetCBlockSize:
+        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
+        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
+        return bounds;
+
+    case ZSTD_c_srcSizeHint:
+        bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN;
+        bounds.upperBound = ZSTD_SRCSIZEHINT_MAX;
+        return bounds;
+
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+        bounds.lowerBound = (int)ZSTD_bm_buffered;
+        bounds.upperBound = (int)ZSTD_bm_stable;
+        return bounds;
+
+    case ZSTD_c_blockDelimiters:
+        bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters;
+        bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters;
+        return bounds;
+
+    case ZSTD_c_validateSequences:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    default:
+        bounds.error = ERROR(parameter_unsupported);
+        return bounds;
+    }
+}
+
+/* ZSTD_cParam_clampBounds:
+ * Clamps the value into the bounded range.
+ */
+static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return bounds.error;
+    if (*value < bounds.lowerBound) *value = bounds.lowerBound;
+    if (*value > bounds.upperBound) *value = bounds.upperBound;
+    return 0;
+}
+
+#define BOUNDCHECK(cParam, val) { \
+    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
+                    parameter_outOfBound, "Param out of bounds"); \
+}
+
+
+static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
+{
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+        return 1;
+
+    case ZSTD_c_format:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow :
+    case ZSTD_c_nbWorkers:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableDedicatedDictSearch:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
+    default:
+        return 0;
+    }
+}
+
+size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
+    if (cctx->streamStage != zcss_init) {
+        if (ZSTD_isUpdateAuthorized(param)) {
+            cctx->cParamsChanged = 1;
+        } else {
+            RETURN_ERROR(stage_wrong, "can only set params in ctx init stage");
+    }   }
+
+    switch(param)
+    {
+    case ZSTD_c_nbWorkers:
+        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
+                        "MT not compatible with static alloc");
+        break;
+
+    case ZSTD_c_compressionLevel:
+    case ZSTD_c_windowLog:
+    case ZSTD_c_hashLog:
+    case ZSTD_c_chainLog:
+    case ZSTD_c_searchLog:
+    case ZSTD_c_minMatch:
+    case ZSTD_c_targetLength:
+    case ZSTD_c_strategy:
+    case ZSTD_c_ldmHashRateLog:
+    case ZSTD_c_format:
+    case ZSTD_c_contentSizeFlag:
+    case ZSTD_c_checksumFlag:
+    case ZSTD_c_dictIDFlag:
+    case ZSTD_c_forceMaxWindow:
+    case ZSTD_c_forceAttachDict:
+    case ZSTD_c_literalCompressionMode:
+    case ZSTD_c_jobSize:
+    case ZSTD_c_overlapLog:
+    case ZSTD_c_rsyncable:
+    case ZSTD_c_enableDedicatedDictSearch:
+    case ZSTD_c_enableLongDistanceMatching:
+    case ZSTD_c_ldmHashLog:
+    case ZSTD_c_ldmMinMatch:
+    case ZSTD_c_ldmBucketSizeLog:
+    case ZSTD_c_targetCBlockSize:
+    case ZSTD_c_srcSizeHint:
+    case ZSTD_c_stableInBuffer:
+    case ZSTD_c_stableOutBuffer:
+    case ZSTD_c_blockDelimiters:
+    case ZSTD_c_validateSequences:
+        break;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
+}
+
+size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
+                                    ZSTD_cParameter param, int value)
+{
+    DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
+    switch(param)
+    {
+    case ZSTD_c_format :
+        BOUNDCHECK(ZSTD_c_format, value);
+        CCtxParams->format = (ZSTD_format_e)value;
+        return (size_t)CCtxParams->format;
+
+    case ZSTD_c_compressionLevel : {
+        FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
+        if (value == 0)
+            CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
+        else
+            CCtxParams->compressionLevel = value;
+        if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel;
+        return 0;  /* return type (size_t) cannot represent negative values */
+    }
+
+    case ZSTD_c_windowLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_windowLog, value);
+        CCtxParams->cParams.windowLog = (U32)value;
+        return CCtxParams->cParams.windowLog;
+
+    case ZSTD_c_hashLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_hashLog, value);
+        CCtxParams->cParams.hashLog = (U32)value;
+        return CCtxParams->cParams.hashLog;
+
+    case ZSTD_c_chainLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_chainLog, value);
+        CCtxParams->cParams.chainLog = (U32)value;
+        return CCtxParams->cParams.chainLog;
+
+    case ZSTD_c_searchLog :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_searchLog, value);
+        CCtxParams->cParams.searchLog = (U32)value;
+        return (size_t)value;
+
+    case ZSTD_c_minMatch :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_minMatch, value);
+        CCtxParams->cParams.minMatch = value;
+        return CCtxParams->cParams.minMatch;
+
+    case ZSTD_c_targetLength :
+        BOUNDCHECK(ZSTD_c_targetLength, value);
+        CCtxParams->cParams.targetLength = value;
+        return CCtxParams->cParams.targetLength;
+
+    case ZSTD_c_strategy :
+        if (value!=0)   /* 0 => use default */
+            BOUNDCHECK(ZSTD_c_strategy, value);
+        CCtxParams->cParams.strategy = (ZSTD_strategy)value;
+        return (size_t)CCtxParams->cParams.strategy;
+
+    case ZSTD_c_contentSizeFlag :
+        /* Content size written in frame header _when known_ (default:1) */
+        DEBUGLOG(4, "set content size flag = %u", (value!=0));
+        CCtxParams->fParams.contentSizeFlag = value != 0;
+        return CCtxParams->fParams.contentSizeFlag;
+
+    case ZSTD_c_checksumFlag :
+        /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
+        CCtxParams->fParams.checksumFlag = value != 0;
+        return CCtxParams->fParams.checksumFlag;
+
+    case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
+        DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
+        CCtxParams->fParams.noDictIDFlag = !value;
+        return !CCtxParams->fParams.noDictIDFlag;
+
+    case ZSTD_c_forceMaxWindow :
+        CCtxParams->forceWindow = (value != 0);
+        return CCtxParams->forceWindow;
+
+    case ZSTD_c_forceAttachDict : {
+        const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
+        BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
+        CCtxParams->attachDictPref = pref;
+        return CCtxParams->attachDictPref;
+    }
+
+    case ZSTD_c_literalCompressionMode : {
+        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
+        BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
+        CCtxParams->literalCompressionMode = lcm;
+        return CCtxParams->literalCompressionMode;
+    }
+
+    case ZSTD_c_nbWorkers :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_jobSize :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_overlapLog :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_rsyncable :
+        RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
+        return 0;
+
+    case ZSTD_c_enableDedicatedDictSearch :
+        CCtxParams->enableDedicatedDictSearch = (value!=0);
+        return CCtxParams->enableDedicatedDictSearch;
+
+    case ZSTD_c_enableLongDistanceMatching :
+        CCtxParams->ldmParams.enableLdm = (value!=0);
+        return CCtxParams->ldmParams.enableLdm;
+
+    case ZSTD_c_ldmHashLog :
+        if (value!=0)   /* 0 ==> auto */
+            BOUNDCHECK(ZSTD_c_ldmHashLog, value);
+        CCtxParams->ldmParams.hashLog = value;
+        return CCtxParams->ldmParams.hashLog;
+
+    case ZSTD_c_ldmMinMatch :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
+        CCtxParams->ldmParams.minMatchLength = value;
+        return CCtxParams->ldmParams.minMatchLength;
+
+    case ZSTD_c_ldmBucketSizeLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
+        CCtxParams->ldmParams.bucketSizeLog = value;
+        return CCtxParams->ldmParams.bucketSizeLog;
+
+    case ZSTD_c_ldmHashRateLog :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_ldmHashRateLog, value);
+        CCtxParams->ldmParams.hashRateLog = value;
+        return CCtxParams->ldmParams.hashRateLog;
+
+    case ZSTD_c_targetCBlockSize :
+        if (value!=0)   /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        CCtxParams->targetCBlockSize = value;
+        return CCtxParams->targetCBlockSize;
+
+    case ZSTD_c_srcSizeHint :
+        if (value!=0)    /* 0 ==> default */
+            BOUNDCHECK(ZSTD_c_srcSizeHint, value);
+        CCtxParams->srcSizeHint = value;
+        return CCtxParams->srcSizeHint;
+
+    case ZSTD_c_stableInBuffer:
+        BOUNDCHECK(ZSTD_c_stableInBuffer, value);
+        CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->inBufferMode;
+
+    case ZSTD_c_stableOutBuffer:
+        BOUNDCHECK(ZSTD_c_stableOutBuffer, value);
+        CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value;
+        return CCtxParams->outBufferMode;
+
+    case ZSTD_c_blockDelimiters:
+        BOUNDCHECK(ZSTD_c_blockDelimiters, value);
+        CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value;
+        return CCtxParams->blockDelimiters;
+
+    case ZSTD_c_validateSequences:
+        BOUNDCHECK(ZSTD_c_validateSequences, value);
+        CCtxParams->validateSequences = value;
+        return CCtxParams->validateSequences;
+
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+}
+
+size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value)
+{
+    return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
+}
+
+size_t ZSTD_CCtxParams_getParameter(
+        ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value)
+{
+    switch(param)
+    {
+    case ZSTD_c_format :
+        *value = CCtxParams->format;
+        break;
+    case ZSTD_c_compressionLevel :
+        *value = CCtxParams->compressionLevel;
+        break;
+    case ZSTD_c_windowLog :
+        *value = (int)CCtxParams->cParams.windowLog;
+        break;
+    case ZSTD_c_hashLog :
+        *value = (int)CCtxParams->cParams.hashLog;
+        break;
+    case ZSTD_c_chainLog :
+        *value = (int)CCtxParams->cParams.chainLog;
+        break;
+    case ZSTD_c_searchLog :
+        *value = CCtxParams->cParams.searchLog;
+        break;
+    case ZSTD_c_minMatch :
+        *value = CCtxParams->cParams.minMatch;
+        break;
+    case ZSTD_c_targetLength :
+        *value = CCtxParams->cParams.targetLength;
+        break;
+    case ZSTD_c_strategy :
+        *value = (unsigned)CCtxParams->cParams.strategy;
+        break;
+    case ZSTD_c_contentSizeFlag :
+        *value = CCtxParams->fParams.contentSizeFlag;
+        break;
+    case ZSTD_c_checksumFlag :
+        *value = CCtxParams->fParams.checksumFlag;
+        break;
+    case ZSTD_c_dictIDFlag :
+        *value = !CCtxParams->fParams.noDictIDFlag;
+        break;
+    case ZSTD_c_forceMaxWindow :
+        *value = CCtxParams->forceWindow;
+        break;
+    case ZSTD_c_forceAttachDict :
+        *value = CCtxParams->attachDictPref;
+        break;
+    case ZSTD_c_literalCompressionMode :
+        *value = CCtxParams->literalCompressionMode;
+        break;
+    case ZSTD_c_nbWorkers :
+        assert(CCtxParams->nbWorkers == 0);
+        *value = CCtxParams->nbWorkers;
+        break;
+    case ZSTD_c_jobSize :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_overlapLog :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_rsyncable :
+        RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
+    case ZSTD_c_enableDedicatedDictSearch :
+        *value = CCtxParams->enableDedicatedDictSearch;
+        break;
+    case ZSTD_c_enableLongDistanceMatching :
+        *value = CCtxParams->ldmParams.enableLdm;
+        break;
+    case ZSTD_c_ldmHashLog :
+        *value = CCtxParams->ldmParams.hashLog;
+        break;
+    case ZSTD_c_ldmMinMatch :
+        *value = CCtxParams->ldmParams.minMatchLength;
+        break;
+    case ZSTD_c_ldmBucketSizeLog :
+        *value = CCtxParams->ldmParams.bucketSizeLog;
+        break;
+    case ZSTD_c_ldmHashRateLog :
+        *value = CCtxParams->ldmParams.hashRateLog;
+        break;
+    case ZSTD_c_targetCBlockSize :
+        *value = (int)CCtxParams->targetCBlockSize;
+        break;
+    case ZSTD_c_srcSizeHint :
+        *value = (int)CCtxParams->srcSizeHint;
+        break;
+    case ZSTD_c_stableInBuffer :
+        *value = (int)CCtxParams->inBufferMode;
+        break;
+    case ZSTD_c_stableOutBuffer :
+        *value = (int)CCtxParams->outBufferMode;
+        break;
+    case ZSTD_c_blockDelimiters :
+        *value = (int)CCtxParams->blockDelimiters;
+        break;
+    case ZSTD_c_validateSequences :
+        *value = (int)CCtxParams->validateSequences;
+        break;
+    default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
+    }
+    return 0;
+}
+
+/* ZSTD_CCtx_setParametersUsingCCtxParams() :
+ *  just applies `params` into `cctx`
+ *  no action is performed, parameters are merely stored.
+ *  If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
+ *    This is possible even if a compression is ongoing.
+ *    In which case, new parameters will be applied on the fly, starting with next compression job.
+ */
+size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+        ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "The context is in the wrong stage!");
+    RETURN_ERROR_IF(cctx->cdict, stage_wrong,
+                    "Can't override parameters with cdict attached (some must "
+                    "be inherited from the cdict).");
+
+    cctx->requestedParams = *params;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't set pledgedSrcSize when not in init stage.");
+    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+    return 0;
+}
+
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(
+        int const compressionLevel,
+        size_t const dictSize);
+static int ZSTD_dedicatedDictSearch_isSupported(
+        const ZSTD_compressionParameters* cParams);
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams);
+
+/*
+ * Initializes the local dict using the requested parameters.
+ * NOTE: This does not use the pledged src size, because it may be used for more
+ * than one compression.
+ */
+static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
+{
+    ZSTD_localDict* const dl = &cctx->localDict;
+    if (dl->dict == NULL) {
+        /* No local dictionary. */
+        assert(dl->dictBuffer == NULL);
+        assert(dl->cdict == NULL);
+        assert(dl->dictSize == 0);
+        return 0;
+    }
+    if (dl->cdict != NULL) {
+        assert(cctx->cdict == dl->cdict);
+        /* Local dictionary already initialized. */
+        return 0;
+    }
+    assert(dl->dictSize > 0);
+    assert(cctx->cdict == NULL);
+    assert(cctx->prefixDict.dict == NULL);
+
+    dl->cdict = ZSTD_createCDict_advanced2(
+            dl->dict,
+            dl->dictSize,
+            ZSTD_dlm_byRef,
+            dl->dictContentType,
+            &cctx->requestedParams,
+            cctx->customMem);
+    RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed");
+    cctx->cdict = dl->cdict;
+    return 0;
+}
+
+size_t ZSTD_CCtx_loadDictionary_advanced(
+        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't load a dictionary when ctx is not in init stage.");
+    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
+    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
+    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
+        return 0;
+    if (dictLoadMethod == ZSTD_dlm_byRef) {
+        cctx->localDict.dict = dict;
+    } else {
+        void* dictBuffer;
+        RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
+                        "no malloc for static CCtx");
+        dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem);
+        RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!");
+        ZSTD_memcpy(dictBuffer, dict, dictSize);
+        cctx->localDict.dictBuffer = dictBuffer;
+        cctx->localDict.dict = dictBuffer;
+    }
+    cctx->localDict.dictSize = dictSize;
+    cctx->localDict.dictContentType = dictContentType;
+    return 0;
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
+      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_CCtx_loadDictionary_advanced(
+            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_CCtx_loadDictionary_advanced(
+            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+
+size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a dict when ctx not in init stage.");
+    /* Free the existing local cdict (if any) to save memory. */
+    ZSTD_clearAllDicts(cctx);
+    cctx->cdict = cdict;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a pool when ctx not in init stage.");
+    cctx->pool = pool;
+    return 0;
+}
+
+size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+size_t ZSTD_CCtx_refPrefix_advanced(
+        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                    "Can't ref a prefix when ctx not in init stage.");
+    ZSTD_clearAllDicts(cctx);
+    if (prefix != NULL && prefixSize > 0) {
+        cctx->prefixDict.dict = prefix;
+        cctx->prefixDict.dictSize = prefixSize;
+        cctx->prefixDict.dictContentType = dictContentType;
+    }
+    return 0;
+}
+
+/*! ZSTD_CCtx_reset() :
+ *  Also dumps dictionary */
+size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        cctx->streamStage = zcss_init;
+        cctx->pledgedSrcSizePlusOne = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                        "Can't reset parameters only when not in init stage.");
+        ZSTD_clearAllDicts(cctx);
+        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
+    }
+    return 0;
+}
+
+
+/* ZSTD_checkCParams() :
+    control CParam values remain within authorized range.
+    @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
+    return 0;
+}
+
+/* ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP_TYPE(cParam, val, type) {                                \
+        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+    }
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
+    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
+    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
+    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
+    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
+    return cParams;
+}
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
+
+/* ZSTD_dictAndWindowLog() :
+ * Returns an adjusted window log that is large enough to fit the source and the dictionary.
+ * The zstd format says that the entire dictionary is valid if one byte of the dictionary
+ * is within the window. So the hashLog and chainLog should be large enough to reference both
+ * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
+ * the hashLog and windowLog.
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
+{
+    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
+    /* No dictionary ==> No change */
+    if (dictSize == 0) {
+        return windowLog;
+    }
+    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
+    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
+    {
+        U64 const windowSize = 1ULL << windowLog;
+        U64 const dictAndWindowSize = dictSize + windowSize;
+        /* If the window size is already large enough to fit both the source and the dictionary
+         * then just use the window size. Otherwise adjust so that it fits the dictionary and
+         * the window.
+         */
+        if (windowSize >= dictSize + srcSize) {
+            return windowLog; /* Window size large enough already */
+        } else if (dictAndWindowSize >= maxWindowSize) {
+            return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */
+        } else  {
+            return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1;
+        }
+    }
+}
+
+/* ZSTD_adjustCParams_internal() :
+ *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
+ *  mostly downsize to reduce memory consumption and initialization latency.
+ * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
+ * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`.
+ *  note : `srcSize==0` means 0!
+ *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
+static ZSTD_compressionParameters
+ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                            unsigned long long srcSize,
+                            size_t dictSize,
+                            ZSTD_cParamMode_e mode)
+{
+    const U64 minSrcSize = 513; /* (1<<9) + 1 */
+    const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
+    assert(ZSTD_checkCParams(cPar)==0);
+
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+        /* If we don't know the source size, don't make any
+         * assumptions about it. We will already have selected
+         * smaller parameters if a dictionary is in use.
+         */
+        break;
+    case ZSTD_cpm_createCDict:
+        /* Assume a small source size when creating a dictionary
+         * with an unkown source size.
+         */
+        if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            srcSize = minSrcSize;
+        break;
+    case ZSTD_cpm_attachDict:
+        /* Dictionary has its own dedicated parameters which have
+         * already been selected. We are selecting parameters
+         * for only the source.
+         */
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+
+    /* resize windowLog if input is small enough, to use less memory */
+    if ( (srcSize < maxWindowResize)
+      && (dictSize < maxWindowResize) )  {
+        U32 const tSize = (U32)(srcSize + dictSize);
+        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
+        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
+                            ZSTD_highbit32(tSize-1) + 1;
+        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
+    }
+    if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize);
+        U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
+        if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1;
+        if (cycleLog > dictAndWindowLog)
+            cPar.chainLog -= (cycleLog - dictAndWindowLog);
+    }
+
+    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
+        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */
+
+    return cPar;
+}
+
+ZSTD_compressionParameters
+ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+                   unsigned long long srcSize,
+                   size_t dictSize)
+{
+    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
+    if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown);
+}
+
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+static void ZSTD_overrideCParams(
+              ZSTD_compressionParameters* cParams,
+        const ZSTD_compressionParameters* overrides)
+{
+    if (overrides->windowLog)    cParams->windowLog    = overrides->windowLog;
+    if (overrides->hashLog)      cParams->hashLog      = overrides->hashLog;
+    if (overrides->chainLog)     cParams->chainLog     = overrides->chainLog;
+    if (overrides->searchLog)    cParams->searchLog    = overrides->searchLog;
+    if (overrides->minMatch)     cParams->minMatch     = overrides->minMatch;
+    if (overrides->targetLength) cParams->targetLength = overrides->targetLength;
+    if (overrides->strategy)     cParams->strategy     = overrides->strategy;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    ZSTD_compressionParameters cParams;
+    if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+      srcSizeHint = CCtxParams->srcSizeHint;
+    }
+    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
+    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
+    assert(!ZSTD_checkCParams(cParams));
+    /* srcSizeHint == 0 means 0 */
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode);
+}
+
+static size_t
+ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const U32 forCCtx)
+{
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = chainSize * sizeof(U32)
+                            + hSize * sizeof(U32)
+                            + h3Size * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                                ? optPotentialSpace
+                                : 0;
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace;
+}
+
+static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        const ZSTD_compressionParameters* cParams,
+        const ldmParams_t* ldmParams,
+        const int isStatic,
+        const size_t buffInSize,
+        const size_t buffOutSize,
+        const U64 pledgedSrcSize)
+{
+    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
+    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+    U32    const divider = (cParams->minMatch==3) ? 3 : 4;
+    size_t const maxNbSeq = blockSize / divider;
+    size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
+                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
+    size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
+    size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+
+    size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
+    size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
+    size_t const ldmSeqSpace = ldmParams->enableLdm ?
+        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+
+
+    size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
+                             + ZSTD_cwksp_alloc_size(buffOutSize);
+
+    size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0;
+
+    size_t const neededSpace =
+        cctxSpace +
+        entropySpace +
+        blockStateSpace +
+        ldmSpace +
+        ldmSeqSpace +
+        matchStateSize +
+        tokenSpace +
+        bufferSpace;
+
+    DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace);
+    return neededSpace;
+}
+
+size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    /* estimateCCtxSize is for one-shot compression. So no buffers should
+     * be needed. However, we still allocate two 0-sized buffers, which can
+     * take space under ASAN. */
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+
+        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            ZSTD_CONTENTSIZE_UNKNOWN);
+    }
+}
+
+size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
+    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+}
+
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    return ZSTD_estimateCStreamSize_usingCParams(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+    {   ZSTD_frameProgression fp;
+        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+                                cctx->inBuffPos - cctx->inToCompress;
+        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+        fp.ingested = cctx->consumedSrcSize + buffered;
+        fp.consumed = cctx->consumedSrcSize;
+        fp.produced = cctx->producedCSize;
+        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
+        fp.currentJobID = 0;
+        fp.nbActiveWorkers = 0;
+        return fp;
+}   }
+
+/*! ZSTD_toFlushNow()
+ *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
+ */
+size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
+{
+    (void)cctx;
+    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
+}
+
+static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
+                                    ZSTD_compressionParameters cParams2)
+{
+    (void)cParams1;
+    (void)cParams2;
+    assert(cParams1.windowLog    == cParams2.windowLog);
+    assert(cParams1.chainLog     == cParams2.chainLog);
+    assert(cParams1.hashLog      == cParams2.hashLog);
+    assert(cParams1.searchLog    == cParams2.searchLog);
+    assert(cParams1.minMatch     == cParams2.minMatch);
+    assert(cParams1.targetLength == cParams2.targetLength);
+    assert(cParams1.strategy     == cParams2.strategy);
+}
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
+{
+    int i;
+    for (i = 0; i < ZSTD_REP_NUM; ++i)
+        bs->rep[i] = repStartValue[i];
+    bs->entropy.huf.repeatMode = HUF_repeat_none;
+    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
+}
+
+/*! ZSTD_invalidateMatchState()
+ *  Invalidate all the matches in the match finder tables.
+ *  Requires nextSrc and base to be set (can be NULL).
+ */
+static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
+{
+    ZSTD_window_clear(&ms->window);
+
+    ms->nextToUpdate = ms->window.dictLimit;
+    ms->loadedDictEnd = 0;
+    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
+    ms->dictMatchState = NULL;
+}
+
+/*
+ * Controls, for this matchState reset, whether the tables need to be cleared /
+ * prepared for the coming compression (ZSTDcrp_makeClean), or whether the
+ * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a
+ * subsequent operation will overwrite the table space anyways (e.g., copying
+ * the matchState contents in from a CDict).
+ */
+typedef enum {
+    ZSTDcrp_makeClean,
+    ZSTDcrp_leaveDirty
+} ZSTD_compResetPolicy_e;
+
+/*
+ * Controls, for this matchState reset, whether indexing can continue where it
+ * left off (ZSTDirp_continue), or whether it needs to be restarted from zero
+ * (ZSTDirp_reset).
+ */
+typedef enum {
+    ZSTDirp_continue,
+    ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
+
+typedef enum {
+    ZSTD_resetTarget_CDict,
+    ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
+
+static size_t
+ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+                      ZSTD_cwksp* ws,
+                const ZSTD_compressionParameters* cParams,
+                const ZSTD_compResetPolicy_e crp,
+                const ZSTD_indexResetPolicy_e forceResetIndex,
+                const ZSTD_resetTarget_e forWho)
+{
+    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4, "reserving optimal parser space");
+        ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<<Litbits) * sizeof(unsigned));
+        ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
+        ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
+        ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    }
+
+    ms->cParams = *cParams;
+
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    return 0;
+}
+
+/* ZSTD_indexTooCloseToMax() :
+ * minor optimization : prefer memset() rather than reduceIndex()
+ * which is measurably slow in some circumstances (reported for Visual Studio).
+ * Works when re-using a context for a lot of smallish inputs :
+ * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
+ * memset() will be triggered before reduceIndex().
+ */
+#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
+static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
+{
+    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
+}
+
+/*! ZSTD_resetCCtx_internal() :
+    note : `params` are assumed fully validated at this stage */
+static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
+                                      ZSTD_CCtx_params params,
+                                      U64 const pledgedSrcSize,
+                                      ZSTD_compResetPolicy_e const crp,
+                                      ZSTD_buffered_policy_e const zbuff)
+{
+    ZSTD_cwksp* const ws = &zc->workspace;
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
+                (U32)pledgedSrcSize, params.cParams.windowLog);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+
+    zc->isFirstBlock = 1;
+
+    if (params.ldmParams.enableLdm) {
+        /* Adjust long distance matching parameters */
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+    }
+
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
+        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        size_t const maxNbSeq = blockSize / divider;
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+                ? windowSize + blockSize
+                : 0;
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+
+        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        ZSTD_indexResetPolicy_e needsIndexReset =
+            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+
+        size_t const neededSpace =
+            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                buffInSize, buffOutSize, pledgedSrcSize);
+        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
+
+        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+        /* Check if workspace is large enough, alloc a new one if needed */
+        {
+            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+
+            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (workspaceTooSmall || workspaceWasteful) {
+                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+                            ZSTD_cwksp_sizeof(ws) >> 10,
+                            neededSpace >> 10);
+
+                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
+
+                needsIndexReset = ZSTDirp_reset;
+
+                ZSTD_cwksp_free(ws, zc->customMem);
+                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+                DEBUGLOG(5, "reserving object space");
+                /* Statically sized space.
+                 * entropyWorkspace never moves,
+                 * though prev/next block swap places */
+                assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t)));
+                zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock");
+                zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
+                zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
+                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+        }   }
+
+        ZSTD_cwksp_clear(ws);
+
+        /* init params */
+        zc->appliedParams = params;
+        zc->blockState.matchState.cParams = params.cParams;
+        zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
+        zc->consumedSrcSize = 0;
+        zc->producedCSize = 0;
+        if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
+            zc->appliedParams.fParams.contentSizeFlag = 0;
+        DEBUGLOG(4, "pledged content size : %u ; flag : %u",
+            (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
+        zc->blockSize = blockSize;
+
+        xxh64_reset(&zc->xxhState, 0);
+        zc->stage = ZSTDcs_init;
+        zc->dictID = 0;
+        zc->dictContentSize = 0;
+
+        ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
+
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH);
+        zc->seqStore.maxNbLit = blockSize;
+
+        /* buffers */
+        zc->bufferedPolicy = zbuff;
+        zc->inBuffSize = buffInSize;
+        zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize);
+        zc->outBuffSize = buffOutSize;
+        zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
+
+        /* ldm bucketOffsets table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const numBuckets =
+                  ((size_t)1) << (params.ldmParams.hashLog -
+                                  params.ldmParams.bucketSizeLog);
+            zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
+            ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
+        }
+
+        /* sequences storage */
+        ZSTD_referenceExternalSequences(zc, NULL, 0);
+        zc->seqStore.maxNbSeq = maxNbSeq;
+        zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE));
+        zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef));
+
+        FORWARD_IF_ERROR(ZSTD_reset_matchState(
+            &zc->blockState.matchState,
+            ws,
+            &params.cParams,
+            crp,
+            needsIndexReset,
+            ZSTD_resetTarget_CCtx), "");
+
+        /* ldm hash table */
+        if (params.ldmParams.enableLdm) {
+            /* TODO: avoid memset? */
+            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
+            ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
+            zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
+            zc->maxNbLdmSequences = maxNbLdmSeq;
+
+            ZSTD_window_init(&zc->ldmState.window);
+            ZSTD_window_clear(&zc->ldmState.window);
+            zc->ldmState.loadedDictEnd = 0;
+        }
+
+        /* Due to alignment, when reusing a workspace, we can actually consume
+         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
+         */
+        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
+               ZSTD_cwksp_used(ws) <= neededSpace + 3);
+
+        DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        zc->initialized = 1;
+
+        return 0;
+    }
+}
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
+    int i;
+    for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
+    assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+}
+
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
+ */
+static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
+    8 KB,  /* unused */
+    8 KB,  /* ZSTD_fast */
+    16 KB, /* ZSTD_dfast */
+    32 KB, /* ZSTD_greedy */
+    32 KB, /* ZSTD_lazy */
+    32 KB, /* ZSTD_lazy2 */
+    32 KB, /* ZSTD_btlazy2 */
+    32 KB, /* ZSTD_btopt */
+    8 KB,  /* ZSTD_btultra */
+    8 KB   /* ZSTD_btultra2 */
+};
+
+static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
+                                 const ZSTD_CCtx_params* params,
+                                 U64 pledgedSrcSize)
+{
+    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
+    int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch;
+    return dedicatedDictSearch
+        || ( ( pledgedSrcSize <= cutoff
+            || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+            || params->attachDictPref == ZSTD_dictForceAttach )
+          && params->attachDictPref != ZSTD_dictForceCopy
+          && !params->forceWindow ); /* dictMatchState isn't correctly
+                                      * handled in _enforceMaxDist */
+}
+
+static size_t
+ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
+                        const ZSTD_CDict* cdict,
+                        ZSTD_CCtx_params params,
+                        U64 pledgedSrcSize,
+                        ZSTD_buffered_policy_e zbuff)
+{
+    {
+        ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
+        unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Resize working context table params for input only, since the dict
+         * has its own tables. */
+        /* pledgedSrcSize == 0 means 0! */
+
+        if (cdict->matchState.dedicatedDictSearch) {
+            ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams);
+        }
+
+        params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
+                                                     cdict->dictContentSize, ZSTD_cpm_attachDict);
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_makeClean, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
+    }
+
+    {   const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+                                  - cdict->matchState.window.base);
+        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
+        if (cdictLen == 0) {
+            /* don't even attach dictionaries with no contents */
+            DEBUGLOG(4, "skipping attaching empty dictionary");
+        } else {
+            DEBUGLOG(4, "attaching dictionary into context");
+            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
+
+            /* prep working match state so dict matches never have negative indices
+             * when they are translated to the working context's index space. */
+            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
+                cctx->blockState.matchState.window.nextSrc =
+                    cctx->blockState.matchState.window.base + cdictEnd;
+                ZSTD_window_clear(&cctx->blockState.matchState.window);
+            }
+            /* loadedDictEnd is expressed within the referential of the active context */
+            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
+    }   }
+
+    cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            ZSTD_CCtx_params params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+
+    assert(!cdict->matchState.dedicatedDictSearch);
+
+    DEBUGLOG(4, "copying dictionary into context");
+
+    {   unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Copy only compression parameters related to tables. */
+        params.cParams = *cdict_cParams;
+        params.cParams.windowLog = windowLog;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                                 ZSTDcrp_leaveDirty, zbuff), "");
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
+        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
+
+        ZSTD_memcpy(cctx->blockState.matchState.hashTable,
+               cdict->matchState.hashTable,
+               hSize * sizeof(U32));
+        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+               cdict->matchState.chainTable,
+               chainSize * sizeof(U32));
+    }
+
+    /* Zero the hashTable3, since the cdict never fills it */
+    {   int const h3log = cctx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+        assert(cdict->matchState.hashLog3 == 0);
+        ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
+    }
+
+    ZSTD_cwksp_mark_tables_clean(&cctx->workspace);
+
+    /* copy dictionary offsets */
+    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+        ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+
+    cctx->dictID = cdict->dictID;
+    cctx->dictContentSize = cdict->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+/* We have a choice between copying the dictionary context into the working
+ * context, or referencing the dictionary context from the working context
+ * in-place. We decide here which strategy to use. */
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+                            const ZSTD_CDict* cdict,
+                            const ZSTD_CCtx_params* params,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+
+    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
+                (unsigned)pledgedSrcSize);
+
+    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
+        return ZSTD_resetCCtx_byAttachingCDict(
+            cctx, cdict, *params, pledgedSrcSize, zbuff);
+    } else {
+        return ZSTD_resetCCtx_byCopyingCDict(
+            cctx, cdict, *params, pledgedSrcSize, zbuff);
+    }
+}
+
+/*! ZSTD_copyCCtx_internal() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  The "context", in this case, refers to the hash and chain tables,
+ *  entropy tables, and dictionary references.
+ * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
+ * @return : 0, or an error code */
+static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
+                            const ZSTD_CCtx* srcCCtx,
+                            ZSTD_frameParameters fParams,
+                            U64 pledgedSrcSize,
+                            ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
+    RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
+                    "Can't copy a ctx that's not in init stage.");
+
+    ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
+    {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
+        /* Copy only compression parameters related to tables. */
+        params.cParams = srcCCtx->appliedParams.cParams;
+        params.fParams = fParams;
+        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+                                ZSTDcrp_leaveDirty, zbuff);
+        assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
+        assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
+        assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
+        assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
+        assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
+    }
+
+    ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
+
+    /* copy tables */
+    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+        size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
+        int const h3log = srcCCtx->blockState.matchState.hashLog3;
+        size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
+
+        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable,
+               srcCCtx->blockState.matchState.hashTable,
+               hSize * sizeof(U32));
+        ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable,
+               srcCCtx->blockState.matchState.chainTable,
+               chainSize * sizeof(U32));
+        ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3,
+               srcCCtx->blockState.matchState.hashTable3,
+               h3Size * sizeof(U32));
+    }
+
+    ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace);
+
+    /* copy dictionary offsets */
+    {
+        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
+        ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
+        dstMatchState->window       = srcMatchState->window;
+        dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
+        dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
+    }
+    dstCCtx->dictID = srcCCtx->dictID;
+    dstCCtx->dictContentSize = srcCCtx->dictContentSize;
+
+    /* copy block state */
+    ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
+
+    return 0;
+}
+
+/*! ZSTD_copyCCtx() :
+ *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
+ *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
+ *  pledgedSrcSize==0 means "unknown".
+*   @return : 0, or an error code */
+size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
+{
+    ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy;
+    ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
+    if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
+    fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
+
+    return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
+                                fParams, pledgedSrcSize,
+                                zbuff);
+}
+
+
+#define ZSTD_ROWSIZE 16
+/*! ZSTD_reduceTable() :
+ *  reduce table indexes by `reducerValue`, or squash to zero.
+ *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
+ *  It must be set to a clear 0/1 value, to remove branch during inlining.
+ *  Presume table size is a multiple of ZSTD_ROWSIZE
+ *  to help auto-vectorization */
+FORCE_INLINE_TEMPLATE void
+ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
+{
+    int const nbRows = (int)size / ZSTD_ROWSIZE;
+    int cellNb = 0;
+    int rowNb;
+    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
+    assert(size < (1U<<31));   /* can be casted to int */
+
+
+    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
+        int column;
+        for (column=0; column<ZSTD_ROWSIZE; column++) {
+            if (preserveMark) {
+                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
+                table[cellNb] += adder;
+            }
+            if (table[cellNb] < reducerValue) table[cellNb] = 0;
+            else table[cellNb] -= reducerValue;
+            cellNb++;
+    }   }
+}
+
+static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
+}
+
+static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
+{
+    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
+}
+
+/*! ZSTD_reduceIndex() :
+*   rescale all indexes to avoid future overflow (indexes are U32) */
+static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
+{
+    {   U32 const hSize = (U32)1 << params->cParams.hashLog;
+        ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
+    }
+
+    if (params->cParams.strategy != ZSTD_fast) {
+        U32 const chainSize = (U32)1 << params->cParams.chainLog;
+        if (params->cParams.strategy == ZSTD_btlazy2)
+            ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
+        else
+            ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
+    }
+
+    if (ms->hashLog3) {
+        U32 const h3Size = (U32)1 << ms->hashLog3;
+        ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
+    }
+}
+
+
+/*-*******************************************************
+*  Block entropic compression
+*********************************************************/
+
+/* See doc/zstd_compression_format.md for detailed format description */
+
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
+{
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    BYTE* const llCodeTable = seqStorePtr->llCode;
+    BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    U32 u;
+    assert(nbSeq <= seqStorePtr->maxNbSeq);
+    for (u=0; u<nbSeq; u++) {
+        U32 const llv = sequences[u].litLength;
+        U32 const mlv = sequences[u].matchLength;
+        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
+        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
+    }
+    if (seqStorePtr->longLengthID==1)
+        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
+    if (seqStorePtr->longLengthID==2)
+        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
+}
+
+/* ZSTD_useTargetCBlockSize():
+ * Returns if target compressed block size param is being used.
+ * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize.
+ * Returns 1 if true, 0 otherwise. */
+static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize);
+    return (cctxParams->targetCBlockSize != 0);
+}
+
+/* ZSTD_entropyCompressSequences_internal():
+ * actually compresses both literals and sequences */
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+                          const ZSTD_entropyCTables_t* prevEntropy,
+                                ZSTD_entropyCTables_t* nextEntropy,
+                          const ZSTD_CCtx_params* cctxParams,
+                                void* dst, size_t dstCapacity,
+                                void* entropyWorkspace, size_t entropyWkspSize,
+                          const int bmi2)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    unsigned* count = (unsigned*)entropyWorkspace;
+    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
+    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
+    const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    BYTE* seqHead;
+    BYTE* lastNCount = NULL;
+
+    entropyWorkspace = count + (MaxSeq + 1);
+    entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
+
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
+
+    /* Compress literals */
+    {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const litSize = (size_t)(seqStorePtr->lit - literals);
+        size_t const cSize = ZSTD_compressLiterals(
+                                    &prevEntropy->huf, &nextEntropy->huf,
+                                    cctxParams->cParams.strategy,
+                                    ZSTD_disableLiteralsCompression(cctxParams),
+                                    op, dstCapacity,
+                                    literals, litSize,
+                                    entropyWorkspace, entropyWkspSize,
+                                    bmi2);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
+        assert(cSize <= dstCapacity);
+        op += cSize;
+    }
+
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "Can't fit seq hdr in output buf!");
+    if (nbSeq < 128) {
+        *op++ = (BYTE)nbSeq;
+    } else if (nbSeq < LONGNBSEQ) {
+        op[0] = (BYTE)((nbSeq>>8) + 0x80);
+        op[1] = (BYTE)nbSeq;
+        op+=2;
+    } else {
+        op[0]=0xFF;
+        MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ));
+        op+=3;
+    }
+    assert(op <= oend);
+    if (nbSeq==0) {
+        /* Copy the old tables over as if we repeated them */
+        ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
+        return (size_t)(op - ostart);
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+    assert(op <= oend);
+
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->fse.litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                count, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->fse.litlengthCTable,
+                sizeof(prevEntropy->fse.litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->fse.offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                count, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->fse.offcodeCTable,
+                sizeof(prevEntropy->fse.offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
+                                        count, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                count, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->fse.matchlengthCTable,
+                sizeof(prevEntropy->fse.matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                lastNCount = op;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+
+    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, (size_t)(oend - op),
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        assert(op <= oend);
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+        if (lastNCount && (op - lastNCount) < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(op - lastNCount == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+    }
+
+    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
+    return (size_t)(op - ostart);
+}
+
+MEM_STATIC size_t
+ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+                       const ZSTD_entropyCTables_t* prevEntropy,
+                             ZSTD_entropyCTables_t* nextEntropy,
+                       const ZSTD_CCtx_params* cctxParams,
+                             void* dst, size_t dstCapacity,
+                             size_t srcSize,
+                             void* entropyWorkspace, size_t entropyWkspSize,
+                             int bmi2)
+{
+    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
+                            dst, dstCapacity,
+                            entropyWorkspace, entropyWkspSize, bmi2);
+    if (cSize == 0) return 0;
+    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
+     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
+     */
+    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+        return 0;  /* block not compressed */
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+
+    /* Check compressibility */
+    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
+        if (cSize >= maxCSize) return 0;  /* block not compressed */
+    }
+    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    return cSize;
+}
+
+/* ZSTD_selectBlockCompressor() :
+ * Not static, but internal use only (used by long distance matcher)
+ * assumption : strat is a valid strategy */
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+{
+    static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
+        { ZSTD_compressBlock_fast  /* default for 0 */,
+          ZSTD_compressBlock_fast,
+          ZSTD_compressBlock_doubleFast,
+          ZSTD_compressBlock_greedy,
+          ZSTD_compressBlock_lazy,
+          ZSTD_compressBlock_lazy2,
+          ZSTD_compressBlock_btlazy2,
+          ZSTD_compressBlock_btopt,
+          ZSTD_compressBlock_btultra,
+          ZSTD_compressBlock_btultra2 },
+        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
+          ZSTD_compressBlock_fast_extDict,
+          ZSTD_compressBlock_doubleFast_extDict,
+          ZSTD_compressBlock_greedy_extDict,
+          ZSTD_compressBlock_lazy_extDict,
+          ZSTD_compressBlock_lazy2_extDict,
+          ZSTD_compressBlock_btlazy2_extDict,
+          ZSTD_compressBlock_btopt_extDict,
+          ZSTD_compressBlock_btultra_extDict,
+          ZSTD_compressBlock_btultra_extDict },
+        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
+          ZSTD_compressBlock_fast_dictMatchState,
+          ZSTD_compressBlock_doubleFast_dictMatchState,
+          ZSTD_compressBlock_greedy_dictMatchState,
+          ZSTD_compressBlock_lazy_dictMatchState,
+          ZSTD_compressBlock_lazy2_dictMatchState,
+          ZSTD_compressBlock_btlazy2_dictMatchState,
+          ZSTD_compressBlock_btopt_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState,
+          ZSTD_compressBlock_btultra_dictMatchState },
+        { NULL  /* default for 0 */,
+          NULL,
+          NULL,
+          ZSTD_compressBlock_greedy_dedicatedDictSearch,
+          ZSTD_compressBlock_lazy_dedicatedDictSearch,
+          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
+          NULL,
+          NULL,
+          NULL,
+          NULL }
+    };
+    ZSTD_blockCompressor selectedCompressor;
+    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
+
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    assert(selectedCompressor != NULL);
+    return selectedCompressor;
+}
+
+static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
+                                   const BYTE* anchor, size_t lastLLSize)
+{
+    ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize);
+    seqStorePtr->lit += lastLLSize;
+}
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr)
+{
+    ssPtr->lit = ssPtr->litStart;
+    ssPtr->sequences = ssPtr->sequencesStart;
+    ssPtr->longLengthID = 0;
+}
+
+typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
+
+static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
+{
+    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
+    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    /* Assert that we have correctly flushed the ctx params into the ms's copy */
+    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
+    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+        if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) {
+            ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize);
+        } else {
+            ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
+        }
+        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
+    }
+    ZSTD_resetSeqStore(&(zc->seqStore));
+    /* required for optimal parser to read stats from dictionary */
+    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
+    /* tell the optimal parser how we expect to compress literals */
+    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
+    /* a gap between an attached dict and the current window is not safe,
+     * they must remain adjacent,
+     * and when that stops being the case, the dict must be unset */
+    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
+
+    /* limited update after a very long match */
+    {   const BYTE* const base = ms->window.base;
+        const BYTE* const istart = (const BYTE*)src;
+        const U32 curr = (U32)(istart-base);
+        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
+        if (curr > ms->nextToUpdate + 384)
+            ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384));
+    }
+
+    /* select and store sequences */
+    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
+        size_t lastLLSize;
+        {   int i;
+            for (i = 0; i < ZSTD_REP_NUM; ++i)
+                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+        }
+        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
+            assert(!zc->appliedParams.ldmParams.enableLdm);
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&zc->externSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
+        } else if (zc->appliedParams.ldmParams.enableLdm) {
+            rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
+
+            ldmSeqStore.seq = zc->ldmSequences;
+            ldmSeqStore.capacity = zc->maxNbLdmSequences;
+            /* Updates ldmSeqStore.size */
+            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
+                                               &zc->appliedParams.ldmParams,
+                                               src, srcSize), "");
+            /* Updates ldmSeqStore.pos */
+            lastLLSize =
+                ZSTD_ldm_blockCompress(&ldmSeqStore,
+                                       ms, &zc->seqStore,
+                                       zc->blockState.nextCBlock->rep,
+                                       src, srcSize);
+            assert(ldmSeqStore.pos == ldmSeqStore.size);
+        } else {   /* not long range mode */
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ms->ldmSeqStore = NULL;
+            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
+        }
+        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
+            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
+    }   }
+    return ZSTDbss_compress;
+}
+
+static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+{
+    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
+    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
+    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
+    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
+    size_t literalsRead = 0;
+    size_t lastLLSize;
+
+    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    size_t i;
+    repcodes_t updatedRepcodes;
+
+    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
+    /* Ensure we have enough space for last literals "sequence" */
+    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
+    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (i = 0; i < seqStoreSeqSize; ++i) {
+        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
+        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
+        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
+        outSeqs[i].rep = 0;
+
+        if (i == seqStore->longLengthPos) {
+            if (seqStore->longLengthID == 1) {
+                outSeqs[i].litLength += 0x10000;
+            } else if (seqStore->longLengthID == 2) {
+                outSeqs[i].matchLength += 0x10000;
+            }
+        }
+
+        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
+            /* Derive the correct offset corresponding to a repcode */
+            outSeqs[i].rep = seqStoreSeqs[i].offset;
+            if (outSeqs[i].litLength != 0) {
+                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+            } else {
+                if (outSeqs[i].rep == 3) {
+                    rawOffset = updatedRepcodes.rep[0] - 1;
+                } else {
+                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+                }
+            }
+        }
+        outSeqs[i].offset = rawOffset;
+        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
+           so we provide seqStoreSeqs[i].offset - 1 */
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
+                                         seqStoreSeqs[i].offset - 1,
+                                         seqStoreSeqs[i].litLength == 0);
+        literalsRead += outSeqs[i].litLength;
+    }
+    /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
+     * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
+     * for the block boundary, according to the API.
+     */
+    assert(seqStoreLiteralsSize >= literalsRead);
+    lastLLSize = seqStoreLiteralsSize - literalsRead;
+    outSeqs[i].litLength = (U32)lastLLSize;
+    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
+    seqStoreSeqSize++;
+    zc->seqCollector.seqIndex += seqStoreSeqSize;
+}
+
+size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+                              size_t outSeqsSize, const void* src, size_t srcSize)
+{
+    const size_t dstCapacity = ZSTD_compressBound(srcSize);
+    void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
+    SeqCollector seqCollector;
+
+    RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
+
+    seqCollector.collectSequences = 1;
+    seqCollector.seqStart = outSeqs;
+    seqCollector.seqIndex = 0;
+    seqCollector.maxSequences = outSeqsSize;
+    zc->seqCollector = seqCollector;
+
+    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+    ZSTD_customFree(dst, ZSTD_defaultCMem);
+    return zc->seqCollector.seqIndex;
+}
+
+size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) {
+    size_t in = 0;
+    size_t out = 0;
+    for (; in < seqsSize; ++in) {
+        if (sequences[in].offset == 0 && sequences[in].matchLength == 0) {
+            if (in != seqsSize - 1) {
+                sequences[in+1].litLength += sequences[in].litLength;
+            }
+        } else {
+            sequences[out] = sequences[in];
+            ++out;
+        }
+    }
+    return out;
+}
+
+/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */
+static int ZSTD_isRLE(const BYTE* src, size_t length) {
+    const BYTE* ip = src;
+    const BYTE value = ip[0];
+    const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL);
+    const size_t unrollSize = sizeof(size_t) * 4;
+    const size_t unrollMask = unrollSize - 1;
+    const size_t prefixLength = length & unrollMask;
+    size_t i;
+    size_t u;
+    if (length == 1) return 1;
+    /* Check if prefix is RLE first before using unrolled loop */
+    if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) {
+        return 0;
+    }
+    for (i = prefixLength; i != length; i += unrollSize) {
+        for (u = 0; u < unrollSize; u += sizeof(size_t)) {
+            if (MEM_readST(ip + i + u) != valueST) {
+                return 0;
+            }
+        }
+    }
+    return 1;
+}
+
+/* Returns true if the given block may be RLE.
+ * This is just a heuristic based on the compressibility.
+ * It may return both false positives and false negatives.
+ */
+static int ZSTD_maybeRLE(seqStore_t const* seqStore)
+{
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart);
+
+    return nbSeqs < 4 && nbLits < 10;
+}
+
+static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+{
+    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
+    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
+    zc->blockState.nextCBlock = tmp;
+}
+
+static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 frame)
+{
+    /* This the upper bound for the length of an rle block.
+     * This isn't the actual upper bound. Finding the real threshold
+     * needs further investigation.
+     */
+    const U32 rleMaxLength = 25;
+    size_t cSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        return 0;
+    }
+
+    /* encode sequences and literals */
+    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            srcSize,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+            zc->bmi2);
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        return 0;
+    }
+
+
+    if (frame &&
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+         * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+         * This is only an issue for zstd <= v1.4.3
+         */
+        !zc->isFirstBlock &&
+        cSize < rleMaxLength &&
+        ZSTD_isRLE(ip, srcSize))
+    {
+        cSize = 1;
+        op[0] = ip[0];
+    }
+
+out:
+    if (!ZSTD_isError(cSize) && cSize > 1) {
+        ZSTD_confirmRepcodesAndEntropyTables(zc);
+    }
+    /* We check that dictionaries have offset codes available for the first
+     * block. After the first block, the offcode table might not have large
+     * enough codes to represent the offsets in the data.
+     */
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               const size_t bss, U32 lastBlock)
+{
+    DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()");
+    if (bss == ZSTDbss_compress) {
+        if (/* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            !zc->isFirstBlock &&
+            ZSTD_maybeRLE(&zc->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize))
+        {
+            return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock);
+        }
+        /* Attempt superblock compression.
+         *
+         * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the
+         * standard ZSTD_compressBound(). This is a problem, because even if we have
+         * space now, taking an extra byte now could cause us to run out of space later
+         * and violate ZSTD_compressBound().
+         *
+         * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize.
+         *
+         * In order to respect ZSTD_compressBound() we must attempt to emit a raw
+         * uncompressed block in these cases:
+         *   * cSize == 0: Return code for an uncompressed block.
+         *   * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize).
+         *     ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of
+         *     output space.
+         *   * cSize >= blockBound(srcSize): We have expanded the block too much so
+         *     emit an uncompressed block.
+         */
+        {
+            size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock);
+            if (cSize != ERROR(dstSize_tooSmall)) {
+                size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
+                if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
+                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    return cSize;
+                }
+            }
+        }
+    }
+
+    DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()");
+    /* Superblock compression failed, attempt to emit a single no compress block.
+     * The decoder will be able to stream this block since it is uncompressed.
+     */
+    return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
+}
+
+static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                               U32 lastBlock)
+{
+    size_t cSize = 0;
+    const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+    DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize);
+    FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+
+    cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed");
+
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         void const* ip,
+                                         void const* iend)
+{
+    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
+        U32 const maxDist = (U32)1 << params->cParams.windowLog;
+        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
+        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
+        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+        ZSTD_reduceIndex(ms, params, correction);
+        ZSTD_cwksp_mark_tables_clean(ws);
+        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
+        else ms->nextToUpdate -= correction;
+        /* invalidate dictionaries on overflow correction */
+        ms->loadedDictEnd = 0;
+        ms->dictMatchState = NULL;
+    }
+}
+
+/*! ZSTD_compress_frameChunk() :
+*   Compress a chunk of data into one or multiple blocks.
+*   All blocks will be terminated, all input will be consumed.
+*   Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+*   Frame is supposed already started (header already produced)
+*   @return : compressed size, or an error code
+*/
+static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+                                     void* dst, size_t dstCapacity,
+                               const void* src, size_t srcSize,
+                                     U32 lastFrameChunk)
+{
+    size_t blockSize = cctx->blockSize;
+    size_t remaining = srcSize;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
+
+    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);
+
+    DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
+        xxh64_update(&cctx->xxhState, src, srcSize);
+
+    while (remaining) {
+        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
+
+        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
+                        dstSize_tooSmall,
+                        "not enough space to store compressed block");
+        if (remaining < blockSize) blockSize = remaining;
+
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
+        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+
+        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
+        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
+
+        {   size_t cSize;
+            if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) {
+                cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
+                assert(cSize > 0);
+                assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else {
+                cSize = ZSTD_compressBlock_internal(cctx,
+                                        op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
+                                        ip, blockSize, 1 /* frame */);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed");
+
+                if (cSize == 0) {  /* block is not compressible */
+                    cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+                    FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+                } else {
+                    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+                    MEM_writeLE24(op, cBlockHeader);
+                    cSize += ZSTD_blockHeaderSize;
+                }
+            }
+
+
+            ip += blockSize;
+            assert(remaining >= blockSize);
+            remaining -= blockSize;
+            op += cSize;
+            assert(dstCapacity >= cSize);
+            dstCapacity -= cSize;
+            cctx->isFirstBlock = 0;
+            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
+                        (unsigned)cSize);
+    }   }
+
+    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
+    return (size_t)(op-ostart);
+}
+
+
+static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID)
+{   BYTE* const op = (BYTE*)dst;
+    U32   const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
+    U32   const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
+    U32   const checksumFlag = params->fParams.checksumFlag>0;
+    U32   const windowSize = (U32)1 << params->cParams.windowLog;
+    U32   const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
+    BYTE  const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
+    U32   const fcsCode = params->fParams.contentSizeFlag ?
+                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
+    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
+    size_t pos=0;
+
+    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+                    "dst buf is too small to fit worst-case frame header size.");
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+    if (params->format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    switch(dictIDSizeCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : break;
+        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    switch(fcsCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeSkippableFrame_advanced() :
+ * Writes out a skippable frame with the specified magic number variant (16 are supported),
+ * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
+ *
+ * Returns the total number of bytes written, or a ZSTD error code.
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+                    "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+                    parameter_unsupported,
+                    "incompatible with ldm");
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                               U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed");
+        cctx->consumedSrcSize += srcSize;
+        cctx->producedCSize += (cSize + fhSize);
+        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+            RETURN_ERROR_IF(
+                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
+                srcSize_wrong,
+                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
+                (unsigned)cctx->pledgedSrcSizePlusOne-1,
+                (unsigned)cctx->consumedSrcSize);
+        }
+        return cSize + fhSize;
+    }
+}
+
+size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
+}
+
+
+size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
+{
+    ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
+    assert(!ZSTD_checkCParams(cParams));
+    return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
+}
+
+size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize);
+    { size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
+      RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); }
+
+    return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
+}
+
+/*! ZSTD_loadDictionaryContent() :
+ *  @return : 0, or an error code
+ */
+static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
+                                         ldmState_t* ls,
+                                         ZSTD_cwksp* ws,
+                                         ZSTD_CCtx_params const* params,
+                                         const void* src, size_t srcSize,
+                                         ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const BYTE* ip = (const BYTE*) src;
+    const BYTE* const iend = ip + srcSize;
+
+    ZSTD_window_update(&ms->window, src, srcSize);
+    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+
+    if (params->ldmParams.enableLdm && ls != NULL) {
+        ZSTD_window_update(&ls->window, src, srcSize);
+        ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
+    }
+
+    /* Assert that we the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    while (iend - ip > HASH_READ_SIZE) {
+        size_t const remaining = (size_t)(iend - ip);
+        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
+        const BYTE* const ichunk = ip + chunk;
+
+        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+
+        if (params->ldmParams.enableLdm && ls != NULL)
+            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+
+        switch(params->cParams.strategy)
+        {
+        case ZSTD_fast:
+            ZSTD_fillHashTable(ms, ichunk, dtlm);
+            break;
+        case ZSTD_dfast:
+            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
+            break;
+
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
+                assert(chunk == remaining); /* must load everything in one go */
+                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
+            } else if (chunk >= HASH_READ_SIZE) {
+                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+            }
+            break;
+
+        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            if (chunk >= HASH_READ_SIZE)
+                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
+            break;
+
+        default:
+            assert(0);  /* not possible : not a valid strategy id */
+        }
+
+        ip = ichunk;
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+ * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
+ * and only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights. Otherwise, we set it to check */
+        if (!hasZeroWeights)
+            bs->entropy.huf.repeatMode = HUF_repeat_valid;
+
+        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
+        dictPtr += hufHeaderSize;
+    }
+
+    {   unsigned offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        /* fill all offset symbols to avoid garbage at end of table */
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.offcodeCTable,
+                offcodeNCount, MaxOff, offcodeLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.matchlengthCTable,
+                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
+                bs->entropy.fse.litlengthCTable,
+                litlengthNCount, litlengthMaxValue, litlengthLog,
+                workspace, HUF_WORKSPACE_SIZE)),
+            dictionary_corrupted, "");
+        bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    bs->rep[0] = MEM_readLE32(dictPtr+0);
+    bs->rep[1] = MEM_readLE32(dictPtr+4);
+    bs->rep[2] = MEM_readLE32(dictPtr+8);
+    dictPtr += 12;
+
+    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        U32 offcodeMax = MaxOff;
+        if (dictContentSize <= ((U32)-1) - 128 KB) {
+            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
+            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
+        }
+        /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */
+        bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff));
+
+        /* All repCodes must be <= dictContentSize and != 0 */
+        {   U32 u;
+            for (u=0; u<3; u++) {
+                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, "");
+                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, "");
+    }   }   }
+
+    return dictPtr - (const BYTE*)dict;
+}
+
+/* Dictionary format :
+ * See :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_cwksp* ws,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    size_t dictID;
+    size_t eSize;
+
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize >= 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
+    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+    dictPtr += eSize;
+
+    {
+        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), "");
+    }
+    return dictID;
+}
+
+/* ZSTD_compress_insertDictionary() :
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                               ldmState_t* ls,
+                               ZSTD_cwksp* ws,
+                         const ZSTD_CCtx_params* params,
+                         const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    if ((dict==NULL) || (dictSize<8)) {
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        return 0;
+    }
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(
+                ms, ls, ws, params, dict, dictSize, dtlm);
+        }
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(
+        bs, ms, ws, params, dict, dictSize, dtlm, workspace);
+}
+
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
+
+/*! ZSTD_compressBegin_internal() :
+ * @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
+                                    ZSTD_buffered_policy_e zbuff)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if ( (cdict)
+      && (cdict->dictContentSize > 0)
+      && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+        || cdict->compressionLevel == 0)
+      && (params->attachDictPref != ZSTD_dictForceLoad) ) {
+        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+                                     ZSTDcrp_makeClean, zbuff) , "");
+    {   size_t const dictID = cdict ?
+                ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent,
+                        cdict->dictContentSize, cdict->dictContentType, dtlm,
+                        cctx->entropyWorkspace)
+              : ZSTD_compress_insertDictionary(
+                        cctx->blockState.prevCBlock, &cctx->blockState.matchState,
+                        &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize,
+                        dictContentType, dtlm, cctx->entropyWorkspace);
+        FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+        assert(dictID <= UINT_MAX);
+        cctx->dictID = (U32)dictID;
+        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog);
+    /* compression parameters verification and optimization */
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , "");
+    return ZSTD_compressBegin_internal(cctx,
+                                       dict, dictSize, dictContentType, dtlm,
+                                       cdict,
+                                       params, pledgedSrcSize,
+                                       ZSTDb_not_buffered);
+}
+
+/*! ZSTD_compressBegin_advanced() :
+*   @return : 0, or an error code */
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
+                             const void* dict, size_t dictSize,
+                                   ZSTD_parameters params, unsigned long long pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compressBegin_advanced_internal(cctx,
+                                            dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                            NULL /*cdict*/,
+                                            &cctxParams, pledgedSrcSize);
+}
+
+size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
+    return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                                       &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
+}
+
+size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
+{
+    return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
+}
+
+
+/*! ZSTD_writeEpilogue() :
+*   Ends a frame.
+*   @return : nb of bytes written into dst (or an error code) */
+static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* op = ostart;
+    size_t fhSize = 0;
+
+    DEBUGLOG(4, "ZSTD_writeEpilogue");
+    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
+
+    /* special case : empty frame */
+    if (cctx->stage == ZSTDcs_init) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        dstCapacity -= fhSize;
+        op += fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (cctx->stage != ZSTDcs_ending) {
+        /* write one last empty block, make it the "last" block */
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+    }
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32(op, checksum);
+        op += 4;
+    }
+
+    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
+    return op-ostart;
+}
+
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize)
+{
+    (void)cctx;
+    (void)extraCSize;
+}
+
+size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize)
+{
+    size_t endResult;
+    size_t const cSize = ZSTD_compressContinue_internal(cctx,
+                                dst, dstCapacity, src, srcSize,
+                                1 /* frame mode */, 1 /* last chunk */);
+    FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed");
+    endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
+    FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed");
+    assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
+    if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
+        ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
+        DEBUGLOG(4, "end of frame : controlling src size");
+        RETURN_ERROR_IF(
+            cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
+            srcSize_wrong,
+             "error : pledgedSrcSize = %u, while realSrcSize = %u",
+            (unsigned)cctx->pledgedSrcSizePlusOne-1,
+            (unsigned)cctx->consumedSrcSize);
+    }
+    ZSTD_CCtx_trace(cctx, endResult);
+    return cSize + endResult;
+}
+
+size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict,size_t dictSize,
+                               ZSTD_parameters params)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compress_advanced");
+    FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
+    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    return ZSTD_compress_advanced_internal(cctx,
+                                           dst, dstCapacity,
+                                           src, srcSize,
+                                           dict, dictSize,
+                                           &cctxParams);
+}
+
+/* Internal */
+size_t ZSTD_compress_advanced_internal(
+        ZSTD_CCtx* cctx,
+        void* dst, size_t dstCapacity,
+        const void* src, size_t srcSize,
+        const void* dict,size_t dictSize,
+        const ZSTD_CCtx_params* params)
+{
+    DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
+    FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                         dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
+                         params, srcSize, ZSTDb_not_buffered) , "");
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize,
+                         const void* dict, size_t dictSize,
+                               int compressionLevel)
+{
+    ZSTD_CCtx_params cctxParams;
+    {
+        ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
+        assert(params.fParams.contentSizeFlag == 1);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+    }
+    DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+}
+
+size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
+                         void* dst, size_t dstCapacity,
+                   const void* src, size_t srcSize,
+                         int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
+    assert(cctx != NULL);
+    return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
+}
+
+size_t ZSTD_compress(void* dst, size_t dstCapacity,
+               const void* src, size_t srcSize,
+                     int compressionLevel)
+{
+    size_t result;
+    ZSTD_CCtx* cctx = ZSTD_createCCtx();
+    RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed");
+    result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel);
+    ZSTD_freeCCtx(cctx);
+    return result;
+}
+
+
+/* =====  Dictionary API  ===== */
+
+/*! ZSTD_estimateCDictSize_advanced() :
+ *  Estimate amount of memory that will be needed to create a dictionary with following arguments */
+size_t ZSTD_estimateCDictSize_advanced(
+        size_t dictSize, ZSTD_compressionParameters cParams,
+        ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
+    return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+         + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+            : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
+}
+
+size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
+}
+
+size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support sizeof on NULL */
+    DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
+    /* cdict may be in the workspace */
+    return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict))
+        + ZSTD_cwksp_sizeof(&cdict->workspace);
+}
+
+static size_t ZSTD_initCDict_internal(
+                    ZSTD_CDict* cdict,
+              const void* dictBuffer, size_t dictSize,
+                    ZSTD_dictLoadMethod_e dictLoadMethod,
+                    ZSTD_dictContentType_e dictContentType,
+                    ZSTD_CCtx_params params)
+{
+    DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
+    assert(!ZSTD_checkCParams(params.cParams));
+    cdict->matchState.cParams = params.cParams;
+    cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
+    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
+        cdict->matchState.dedicatedDictSearch = 0;
+    }
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
+        cdict->dictContent = dictBuffer;
+    } else {
+         void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*)));
+        RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!");
+        cdict->dictContent = internalBuffer;
+        ZSTD_memcpy(internalBuffer, dictBuffer, dictSize);
+    }
+    cdict->dictContentSize = dictSize;
+    cdict->dictContentType = dictContentType;
+
+    cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE);
+
+
+    /* Reset the state to no dictionary */
+    ZSTD_reset_compressedBlockState(&cdict->cBlockState);
+    FORWARD_IF_ERROR(ZSTD_reset_matchState(
+        &cdict->matchState,
+        &cdict->workspace,
+        &params.cParams,
+        ZSTDcrp_makeClean,
+        ZSTDirp_reset,
+        ZSTD_resetTarget_CDict), "");
+    /* (Maybe) load the dictionary
+     * Skips loading the dictionary if it is < 8 bytes.
+     */
+    {   params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+        params.fParams.contentSizeFlag = 1;
+        {   size_t const dictID = ZSTD_compress_insertDictionary(
+                    &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace,
+                    &params, cdict->dictContent, cdict->dictContentSize,
+                    dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace);
+            FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
+            assert(dictID <= (size_t)(U32)-1);
+            cdict->dictID = (U32)dictID;
+        }
+    }
+
+    return 0;
+}
+
+static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   size_t const workspaceSize =
+            ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
+            ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
+            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            (dictLoadMethod == ZSTD_dlm_byRef ? 0
+             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
+        void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
+        ZSTD_cwksp ws;
+        ZSTD_CDict* cdict;
+
+        if (!workspace) {
+            ZSTD_customFree(workspace, customMem);
+            return NULL;
+        }
+
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc);
+
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        assert(cdict != NULL);
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+        cdict->customMem = customMem;
+        cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
+
+        return cdict;
+    }
+}
+
+ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params cctxParams;
+    ZSTD_memset(&cctxParams, 0, sizeof(cctxParams));
+    ZSTD_CCtxParams_init(&cctxParams, 0);
+    cctxParams.cParams = cParams;
+    cctxParams.customMem = customMem;
+    return ZSTD_createCDict_advanced2(
+        dictBuffer, dictSize,
+        dictLoadMethod, dictContentType,
+        &cctxParams, customMem);
+}
+
+ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+        const void* dict, size_t dictSize,
+        ZSTD_dictLoadMethod_e dictLoadMethod,
+        ZSTD_dictContentType_e dictContentType,
+        const ZSTD_CCtx_params* originalCctxParams,
+        ZSTD_customMem customMem)
+{
+    ZSTD_CCtx_params cctxParams = *originalCctxParams;
+    ZSTD_compressionParameters cParams;
+    ZSTD_CDict* cdict;
+
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType);
+    if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
+
+    if (cctxParams.enableDedicatedDictSearch) {
+        cParams = ZSTD_dedicatedDictSearch_getCParams(
+            cctxParams.compressionLevel, dictSize);
+        ZSTD_overrideCParams(&cParams, &cctxParams.cParams);
+    } else {
+        cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    }
+
+    if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) {
+        /* Fall back to non-DDSS params */
+        cctxParams.enableDedicatedDictSearch = 0;
+        cParams = ZSTD_getCParamsFromCCtxParams(
+            &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    }
+
+    cctxParams.cParams = cParams;
+
+    cdict = ZSTD_createCDict_advanced_internal(dictSize,
+                        dictLoadMethod, cctxParams.cParams,
+                        customMem);
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                    dict, dictSize,
+                                    dictLoadMethod, dictContentType,
+                                    cctxParams) )) {
+        ZSTD_freeCDict(cdict);
+        return NULL;
+    }
+
+    return cdict;
+}
+
+ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                                  ZSTD_dlm_byCopy, ZSTD_dct_auto,
+                                                  cParams, ZSTD_defaultCMem);
+    if (cdict)
+        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
+    ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize,
+                                     ZSTD_dlm_byRef, ZSTD_dct_auto,
+                                     cParams, ZSTD_defaultCMem);
+    if (cdict)
+        cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel;
+    return cdict;
+}
+
+size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = cdict->customMem;
+        int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict);
+        ZSTD_cwksp_free(&cdict->workspace, cMem);
+        if (!cdictInWorkspace) {
+            ZSTD_customFree(cdict, cMem);
+        }
+        return 0;
+    }
+}
+
+/*! ZSTD_initStaticCDict_advanced() :
+ *  Generate a digested dictionary in provided memory area.
+ *  workspace: The memory area to emplace the dictionary into.
+ *             Provided pointer must 8-bytes aligned.
+ *             It must outlive dictionary usage.
+ *  workspaceSize: Use ZSTD_estimateCDictSize()
+ *                 to determine how large workspace must be.
+ *  cParams : use ZSTD_getCParams() to transform a compression level
+ *            into its relevants cParams.
+ * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
+ *  Note : there is no corresponding "free" function.
+ *         Since workspace was allocated externally, it must be freed externally.
+ */
+const ZSTD_CDict* ZSTD_initStaticCDict(
+                                 void* workspace, size_t workspaceSize,
+                           const void* dict, size_t dictSize,
+                                 ZSTD_dictLoadMethod_e dictLoadMethod,
+                                 ZSTD_dictContentType_e dictContentType,
+                                 ZSTD_compressionParameters cParams)
+{
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
+                            + (dictLoadMethod == ZSTD_dlm_byRef ? 0
+                               : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
+                            + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
+                            + matchStateSize;
+    ZSTD_CDict* cdict;
+    ZSTD_CCtx_params params;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+
+    {
+        ZSTD_cwksp ws;
+        ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc);
+        cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict));
+        if (cdict == NULL) return NULL;
+        ZSTD_cwksp_move(&cdict->workspace, &ws);
+    }
+
+    DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
+        (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
+    if (workspaceSize < neededSize) return NULL;
+
+    ZSTD_CCtxParams_init(&params, 0);
+    params.cParams = cParams;
+
+    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+                                              dict, dictSize,
+                                              dictLoadMethod, dictContentType,
+                                              params) ))
+        return NULL;
+
+    return cdict;
+}
+
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
+{
+    assert(cdict != NULL);
+    return cdict->matchState.cParams;
+}
+
+/*! ZSTD_getDictID_fromCDict() :
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
+{
+    if (cdict==NULL) return 0;
+    return cdict->dictID;
+}
+
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    ZSTD_CCtx_params cctxParams;
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
+    /* Initialize the cctxParams from the cdict */
+    {
+        ZSTD_parameters params;
+        params.fParams = fParams;
+        params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF
+                        || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER
+                        || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+                        || cdict->compressionLevel == 0 ) ?
+                ZSTD_getCParamsFromCDict(cdict)
+              : ZSTD_getCParams(cdict->compressionLevel,
+                                pledgedSrcSize,
+                                cdict->dictContentSize);
+        ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel);
+    }
+    /* Increase window log to fit the entire dictionary and source if the
+     * source size is known. Limit the increase to 19, which is the
+     * window log for compression level 1 with the largest source size.
+     */
+    if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
+        U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
+        cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog);
+    }
+    return ZSTD_compressBegin_internal(cctx,
+                                        NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
+                                        cdict,
+                                        &cctxParams, pledgedSrcSize,
+                                        ZSTDb_not_buffered);
+}
+
+/* ZSTD_compressBegin_usingCDict() :
+ * pledgedSrcSize=0 means "unknown"
+ * if pledgedSrcSize>0, it will enable contentSizeFlag */
+size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
+    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
+    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
+}
+
+/*! ZSTD_compress_usingCDict() :
+ *  Compression using a digested Dictionary.
+ *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
+ *  Note that compression parameters are decided at CDict creation time
+ *  while frame parameters are hardcoded */
+size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict)
+{
+    ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
+    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+}
+
+
+
+/* ******************************************************************
+*  Streaming
+********************************************************************/
+
+ZSTD_CStream* ZSTD_createCStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createCStream");
+    return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticCCtx(workspace, workspaceSize);
+}
+
+ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
+{   /* CStream and CCtx are now same object */
+    return ZSTD_createCCtx_advanced(customMem);
+}
+
+size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
+{
+    return ZSTD_freeCCtx(zcs);   /* same object */
+}
+
+
+
+/*======   Initialization   ======*/
+
+size_t ZSTD_CStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_CStreamOutSize(void)
+{
+    return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
+}
+
+static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize)
+{
+    if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize))
+        return ZSTD_cpm_attachDict;
+    else
+        return ZSTD_cpm_noAttachDict;
+}
+
+/* ZSTD_resetCStream():
+ * pledgedSrcSize == 0 means "unknown" */
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+/*! ZSTD_initCStream_internal() :
+ *  Note : for lib/compress only. Used by zstdmt_compress.c.
+ *  Assumption 1 : params are valid
+ *  Assumption 2 : either dict, or cdict, is defined, not both */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                    const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
+                    const ZSTD_CCtx_params* params,
+                    unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_internal");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
+    zcs->requestedParams = *params;
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+    if (dict) {
+        FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    } else {
+        /* Dictionary is cleared if !cdict */
+        FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    }
+    return 0;
+}
+
+/* ZSTD_initCStream_usingCDict_advanced() :
+ * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+                                            const ZSTD_CDict* cdict,
+                                            ZSTD_frameParameters fParams,
+                                            unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    zcs->requestedParams.fParams = fParams;
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+/* note : cdict must outlive compression session */
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , "");
+    return 0;
+}
+
+
+/* ZSTD_initCStream_advanced() :
+ * pledgedSrcSize must be exact.
+ * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
+ * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+                                 const void* dict, size_t dictSize,
+                                 ZSTD_parameters params, unsigned long long pss)
+{
+    /* for compatibility with older programs relying on this behavior.
+     * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
+     * This line will be removed in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_advanced");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params);
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
+{
+    /* temporary : 0 interpreted as "unknown" during transition period.
+     * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
+     * 0 will be interpreted as "empty" in the future.
+     */
+    U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
+    DEBUGLOG(4, "ZSTD_initCStream_srcSize");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , "");
+    return 0;
+}
+
+size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
+{
+    DEBUGLOG(4, "ZSTD_initCStream");
+    FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , "");
+    FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , "");
+    return 0;
+}
+
+/*======   Compression   ======*/
+
+static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
+{
+    size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
+    if (hintInSize==0) hintInSize = cctx->blockSize;
+    return hintInSize;
+}
+
+/* ZSTD_compressStream_generic():
+ *  internal function for all *compressStream*() variants
+ *  non-static, because can be called from zstdmt_compress.c
+ * @return : hint size for next input */
+static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
+                                          ZSTD_outBuffer* output,
+                                          ZSTD_inBuffer* input,
+                                          ZSTD_EndDirective const flushMode)
+{
+    const char* const istart = (const char*)input->src;
+    const char* const iend = input->size != 0 ? istart + input->size : istart;
+    const char* ip = input->pos != 0 ? istart + input->pos : istart;
+    char* const ostart = (char*)output->dst;
+    char* const oend = output->size != 0 ? ostart + output->size : ostart;
+    char* op = output->pos != 0 ? ostart + output->pos : ostart;
+    U32 someMoreWork = 1;
+
+    /* check expectations */
+    DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
+    if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->inBuff != NULL);
+        assert(zcs->inBuffSize > 0);
+    }
+    if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) {
+        assert(zcs->outBuff !=  NULL);
+        assert(zcs->outBuffSize > 0);
+    }
+    assert(output->pos <= output->size);
+    assert(input->pos <= input->size);
+    assert((U32)flushMode <= (U32)ZSTD_e_end);
+
+    while (someMoreWork) {
+        switch(zcs->streamStage)
+        {
+        case zcss_init:
+            RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
+
+        case zcss_load:
+            if ( (flushMode == ZSTD_e_end)
+              && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip)     /* Enough output space */
+                || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)  /* OR we are allowed to return dstSizeTooSmall */
+              && (zcs->inBuffPos == 0) ) {
+                /* shortcut to compression pass directly into output buffer */
+                size_t const cSize = ZSTD_compressEnd(zcs,
+                                                op, oend-op, ip, iend-ip);
+                DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed");
+                ip = iend;
+                op += cSize;
+                zcs->frameEnded = 1;
+                ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                someMoreWork = 0; break;
+            }
+            /* complete loading into inBuffer in buffered mode */
+            if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+                size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
+                size_t const loaded = ZSTD_limitCopy(
+                                        zcs->inBuff + zcs->inBuffPos, toLoad,
+                                        ip, iend-ip);
+                zcs->inBuffPos += loaded;
+                if (loaded != 0)
+                    ip += loaded;
+                if ( (flushMode == ZSTD_e_continue)
+                  && (zcs->inBuffPos < zcs->inBuffTarget) ) {
+                    /* not enough input to fill full block : stop here */
+                    someMoreWork = 0; break;
+                }
+                if ( (flushMode == ZSTD_e_flush)
+                  && (zcs->inBuffPos == zcs->inToCompress) ) {
+                    /* empty */
+                    someMoreWork = 0; break;
+                }
+            }
+            /* compress current block (note : this stage cannot be stopped in the middle) */
+            DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
+            {   int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered);
+                void* cDst;
+                size_t cSize;
+                size_t oSize = oend-op;
+                size_t const iSize = inputBuffered
+                    ? zcs->inBuffPos - zcs->inToCompress
+                    : MIN((size_t)(iend - ip), zcs->blockSize);
+                if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable)
+                    cDst = op;   /* compress into output buffer, to skip flush stage */
+                else
+                    cDst = zcs->outBuff, oSize = zcs->outBuffSize;
+                if (inputBuffered) {
+                    unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize,
+                                        zcs->inBuff + zcs->inToCompress, iSize);
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    /* prepare next block */
+                    zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
+                    if (zcs->inBuffTarget > zcs->inBuffSize)
+                        zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
+                    DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
+                            (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
+                    if (!lastBlock)
+                        assert(zcs->inBuffTarget <= zcs->inBuffSize);
+                    zcs->inToCompress = zcs->inBuffPos;
+                } else {
+                    unsigned const lastBlock = (ip + iSize == iend);
+                    assert(flushMode == ZSTD_e_end /* Already validated */);
+                    cSize = lastBlock ?
+                            ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) :
+                            ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize);
+                    /* Consume the input prior to error checking to mirror buffered mode. */
+                    if (iSize > 0)
+                        ip += iSize;
+                    FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed");
+                    zcs->frameEnded = lastBlock;
+                    if (lastBlock)
+                        assert(ip == iend);
+                }
+                if (cDst == op) {  /* no need to flush */
+                    op += cSize;
+                    if (zcs->frameEnded) {
+                        DEBUGLOG(5, "Frame completed directly in outBuffer");
+                        someMoreWork = 0;
+                        ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    }
+                    break;
+                }
+                zcs->outBuffContentSize = cSize;
+                zcs->outBuffFlushedSize = 0;
+                zcs->streamStage = zcss_flush; /* pass-through to flush stage */
+            }
+           ZSTD_FALLTHROUGH;
+        case zcss_flush:
+            DEBUGLOG(5, "flush stage");
+            assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered);
+            {   size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
+                size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
+                            zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
+                DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
+                            (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
+                if (flushed)
+                    op += flushed;
+                zcs->outBuffFlushedSize += flushed;
+                if (toFlush!=flushed) {
+                    /* flush not fully completed, presumably because dst is too small */
+                    assert(op==oend);
+                    someMoreWork = 0;
+                    break;
+                }
+                zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
+                if (zcs->frameEnded) {
+                    DEBUGLOG(5, "Frame completed on flush");
+                    someMoreWork = 0;
+                    ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
+                    break;
+                }
+                zcs->streamStage = zcss_load;
+                break;
+            }
+
+        default: /* impossible */
+            assert(0);
+        }
+    }
+
+    input->pos = ip - istart;
+    output->pos = op - ostart;
+    if (zcs->frameEnded) return 0;
+    return ZSTD_nextInputSizeHint(zcs);
+}
+
+static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
+{
+    return ZSTD_nextInputSizeHint(cctx);
+
+}
+
+size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , "");
+    return ZSTD_nextInputSizeHint_MTorST(zcs);
+}
+
+/* After a compression call set the expected input/output buffer.
+ * This is validated at the start of the next compression call.
+ */
+static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        cctx->expectedInBuffer = *input;
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        cctx->expectedOutBufferSize = output->size - output->pos;
+    }
+}
+
+/* Validate that the input/output buffers match the expectations set by
+ * ZSTD_setBufferExpectations.
+ */
+static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx,
+                                        ZSTD_outBuffer const* output,
+                                        ZSTD_inBuffer const* input,
+                                        ZSTD_EndDirective endOp)
+{
+    if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) {
+        ZSTD_inBuffer const expect = cctx->expectedInBuffer;
+        if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!");
+        if (endOp != ZSTD_e_end)
+            RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!");
+    }
+    if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) {
+        size_t const outBufferSize = output->size - output->pos;
+        if (cctx->expectedOutBufferSize != outBufferSize)
+            RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!");
+    }
+    return 0;
+}
+
+static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
+                                             ZSTD_EndDirective endOp,
+                                             size_t inSize) {
+    ZSTD_CCtx_params params = cctx->requestedParams;
+    ZSTD_prefixDict const prefixDict = cctx->prefixDict;
+    FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
+    ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
+    assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
+    if (cctx->cdict)
+        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
+    if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
+    {
+        size_t const dictSize = prefixDict.dict
+                ? prefixDict.dictSize
+                : (cctx->cdict ? cctx->cdict->dictContentSize : 0);
+        ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1);
+        params.cParams = ZSTD_getCParamsFromCCtxParams(
+                &params, cctx->pledgedSrcSizePlusOne-1,
+                dictSize, mode);
+    }
+
+    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
+        /* Enable LDM by default for optimal parser and window size >= 128MB */
+        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
+        params.ldmParams.enableLdm = 1;
+    }
+
+    {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
+        assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+        FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
+                prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast,
+                cctx->cdict,
+                &params, pledgedSrcSize,
+                ZSTDb_buffered) , "");
+        assert(cctx->appliedParams.nbWorkers == 0);
+        cctx->inToCompress = 0;
+        cctx->inBuffPos = 0;
+        if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) {
+            /* for small input: avoid automatic flush on reaching end of block, since
+            * it would require to add a 3-bytes null block to end frame
+            */
+            cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize);
+        } else {
+            cctx->inBuffTarget = 0;
+        }
+        cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
+        cctx->streamStage = zcss_load;
+        cctx->frameEnded = 0;
+    }
+    return 0;
+}
+
+size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
+                             ZSTD_outBuffer* output,
+                             ZSTD_inBuffer* input,
+                             ZSTD_EndDirective endOp)
+{
+    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
+    /* check conditions */
+    RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer");
+    RETURN_ERROR_IF(input->pos  > input->size, srcSize_wrong, "invalid input buffer");
+    RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective");
+    assert(cctx != NULL);
+
+    /* transparent initialization stage */
+    if (cctx->streamStage == zcss_init) {
+        FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed");
+        ZSTD_setBufferExpectations(cctx, output, input);    /* Set initial buffer expectations now that we've initialized */
+    }
+    /* end of transparent initialization stage */
+
+    FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers");
+    /* compression stage */
+    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , "");
+    DEBUGLOG(5, "completed ZSTD_compressStream2");
+    ZSTD_setBufferExpectations(cctx, output, input);
+    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
+}
+
+size_t ZSTD_compressStream2_simpleArgs (
+                            ZSTD_CCtx* cctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos,
+                            ZSTD_EndDirective endOp)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
+
+size_t ZSTD_compress2(ZSTD_CCtx* cctx,
+                      void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode;
+    ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode;
+    DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize);
+    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+    /* Enable stable input/output buffers. */
+    cctx->requestedParams.inBufferMode = ZSTD_bm_stable;
+    cctx->requestedParams.outBufferMode = ZSTD_bm_stable;
+    {   size_t oPos = 0;
+        size_t iPos = 0;
+        size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
+                                        dst, dstCapacity, &oPos,
+                                        src, srcSize, &iPos,
+                                        ZSTD_e_end);
+        /* Reset to the original values. */
+        cctx->requestedParams.inBufferMode = originalInBufferMode;
+        cctx->requestedParams.outBufferMode = originalOutBufferMode;
+        FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed");
+        if (result != 0) {  /* compression not completed, due to lack of output space */
+            assert(oPos == dstCapacity);
+            RETURN_ERROR(dstSize_tooSmall, "");
+        }
+        assert(iPos == srcSize);   /* all input is expected consumed */
+        return oPos;
+    }
+}
+
+typedef struct {
+    U32 idx;             /* Index in array of ZSTD_Sequence */
+    U32 posInSequence;   /* Position within sequence at idx */
+    size_t posInSrc;        /* Number of bytes given by sequences provided so far */
+} ZSTD_sequencePosition;
+
+/* Returns a ZSTD error code if sequence is not valid */
+static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
+    size_t offsetBound;
+    U32 windowSize = 1 << windowLog;
+    /* posInSrc represents the amount of data the the decoder would decode up to this point.
+     * As long as the amount of data decoded is less than or equal to window size, offsets may be
+     * larger than the total length of output decoded in order to reference the dict, even larger than
+     * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
+     */
+    offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
+    return 0;
+}
+
+/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
+    U32 offCode = rawOffset + ZSTD_REP_MOVE;
+    U32 repCode = 0;
+
+    if (!ll0 && rawOffset == rep[0]) {
+        repCode = 1;
+    } else if (rawOffset == rep[1]) {
+        repCode = 2 - ll0;
+    } else if (rawOffset == rep[2]) {
+        repCode = 3 - ll0;
+    } else if (ll0 && rawOffset == rep[0] - 1) {
+        repCode = 3;
+    }
+    if (repCode) {
+        /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
+        offCode = repCode - 1;
+    }
+    return offCode;
+}
+
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ */
+static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                             const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                             const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    BYTE const* ip = (BYTE const*)(src);
+    const BYTE* const iend = ip + blockSize;
+    repcodes_t updatedRepcodes;
+    U32 dictSize;
+    U32 litLength;
+    U32 matchLength;
+    U32 ll0;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = (U32)cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = (U32)cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
+        litLength = inSeqs[idx].litLength;
+        matchLength = inSeqs[idx].matchLength;
+        ll0 = litLength == 0;
+        offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                cctx->appliedParams.cParams.windowLog, dictSize,
+                                                cctx->appliedParams.cParams.minMatch),
+                                                "Sequence validation failed");
+        }
+        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
+                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    if (inSeqs[idx].litLength) {
+        DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength);
+        ip += inSeqs[idx].litLength;
+        seqPos->posInSrc += inSeqs[idx].litLength;
+    }
+    RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!");
+    seqPos->idx = idx+1;
+    return 0;
+}
+
+/* Returns the number of bytes to move the current read position back by. Only non-zero
+ * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something
+ * went wrong.
+ *
+ * This function will attempt to scan through blockSize bytes represented by the sequences
+ * in inSeqs, storing any (partial) sequences.
+ *
+ * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to
+ * avoid splitting a match, or to avoid splitting a match such that it would produce a match
+ * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
+ */
+static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                                       const void* src, size_t blockSize) {
+    U32 idx = seqPos->idx;
+    U32 startPosInSequence = seqPos->posInSequence;
+    U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
+    size_t dictSize;
+    BYTE const* ip = (BYTE const*)(src);
+    BYTE const* iend = ip + blockSize;  /* May be adjusted if we decide to process fewer than blockSize bytes */
+    repcodes_t updatedRepcodes;
+    U32 bytesAdjustment = 0;
+    U32 finalMatchSplit = 0;
+    U32 litLength;
+    U32 matchLength;
+    U32 rawOffset;
+    U32 offCode;
+
+    if (cctx->cdict) {
+        dictSize = cctx->cdict->dictContentSize;
+    } else if (cctx->prefixDict.dict) {
+        dictSize = cctx->prefixDict.dictSize;
+    } else {
+        dictSize = 0;
+    }
+    DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize);
+    DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
+        const ZSTD_Sequence currSeq = inSeqs[idx];
+        litLength = currSeq.litLength;
+        matchLength = currSeq.matchLength;
+        rawOffset = currSeq.offset;
+
+        /* Modify the sequence depending on where endPosInSequence lies */
+        if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
+            if (startPosInSequence >= litLength) {
+                startPosInSequence -= litLength;
+                litLength = 0;
+                matchLength -= startPosInSequence;
+            } else {
+                litLength -= startPosInSequence;
+            }
+            /* Move to the next sequence */
+            endPosInSequence -= currSeq.litLength + currSeq.matchLength;
+            startPosInSequence = 0;
+            idx++;
+        } else {
+            /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence
+               does not reach the end of the match. So, we have to split the sequence */
+            DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u",
+                     currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence);
+            if (endPosInSequence > litLength) {
+                U32 firstHalfMatchLength;
+                litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence;
+                firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength;
+                if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) {
+                    /* Only ever split the match if it is larger than the block size */
+                    U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence;
+                    if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) {
+                        /* Move the endPosInSequence backward so that it creates match of minMatch length */
+                        endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength;
+                        firstHalfMatchLength -= bytesAdjustment;
+                    }
+                    matchLength = firstHalfMatchLength;
+                    /* Flag that we split the last match - after storing the sequence, exit the loop,
+                       but keep the value of endPosInSequence */
+                    finalMatchSplit = 1;
+                } else {
+                    /* Move the position in sequence backwards so that we don't split match, and break to store
+                     * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence
+                     * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so
+                     * would cause the first half of the match to be too small
+                     */
+                    bytesAdjustment = endPosInSequence - currSeq.litLength;
+                    endPosInSequence = currSeq.litLength;
+                    break;
+                }
+            } else {
+                /* This sequence ends inside the literals, break to store the last literals */
+                break;
+            }
+        }
+        /* Check if this offset can be represented with a repcode */
+        {   U32 ll0 = (litLength == 0);
+            offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
+            updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        }
+
+        if (cctx->appliedParams.validateSequences) {
+            seqPos->posInSrc += litLength + matchLength;
+            FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
+                                                   cctx->appliedParams.cParams.windowLog, dictSize,
+                                                   cctx->appliedParams.cParams.minMatch),
+                                                   "Sequence validation failed");
+        }
+        DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
+        RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
+                        "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ip += matchLength + litLength;
+    }
+    DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
+    assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength);
+    seqPos->idx = idx;
+    seqPos->posInSequence = endPosInSequence;
+    ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
+
+    iend -= bytesAdjustment;
+    if (ip != iend) {
+        /* Store any last literals */
+        U32 lastLLSize = (U32)(iend - ip);
+        assert(ip <= iend);
+        DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize);
+        ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize);
+        seqPos->posInSrc += lastLLSize;
+    }
+
+    return bytesAdjustment;
+}
+
+typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                       const void* src, size_t blockSize);
+static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+    ZSTD_sequenceCopier sequenceCopier = NULL;
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
+    if (mode == ZSTD_sf_explicitBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreExplicitBlockDelim;
+    } else if (mode == ZSTD_sf_noBlockDelimiters) {
+        return ZSTD_copySequencesToSeqStoreNoBlockDelim;
+    }
+    assert(sequenceCopier != NULL);
+    return sequenceCopier;
+}
+
+/* Compress, block-by-block, all of the sequences given.
+ *
+ * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
+ */
+static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                                              const void* src, size_t srcSize) {
+    size_t cSize = 0;
+    U32 lastBlock;
+    size_t blockSize;
+    size_t compressedSeqsSize;
+    size_t remaining = srcSize;
+    ZSTD_sequencePosition seqPos = {0, 0, 0};
+
+    BYTE const* ip = (BYTE const*)src;
+    BYTE* op = (BYTE*)dst;
+    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+
+    DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
+    /* Special case: empty frame */
+    if (remaining == 0) {
+        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header");
+        MEM_writeLE32(op, cBlockHeader24);
+        op += ZSTD_blockHeaderSize;
+        dstCapacity -= ZSTD_blockHeaderSize;
+        cSize += ZSTD_blockHeaderSize;
+    }
+
+    while (remaining) {
+        size_t cBlockSize;
+        size_t additionalByteAdjustment;
+        lastBlock = remaining <= cctx->blockSize;
+        blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize;
+        ZSTD_resetSeqStore(&cctx->seqStore);
+        DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize);
+
+        additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize);
+        FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy");
+        blockSize -= additionalByteAdjustment;
+
+        /* If blocks are too small, emit as a nocompress block */
+        if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize);
+            cSize += cBlockSize;
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            continue;
+        }
+
+        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+                                &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
+                                &cctx->appliedParams,
+                                op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
+                                blockSize,
+                                cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                                cctx->bmi2);
+        FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
+        DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
+
+        if (!cctx->isFirstBlock &&
+            ZSTD_maybeRLE(&cctx->seqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize)) {
+            /* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            compressedSeqsSize = 1;
+        }
+
+        if (compressedSeqsSize == 0) {
+            /* ZSTD_noCompressBlock writes the block header as well */
+            cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed");
+            DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize);
+        } else if (compressedSeqsSize == 1) {
+            cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock);
+            FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed");
+            DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize);
+        } else {
+            U32 cBlockHeader;
+            /* Error checking and repcodes update */
+            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+            /* Write block header into beginning of block*/
+            cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3);
+            MEM_writeLE24(op, cBlockHeader);
+            cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize;
+            DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize);
+        }
+
+        cSize += cBlockSize;
+        DEBUGLOG(4, "cSize running total: %zu", cSize);
+
+        if (lastBlock) {
+            break;
+        } else {
+            ip += blockSize;
+            op += cBlockSize;
+            remaining -= blockSize;
+            dstCapacity -= cBlockSize;
+            cctx->isFirstBlock = 0;
+        }
+    }
+
+    return cSize;
+}
+
+size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
+                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                              const void* src, size_t srcSize) {
+    BYTE* op = (BYTE*)dst;
+    size_t cSize = 0;
+    size_t compressedBlocksSize = 0;
+    size_t frameHeaderSize = 0;
+
+    /* Transparent initialization stage, same as compressStream2() */
+    DEBUGLOG(3, "ZSTD_compressSequences()");
+    assert(cctx != NULL);
+    FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed");
+    /* Begin writing output, starting with frame header */
+    frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID);
+    op += frameHeaderSize;
+    dstCapacity -= frameHeaderSize;
+    cSize += frameHeaderSize;
+    if (cctx->appliedParams.fParams.checksumFlag && srcSize) {
+        xxh64_update(&cctx->xxhState, src, srcSize);
+    }
+    /* cSize includes block header size and compressed sequences size */
+    compressedBlocksSize = ZSTD_compressSequences_internal(cctx,
+                                                           op, dstCapacity,
+                                                           inSeqs, inSeqsSize,
+                                                           src, srcSize);
+    FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!");
+    cSize += compressedBlocksSize;
+    dstCapacity -= compressedBlocksSize;
+
+    if (cctx->appliedParams.fParams.checksumFlag) {
+        U32 const checksum = (U32) xxh64_digest(&cctx->xxhState);
+        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum");
+        DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum);
+        MEM_writeLE32((char*)dst + cSize, checksum);
+        cSize += 4;
+    }
+
+    DEBUGLOG(3, "Final compressed size: %zu", cSize);
+    return cSize;
+}
+
+/*======   Finalize   ======*/
+
+/*! ZSTD_flushStream() :
+ * @return : amount of data remaining to flush */
+size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
+}
+
+
+size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
+{
+    ZSTD_inBuffer input = { NULL, 0, 0 };
+    size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
+    FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed");
+    if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush;   /* minimal estimation */
+    /* single thread mode : attempt to calculate remaining to flush more precisely */
+    {   size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
+        size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
+        size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
+        DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
+        return toFlush;
+    }
+}
+
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_MAX_CLEVEL     22
+int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
+int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" - for any srcSize > 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
+    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
+    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
+    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
+    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
+    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
+    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
+    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
+    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
+    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
+    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
+    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
+    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
+    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
+    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
+    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
+    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
+    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
+    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
+    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
+    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
+    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
+    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
+    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
+    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
+    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
+    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
+    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
+    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 16 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
+    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
+    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+};
+
+static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict);
+    switch (cParams.strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+    return cParams;
+}
+
+static int ZSTD_dedicatedDictSearch_isSupported(
+        ZSTD_compressionParameters const* cParams)
+{
+    return (cParams->strategy >= ZSTD_greedy)
+        && (cParams->strategy <= ZSTD_lazy2)
+        && (cParams->hashLog >= cParams->chainLog)
+        && (cParams->chainLog <= 24);
+}
+
+/*
+ * Reverses the adjustment applied to cparams when enabling dedicated dict
+ * search. This is used to recover the params set to be used in the working
+ * context. (Otherwise, those tables would also grow.)
+ */
+static void ZSTD_dedicatedDictSearch_revertCParams(
+        ZSTD_compressionParameters* cParams) {
+    switch (cParams->strategy) {
+        case ZSTD_fast:
+        case ZSTD_dfast:
+            break;
+        case ZSTD_greedy:
+        case ZSTD_lazy:
+        case ZSTD_lazy2:
+            cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            break;
+        case ZSTD_btlazy2:
+        case ZSTD_btopt:
+        case ZSTD_btultra:
+        case ZSTD_btultra2:
+            break;
+    }
+}
+
+static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    switch (mode) {
+    case ZSTD_cpm_unknown:
+    case ZSTD_cpm_noAttachDict:
+    case ZSTD_cpm_createCDict:
+        break;
+    case ZSTD_cpm_attachDict:
+        dictSize = 0;
+        break;
+    default:
+        assert(0);
+        break;
+    }
+    {   int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN;
+        size_t const addedSize = unknown && dictSize > 0 ? 500 : 0;
+        return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize;
+    }
+}
+
+/*! ZSTD_getCParams_internal() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown.
+ *        Use dictSize == 0 for unknown or unused.
+ *  Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
+    int row;
+    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
+
+    /* row */
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    else row = compressionLevel;
+
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        /* acceleration factor */
+        if (compressionLevel < 0) {
+            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
+            cp.targetLength = (unsigned)(-clampedCompressionLevel);
+        }
+        /* refine parameters based on srcSize & dictSize */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode);
+    }
+}
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
+    ZSTD_memset(&params, 0, sizeof(params));
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    return params;
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h
new file mode 100644 (file)
index 0000000..685d2f9
--- /dev/null
@@ -0,0 +1,1188 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* This header contains definitions
+ * that shall **only** be used by modules within lib/compress.
+ */
+
+#ifndef ZSTD_COMPRESS_H
+#define ZSTD_COMPRESS_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+#include "zstd_cwksp.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define kSearchStrength      8
+#define HASH_READ_SIZE       8
+#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
+                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
+                                       It's not a big deal though : candidate will just be sorted again.
+                                       Additionally, candidate position 1 will be lost.
+                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
+typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
+
+typedef struct ZSTD_prefixDict_s {
+    const void* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+} ZSTD_prefixDict;
+
+typedef struct {
+    void* dictBuffer;
+    void const* dict;
+    size_t dictSize;
+    ZSTD_dictContentType_e dictContentType;
+    ZSTD_CDict* cdict;
+} ZSTD_localDict;
+
+typedef struct {
+    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_repeat repeatMode;
+} ZSTD_hufCTables_t;
+
+typedef struct {
+    FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
+    FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
+    FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
+    FSE_repeat offcode_repeatMode;
+    FSE_repeat matchlength_repeatMode;
+    FSE_repeat litlength_repeatMode;
+} ZSTD_fseCTables_t;
+
+typedef struct {
+    ZSTD_hufCTables_t huf;
+    ZSTD_fseCTables_t fse;
+} ZSTD_entropyCTables_t;
+
+typedef struct {
+    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    U32 len;            /* Raw length of match */
+} ZSTD_match_t;
+
+typedef struct {
+    U32 offset;         /* Offset of sequence */
+    U32 litLength;      /* Length of literals prior to match */
+    U32 matchLength;    /* Raw length of match */
+} rawSeq;
+
+typedef struct {
+  rawSeq* seq;          /* The start of the sequences */
+  size_t pos;           /* The index in seq where reading stopped. pos <= size. */
+  size_t posInSequence; /* The position within the sequence at seq[pos] where reading
+                           stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */
+  size_t size;          /* The number of sequences. <= capacity. */
+  size_t capacity;      /* The capacity starting from `seq` pointer */
+} rawSeqStore_t;
+
+UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
+
+typedef struct {
+    int price;
+    U32 off;
+    U32 mlen;
+    U32 litlen;
+    U32 rep[ZSTD_REP_NUM];
+} ZSTD_optimal_t;
+
+typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
+
+typedef struct {
+    /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
+    unsigned* litFreq;           /* table of literals statistics, of size 256 */
+    unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
+    unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
+    unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+
+    U32  litSum;                 /* nb of literals */
+    U32  litLengthSum;           /* nb of litLength codes */
+    U32  matchLengthSum;         /* nb of matchLength codes */
+    U32  offCodeSum;             /* nb of offset codes */
+    U32  litSumBasePrice;        /* to compare to log2(litfreq) */
+    U32  litLengthSumBasePrice;  /* to compare to log2(llfreq)  */
+    U32  matchLengthSumBasePrice;/* to compare to log2(mlfreq)  */
+    U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
+    ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
+    const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+} optState_t;
+
+typedef struct {
+  ZSTD_entropyCTables_t entropy;
+  U32 rep[ZSTD_REP_NUM];
+} ZSTD_compressedBlockState_t;
+
+typedef struct {
+    BYTE const* nextSrc;    /* next block here to continue on current prefix */
+    BYTE const* base;       /* All regular indexes relative to this position */
+    BYTE const* dictBase;   /* extDict indexes relative to this position */
+    U32 dictLimit;          /* below that point, need extDict */
+    U32 lowLimit;           /* below that point, no more valid data */
+} ZSTD_window_t;
+
+typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+struct ZSTD_matchState_t {
+    ZSTD_window_t window;   /* State for window round buffer management */
+    U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
+                             * When loadedDictEnd != 0, a dictionary is in use, and still valid.
+                             * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance.
+                             * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity().
+                             * When dict referential is copied into active context (i.e. not attached),
+                             * loadedDictEnd == dictSize, since referential starts from zero.
+                             */
+    U32 nextToUpdate;       /* index from which to continue table update */
+    U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+    U32* hashTable;
+    U32* hashTable3;
+    U32* chainTable;
+    int dedicatedDictSearch;  /* Indicates whether this matchState is using the
+                               * dedicated dictionary search structure.
+                               */
+    optState_t opt;         /* optimal parser state */
+    const ZSTD_matchState_t* dictMatchState;
+    ZSTD_compressionParameters cParams;
+    const rawSeqStore_t* ldmSeqStore;
+};
+
+typedef struct {
+    ZSTD_compressedBlockState_t* prevCBlock;
+    ZSTD_compressedBlockState_t* nextCBlock;
+    ZSTD_matchState_t matchState;
+} ZSTD_blockState_t;
+
+typedef struct {
+    U32 offset;
+    U32 checksum;
+} ldmEntry_t;
+
+typedef struct {
+    BYTE const* split;
+    U32 hash;
+    U32 checksum;
+    ldmEntry_t* bucket;
+} ldmMatchCandidate_t;
+
+#define LDM_BATCH_SIZE 64
+
+typedef struct {
+    ZSTD_window_t window;   /* State for the window round buffer management */
+    ldmEntry_t* hashTable;
+    U32 loadedDictEnd;
+    BYTE* bucketOffsets;    /* Next position in bucket to insert entry */
+    size_t splitIndices[LDM_BATCH_SIZE];
+    ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE];
+} ldmState_t;
+
+typedef struct {
+    U32 enableLdm;          /* 1 if enable long distance matching */
+    U32 hashLog;            /* Log size of hashTable */
+    U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
+    U32 minMatchLength;     /* Minimum match length */
+    U32 hashRateLog;       /* Log number of entries to skip */
+    U32 windowLog;          /* Window log for the LDM */
+} ldmParams_t;
+
+typedef struct {
+    int collectSequences;
+    ZSTD_Sequence* seqStart;
+    size_t seqIndex;
+    size_t maxSequences;
+} SeqCollector;
+
+struct ZSTD_CCtx_params_s {
+    ZSTD_format_e format;
+    ZSTD_compressionParameters cParams;
+    ZSTD_frameParameters fParams;
+
+    int compressionLevel;
+    int forceWindow;           /* force back-references to respect limit of
+                                * 1<<wLog, even for dictionary */
+    size_t targetCBlockSize;   /* Tries to fit compressed block size to be around targetCBlockSize.
+                                * No target when targetCBlockSize == 0.
+                                * There is no guarantee on compressed block size */
+    int srcSizeHint;           /* User's best guess of source size.
+                                * Hint is not valid when srcSizeHint == 0.
+                                * There is no guarantee that hint is close to actual source size */
+
+    ZSTD_dictAttachPref_e attachDictPref;
+    ZSTD_literalCompressionMode_e literalCompressionMode;
+
+    /* Multithreading: used to pass parameters to mtctx */
+    int nbWorkers;
+    size_t jobSize;
+    int overlapLog;
+    int rsyncable;
+
+    /* Long distance matching parameters */
+    ldmParams_t ldmParams;
+
+    /* Dedicated dict search algorithm trigger */
+    int enableDedicatedDictSearch;
+
+    /* Input/output buffer modes */
+    ZSTD_bufferMode_e inBufferMode;
+    ZSTD_bufferMode_e outBufferMode;
+
+    /* Sequence compression API */
+    ZSTD_sequenceFormat_e blockDelimiters;
+    int validateSequences;
+
+    /* Internal use, for createCCtxParams() and freeCCtxParams() only */
+    ZSTD_customMem customMem;
+};  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
+
+#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2))
+#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE)
+
+/*
+ * Indicates whether this compression proceeds directly from user-provided
+ * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or
+ * whether the context needs to buffer the input/output (ZSTDb_buffered).
+ */
+typedef enum {
+    ZSTDb_not_buffered,
+    ZSTDb_buffered
+} ZSTD_buffered_policy_e;
+
+struct ZSTD_CCtx_s {
+    ZSTD_compressionStage_e stage;
+    int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
+    int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+    ZSTD_CCtx_params requestedParams;
+    ZSTD_CCtx_params appliedParams;
+    U32   dictID;
+    size_t dictContentSize;
+
+    ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */
+    size_t blockSize;
+    unsigned long long pledgedSrcSizePlusOne;  /* this way, 0 (default) == unknown */
+    unsigned long long consumedSrcSize;
+    unsigned long long producedCSize;
+    struct xxh64_state xxhState;
+    ZSTD_customMem customMem;
+    ZSTD_threadPool* pool;
+    size_t staticSize;
+    SeqCollector seqCollector;
+    int isFirstBlock;
+    int initialized;
+
+    seqStore_t seqStore;      /* sequences storage ptrs */
+    ldmState_t ldmState;      /* long distance matching state */
+    rawSeq* ldmSequences;     /* Storage for the ldm output sequences */
+    size_t maxNbLdmSequences;
+    rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
+    ZSTD_blockState_t blockState;
+    U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
+
+    /* Wether we are streaming or not */
+    ZSTD_buffered_policy_e bufferedPolicy;
+
+    /* streaming */
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inToCompress;
+    size_t inBuffPos;
+    size_t inBuffTarget;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outBuffContentSize;
+    size_t outBuffFlushedSize;
+    ZSTD_cStreamStage streamStage;
+    U32    frameEnded;
+
+    /* Stable in/out buffer verification */
+    ZSTD_inBuffer expectedInBuffer;
+    size_t expectedOutBufferSize;
+
+    /* Dictionary */
+    ZSTD_localDict localDict;
+    const ZSTD_CDict* cdict;
+    ZSTD_prefixDict prefixDict;   /* single-usage dictionary */
+
+    /* Multi-threading */
+
+    /* Tracing */
+};
+
+typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
+
+typedef enum {
+    ZSTD_noDict = 0,
+    ZSTD_extDict = 1,
+    ZSTD_dictMatchState = 2,
+    ZSTD_dedicatedDictSearch = 3
+} ZSTD_dictMode_e;
+
+typedef enum {
+    ZSTD_cpm_noAttachDict = 0,  /* Compression with ZSTD_noDict or ZSTD_extDict.
+                                 * In this mode we use both the srcSize and the dictSize
+                                 * when selecting and adjusting parameters.
+                                 */
+    ZSTD_cpm_attachDict = 1,    /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch.
+                                 * In this mode we only take the srcSize into account when selecting
+                                 * and adjusting parameters.
+                                 */
+    ZSTD_cpm_createCDict = 2,   /* Creating a CDict.
+                                 * In this mode we take both the source size and the dictionary size
+                                 * into account when selecting and adjusting the parameters.
+                                 */
+    ZSTD_cpm_unknown = 3,       /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams.
+                                 * We don't know what these parameters are for. We default to the legacy
+                                 * behavior of taking both the source size and the dict size into account
+                                 * when selecting and adjusting parameters.
+                                 */
+} ZSTD_cParamMode_e;
+
+typedef size_t (*ZSTD_blockCompressor) (
+        ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+
+
+MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
+{
+    static const BYTE LL_Code[64] = {  0,  1,  2,  3,  4,  5,  6,  7,
+                                       8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 16, 17, 17, 18, 18, 19, 19,
+                                      20, 20, 20, 20, 21, 21, 21, 21,
+                                      22, 22, 22, 22, 22, 22, 22, 22,
+                                      23, 23, 23, 23, 23, 23, 23, 23,
+                                      24, 24, 24, 24, 24, 24, 24, 24,
+                                      24, 24, 24, 24, 24, 24, 24, 24 };
+    static const U32 LL_deltaCode = 19;
+    return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
+}
+
+/* ZSTD_MLcode() :
+ * note : mlBase = matchLength - MINMATCH;
+ *        because it's the format it's stored in seqStore->sequences */
+MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
+{
+    static const BYTE ML_Code[128] = { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+                                      16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+                                      32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+                                      38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+                                      40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+                                      41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+                                      42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+    static const U32 ML_deltaCode = 36;
+    return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
+{
+    repcodes_t newReps;
+    if (offset >= ZSTD_REP_NUM) {  /* full offset */
+        newReps.rep[2] = rep[1];
+        newReps.rep[1] = rep[0];
+        newReps.rep[0] = offset - ZSTD_REP_MOVE;
+    } else {   /* repcode */
+        U32 const repCode = offset + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            newReps.rep[1] = rep[0];
+            newReps.rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+        }
+    }
+    return newReps;
+}
+
+/* ZSTD_cParam_withinBounds:
+ * @return 1 if value is within cParam bounds,
+ * 0 otherwise */
+MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+/* ZSTD_noCompressBlock() :
+ * Writes uncompressed block to dst buffer from given src.
+ * Returns the size of the block */
+MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
+{
+    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
+                    dstSize_tooSmall, "dst buf too small for uncompressed block");
+    MEM_writeLE24(dst, cBlockHeader24);
+    ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
+    return ZSTD_blockHeaderSize + srcSize;
+}
+
+MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock)
+{
+    BYTE* const op = (BYTE*)dst;
+    U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3);
+    RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, "");
+    MEM_writeLE24(op, cBlockHeader);
+    op[3] = src;
+    return 4;
+}
+
+
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
+    ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
+    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
+    return (srcSize >> minlog) + 2;
+}
+
+MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+{
+    switch (cctxParams->literalCompressionMode) {
+    case ZSTD_lcm_huffman:
+        return 0;
+    case ZSTD_lcm_uncompressed:
+        return 1;
+    default:
+        assert(0 /* impossible: pre-validated */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_lcm_auto:
+        return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
+    }
+}
+
+/*! ZSTD_safecopyLiterals() :
+ *  memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w.
+ *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
+ *  large copies.
+ */
+static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+    assert(iend > ilimit_w);
+    if (ip <= ilimit_w) {
+        ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
+        op += ilimit_w - ip;
+        ip = ilimit_w;
+    }
+    while (ip < iend) *op++ = *ip++;
+}
+
+/*! ZSTD_storeSeq() :
+ *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
+ *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
+ *  `mlBase` : matchLength - MINMATCH
+ *  Allowed to overread literals up to litLimit.
+*/
+HINT_INLINE UNUSED_ATTR
+void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+{
+    BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
+    BYTE const* const litEnd = literals + litLength;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
+    static const BYTE* g_start = NULL;
+    if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
+    {   U32 const pos = (U32)((const BYTE*)literals - g_start);
+        DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
+               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+    }
+#endif
+    assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
+    /* copy Literals */
+    assert(seqStorePtr->maxNbLit <= 128 KB);
+    assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
+    assert(literals + litLength <= litLimit);
+    if (litEnd <= litLimit_w) {
+        /* Common case we can use wildcopy.
+        * First copy 16 bytes, because literals are likely short.
+        */
+        assert(WILDCOPY_OVERLENGTH >= 16);
+        ZSTD_copy16(seqStorePtr->lit, literals);
+        if (litLength > 16) {
+            ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap);
+        }
+    } else {
+        ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w);
+    }
+    seqStorePtr->lit += litLength;
+
+    /* literal Length */
+    if (litLength>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 1;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].litLength = (U16)litLength;
+
+    /* match offset */
+    seqStorePtr->sequences[0].offset = offCode + 1;
+
+    /* match Length */
+    if (mlBase>0xFFFF) {
+        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
+        seqStorePtr->longLengthID = 2;
+        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    }
+    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
+
+    seqStorePtr->sequences++;
+}
+
+
+/*-*************************************
+*  Match length counter
+***************************************/
+static unsigned ZSTD_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return (__builtin_ctzll((U64)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
+                                                     0, 3, 1, 3, 1, 4, 2, 7,
+                                                     0, 2, 3, 6, 1, 5, 3, 5,
+                                                     1, 3, 4, 4, 2, 5, 6, 7,
+                                                     7, 0, 1, 2, 3, 3, 4, 6,
+                                                     2, 6, 5, 5, 3, 4, 5, 6,
+                                                     7, 1, 2, 4, 6, 4, 4, 5,
+                                                     7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+        } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+#       else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
+                                                     3, 2, 2, 1, 3, 2, 0, 1,
+                                                     3, 3, 1, 2, 2, 2, 2, 0,
+                                                     3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+        }
+    } else {  /* Big Endian CPU */
+        if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return (__builtin_clzll(val) >> 3);
+#       else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+        } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return (__builtin_clz((U32)val) >> 3);
+#       else
+            unsigned r;
+            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
+            r += (!val);
+            return r;
+#       endif
+    }   }
+}
+
+
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
+
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+    }   }
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+/* ZSTD_count_2segments() :
+ *  can count match length with `ip` & `match` in 2 different segments.
+ *  convention : on reaching mEnd, match count continue starting from iStart
+ */
+MEM_STATIC size_t
+ZSTD_count_2segments(const BYTE* ip, const BYTE* match,
+                     const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
+{
+    const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
+    size_t const matchLength = ZSTD_count(ip, match, vEnd);
+    if (match + matchLength != mEnd) return matchLength;
+    DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
+    DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match);
+    DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
+    DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
+    DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
+    return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
+}
+
+
+/*-*************************************
+ *  Hashes
+ ***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32    ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes)  >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
+
+static const U32 prime4bytes = 2654435761U;
+static U32    ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u  << (64-40)) * prime5bytes) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u  << (64-48)) * prime6bytes) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u  << (64-56)) * prime7bytes) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
+
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
+
+MEM_STATIC FORCE_INLINE_ATTR
+size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
+{
+    switch(mls)
+    {
+    default:
+    case 4: return ZSTD_hash4Ptr(p, hBits);
+    case 5: return ZSTD_hash5Ptr(p, hBits);
+    case 6: return ZSTD_hash6Ptr(p, hBits);
+    case 7: return ZSTD_hash7Ptr(p, hBits);
+    case 8: return ZSTD_hash8Ptr(p, hBits);
+    }
+}
+
+/* ZSTD_ipow() :
+ * Return base^exponent.
+ */
+static U64 ZSTD_ipow(U64 base, U64 exponent)
+{
+    U64 power = 1;
+    while (exponent) {
+      if (exponent & 1) power *= base;
+      exponent >>= 1;
+      base *= base;
+    }
+    return power;
+}
+
+#define ZSTD_ROLL_HASH_CHAR_OFFSET 10
+
+/* ZSTD_rollingHash_append() :
+ * Add the buffer to the hash value.
+ */
+static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size)
+{
+    BYTE const* istart = (BYTE const*)buf;
+    size_t pos;
+    for (pos = 0; pos < size; ++pos) {
+        hash *= prime8bytes;
+        hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    }
+    return hash;
+}
+
+/* ZSTD_rollingHash_compute() :
+ * Compute the rolling hash value of the buffer.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size)
+{
+    return ZSTD_rollingHash_append(0, buf, size);
+}
+
+/* ZSTD_rollingHash_primePower() :
+ * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash
+ * over a window of length bytes.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length)
+{
+    return ZSTD_ipow(prime8bytes, length - 1);
+}
+
+/* ZSTD_rollingHash_rotate() :
+ * Rotate the rolling hash by one byte.
+ */
+MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower)
+{
+    hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower;
+    hash *= prime8bytes;
+    hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET;
+    return hash;
+}
+
+/*-*************************************
+*  Round buffer management
+***************************************/
+#if (ZSTD_WINDOWLOG_MAX_64 > 31)
+# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX"
+#endif
+/* Max current allowed */
+#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX))
+/* Maximum chunk size before overflow correction needs to be called again */
+#define ZSTD_CHUNKSIZE_MAX                                                     \
+    ( ((U32)-1)                  /* Maximum ending current index */            \
+    - ZSTD_CURRENT_MAX)          /* Maximum beginning lowLimit */
+
+/*
+ * ZSTD_window_clear():
+ * Clears the window containing the history by simply setting it to empty.
+ */
+MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
+{
+    size_t const endT = (size_t)(window->nextSrc - window->base);
+    U32 const end = (U32)endT;
+
+    window->lowLimit = end;
+    window->dictLimit = end;
+}
+
+/*
+ * ZSTD_window_hasExtDict():
+ * Returns non-zero if the window has a non-empty extDict.
+ */
+MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window)
+{
+    return window.lowLimit < window.dictLimit;
+}
+
+/*
+ * ZSTD_matchState_dictMode():
+ * Inspects the provided matchState and figures out what dictMode should be
+ * passed to the compressor.
+ */
+MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
+{
+    return ZSTD_window_hasExtDict(ms->window) ?
+        ZSTD_extDict :
+        ms->dictMatchState != NULL ?
+            (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) :
+            ZSTD_noDict;
+}
+
+/*
+ * ZSTD_window_needOverflowCorrection():
+ * Returns non-zero if the indices are getting too large and need overflow
+ * protection.
+ */
+MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  void const* srcEnd)
+{
+    U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    return curr > ZSTD_CURRENT_MAX;
+}
+
+/*
+ * ZSTD_window_correctOverflow():
+ * Reduces the indices to protect from index overflow.
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ * NOTE: (maxDist & cycleMask) must be zero.
+ */
+MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                           U32 maxDist, void const* src)
+{
+    /* preemptive overflow correction:
+     * 1. correction is large enough:
+     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog
+     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+     *
+     *    current - newCurrent
+     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+     *    > (3<<29) - (1<<chainLog)
+     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
+     *    > 1<<29
+     *
+     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
+     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+     *    In 32-bit mode we are safe, because (chainLog <= 29), so
+     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+     */
+    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const curr = (U32)((BYTE const*)src - window->base);
+    U32 const currentCycle0 = curr & cycleMask;
+    /* Exclude zero so that newCurrent - maxDist >= 1. */
+    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
+    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const correction = curr - newCurrent;
+    assert((maxDist & cycleMask) == 0);
+    assert(curr > newCurrent);
+    /* Loose bound, should be around 1<<29 (see above) */
+    assert(correction > 1<<28);
+
+    window->base += correction;
+    window->dictBase += correction;
+    if (window->lowLimit <= correction) window->lowLimit = 1;
+    else window->lowLimit -= correction;
+    if (window->dictLimit <= correction) window->dictLimit = 1;
+    else window->dictLimit -= correction;
+
+    /* Ensure we can still reference the full window. */
+    assert(newCurrent >= maxDist);
+    assert(newCurrent - maxDist >= 1);
+    /* Ensure that lowLimit and dictLimit didn't underflow. */
+    assert(window->lowLimit <= newCurrent);
+    assert(window->dictLimit <= newCurrent);
+
+    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+             window->lowLimit);
+    return correction;
+}
+
+/*
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ *
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
+ *
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
+ * forceWindow and dictMatchState are therefore incompatible.
+ */
+MEM_STATIC void
+ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
+                     const void* blockEnd,
+                           U32   maxDist,
+                           U32*  loadedDictEndPtr,
+                     const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+    U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0;
+    DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+
+    /* - When there is no dictionary : loadedDictEnd == 0.
+         In which case, the test (blockEndIdx > maxDist) is merely to avoid
+         overflowing next operation `newLowLimit = blockEndIdx - maxDist`.
+       - When there is a standard dictionary :
+         Index referential is copied from the dictionary,
+         which means it starts from 0.
+         In which case, loadedDictEnd == dictSize,
+         and it makes sense to compare `blockEndIdx > maxDist + dictSize`
+         since `blockEndIdx` also starts from zero.
+       - When there is an attached dictionary :
+         loadedDictEnd is expressed within the referential of the context,
+         so it can be directly compared against blockEndIdx.
+    */
+    if (blockEndIdx > maxDist + loadedDictEnd) {
+        U32 const newLowLimit = blockEndIdx - maxDist;
+        if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
+        if (window->dictLimit < window->lowLimit) {
+            DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
+                        (unsigned)window->dictLimit, (unsigned)window->lowLimit);
+            window->dictLimit = window->lowLimit;
+        }
+        /* On reaching window size, dictionaries are invalidated */
+        if (loadedDictEndPtr) *loadedDictEndPtr = 0;
+        if (dictMatchStatePtr) *dictMatchStatePtr = NULL;
+    }
+}
+
+/* Similar to ZSTD_window_enforceMaxDist(),
+ * but only invalidates dictionary
+ * when input progresses beyond window size.
+ * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL)
+ *              loadedDictEnd uses same referential as window->base
+ *              maxDist is the window size */
+MEM_STATIC void
+ZSTD_checkDictValidity(const ZSTD_window_t* window,
+                       const void* blockEnd,
+                             U32   maxDist,
+                             U32*  loadedDictEndPtr,
+                       const ZSTD_matchState_t** dictMatchStatePtr)
+{
+    assert(loadedDictEndPtr != NULL);
+    assert(dictMatchStatePtr != NULL);
+    {   U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base);
+        U32 const loadedDictEnd = *loadedDictEndPtr;
+        DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u",
+                    (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd);
+        assert(blockEndIdx >= loadedDictEnd);
+
+        if (blockEndIdx > loadedDictEnd + maxDist) {
+            /* On reaching window size, dictionaries are invalidated.
+             * For simplification, if window size is reached anywhere within next block,
+             * the dictionary is invalidated for the full block.
+             */
+            DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)");
+            *loadedDictEndPtr = 0;
+            *dictMatchStatePtr = NULL;
+        } else {
+            if (*loadedDictEndPtr != 0) {
+                DEBUGLOG(6, "dictionary considered valid for current block");
+    }   }   }
+}
+
+MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
+    ZSTD_memset(window, 0, sizeof(*window));
+    window->base = (BYTE const*)"";
+    window->dictBase = (BYTE const*)"";
+    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
+    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
+    window->nextSrc = window->base + 1;   /* see issue #1241 */
+}
+
+/*
+ * ZSTD_window_update():
+ * Updates the window by appending [src, src + srcSize) to the window.
+ * If it is not contiguous, the current prefix becomes the extDict, and we
+ * forget about the extDict. Handles overlap of the prefix and extDict.
+ * Returns non-zero if the segment is contiguous.
+ */
+MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+                                  void const* src, size_t srcSize)
+{
+    BYTE const* const ip = (BYTE const*)src;
+    U32 contiguous = 1;
+    DEBUGLOG(5, "ZSTD_window_update");
+    if (srcSize == 0)
+        return contiguous;
+    assert(window->base != NULL);
+    assert(window->dictBase != NULL);
+    /* Check if blocks follow each other */
+    if (src != window->nextSrc) {
+        /* not contiguous */
+        size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
+        DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
+        window->lowLimit = window->dictLimit;
+        assert(distanceFromBase == (size_t)(U32)distanceFromBase);  /* should never overflow */
+        window->dictLimit = (U32)distanceFromBase;
+        window->dictBase = window->base;
+        window->base = ip - distanceFromBase;
+        /* ms->nextToUpdate = window->dictLimit; */
+        if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit;   /* too small extDict */
+        contiguous = 0;
+    }
+    window->nextSrc = ip + srcSize;
+    /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
+    if ( (ip+srcSize > window->dictBase + window->lowLimit)
+       & (ip < window->dictBase + window->dictLimit)) {
+        ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
+        U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
+        window->lowLimit = lowLimitMax;
+        DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
+    }
+    return contiguous;
+}
+
+/*
+ * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.lowLimit;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
+     * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
+     * valid for the entire block. So this check is sufficient to find the lowest valid match index.
+     */
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+/*
+ * Returns the lowest allowed match index in the prefix.
+ */
+MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
+{
+    U32    const maxDistance = 1U << windowLog;
+    U32    const lowestValid = ms->window.dictLimit;
+    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    /* When computing the lowest prefix index we need to take the dictionary into account to handle
+     * the edge case where the dictionary and the source are contiguous in memory.
+     */
+    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    return matchLowest;
+}
+
+
+
+/* debug functions */
+#if (DEBUGLEVEL>=2)
+
+MEM_STATIC double ZSTD_fWeight(U32 rawStat)
+{
+    U32 const fp_accuracy = 8;
+    U32 const fp_multiplier = (1 << fp_accuracy);
+    U32 const newStat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(newStat);
+    U32 const BWeight = hb * fp_multiplier;
+    U32 const FWeight = (newStat << fp_accuracy) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + fp_accuracy < 31);
+    return (double)weight / fp_multiplier;
+}
+
+/* display a table content,
+ * listing each element, its frequency, and its predicted bit cost */
+MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
+{
+    unsigned u, sum;
+    for (u=0, sum=0; u<=max; u++) sum += table[u];
+    DEBUGLOG(2, "total nb elts: %u", sum);
+    for (u=0; u<=max; u++) {
+        DEBUGLOG(2, "%2u: %5u  (%.2f)",
+                u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
+    }
+}
+
+#endif
+
+
+
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number supposed already checked
+ *               and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
+
+/* ==============================================================
+ * Private declarations
+ * These prototypes shall only be called from within lib/compress
+ * ============================================================== */
+
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr);
+
+/*! ZSTD_getCParamsFromCDict() :
+ *  as the name implies */
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
+
+/* ZSTD_compressBegin_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize);
+
+/* ZSTD_compress_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                 const void* src, size_t srcSize,
+                                 const void* dict,size_t dictSize,
+                                 const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/* ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/* ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+#endif /* ZSTD_COMPRESS_H */
diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
new file mode 100644 (file)
index 0000000..655bcda
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
+    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
+                disableLiteralCompression, (U32)srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralCompression)
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+
+    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
+    {   HUF_repeat repeat = prevHuf->repeatMode;
+        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
+        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
+        cLitSize = singleStream ?
+            HUF_compress1X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+            HUF_compress4X_repeat(
+                ostart+lhSize, dstCapacity-lhSize, src, srcSize,
+                HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+        if (repeat != HUF_repeat_none) {
+            /* reused the existing table */
+            DEBUGLOG(5, "Reusing previous huffman table");
+            hType = set_repeat;
+        }
+    }
+
+    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
+    }
+    if (cLitSize==1) {
+        ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
+    }
+
+    if (hType == set_compressed) {
+        /* using a newly constructed table */
+        nextHuf->repeatMode = HUF_repeat_check;
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
+    return lhSize+cLitSize;
+}
diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h
new file mode 100644 (file)
index 0000000..9904c0c
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_LITERALS_H
+#define ZSTD_COMPRESS_LITERALS_H
+
+#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                        const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
new file mode 100644 (file)
index 0000000..dcfcdc9
--- /dev/null
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_sequences.h"
+
+/*
+ * -log2(x / 256) lookup table for x in [0, 256).
+ * If x == 0: Return 0
+ * Else: Return floor(-log2(x / 256) * 256)
+ */
+static unsigned const kInverseProbabilityLog256[256] = {
+    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
+    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
+    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
+    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
+    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
+    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
+    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
+    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
+    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
+    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
+    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
+    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
+    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
+    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
+    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
+    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
+    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
+    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
+    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
+    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
+    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
+    5,    4,    2,    1,
+};
+
+static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
+  void const* ptr = ctable;
+  U16 const* u16ptr = (U16 const*)ptr;
+  U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
+  return maxSymbolValue;
+}
+
+/*
+ * Returns true if we should use ncount=-1 else we should
+ * use ncount=1 for low probability symbols instead.
+ */
+static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
+{
+    /* Heuristic: This should cover most blocks <= 16K and
+     * start to fade out after 16K to about 32K depending on
+     * comprssibility.
+     */
+    return nbSeq >= 2048;
+}
+
+/*
+ * Returns the cost in bytes of encoding the normalized count header.
+ * Returns an error if any of the helper functions return an error.
+ */
+static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
+                              size_t const nbSeq, unsigned const FSELog)
+{
+    BYTE wksp[FSE_NCOUNTBOUND];
+    S16 norm[MaxSeq + 1];
+    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
+    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution described by count
+ * using the entropy bound.
+ */
+static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
+{
+    unsigned cost = 0;
+    unsigned s;
+    for (s = 0; s <= max; ++s) {
+        unsigned norm = (unsigned)((256 * count[s]) / total);
+        if (count[s] != 0 && norm == 0)
+            norm = 1;
+        assert(count[s] < total);
+        cost += count[s] * kInverseProbabilityLog256[norm];
+    }
+    return cost >> 8;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using ctable.
+ * Returns an error if ctable cannot represent all the symbols in count.
+ */
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max)
+{
+    unsigned const kAccuracyLog = 8;
+    size_t cost = 0;
+    unsigned s;
+    FSE_CState_t cstate;
+    FSE_initCState(&cstate, ctable);
+    if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
+        DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
+                    ZSTD_getFSEMaxSymbolValue(ctable), max);
+        return ERROR(GENERIC);
+    }
+    for (s = 0; s <= max; ++s) {
+        unsigned const tableLog = cstate.stateLog;
+        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
+        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
+        if (count[s] == 0)
+            continue;
+        if (bitCost >= badCost) {
+            DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
+            return ERROR(GENERIC);
+        }
+        cost += (size_t)count[s] * bitCost;
+    }
+    return cost >> kAccuracyLog;
+}
+
+/*
+ * Returns the cost in bits of encoding the distribution in count using the
+ * table described by norm. The max symbol support by norm is assumed >= max.
+ * norm must be valid for every symbol with non-zero probability in count.
+ */
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max)
+{
+    unsigned const shift = 8 - accuracyLog;
+    size_t cost = 0;
+    unsigned s;
+    assert(accuracyLog <= 8);
+    for (s = 0; s <= max; ++s) {
+        unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
+        unsigned const norm256 = normAcc << shift;
+        assert(norm256 > 0);
+        assert(norm256 < 256);
+        cost += count[s] * kInverseProbabilityLog256[norm256];
+    }
+    return cost >> 8;
+}
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy)
+{
+    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
+    if (mostFrequent == nbSeq) {
+        *repeatMode = FSE_repeat_none;
+        if (isDefaultAllowed && nbSeq <= 2) {
+            /* Prefer set_basic over set_rle when there are 2 or less symbols,
+             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
+             * If basic encoding isn't possible, always choose RLE.
+             */
+            DEBUGLOG(5, "Selected set_basic");
+            return set_basic;
+        }
+        DEBUGLOG(5, "Selected set_rle");
+        return set_rle;
+    }
+    if (strategy < ZSTD_lazy) {
+        if (isDefaultAllowed) {
+            size_t const staticFse_nbSeq_max = 1000;
+            size_t const mult = 10 - strategy;
+            size_t const baseLog = 3;
+            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
+            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
+            assert(mult <= 9 && mult >= 7);
+            if ( (*repeatMode == FSE_repeat_valid)
+              && (nbSeq < staticFse_nbSeq_max) ) {
+                DEBUGLOG(5, "Selected set_repeat");
+                return set_repeat;
+            }
+            if ( (nbSeq < dynamicFse_nbSeq_min)
+              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
+                DEBUGLOG(5, "Selected set_basic");
+                /* The format allows default tables to be repeated, but it isn't useful.
+                 * When using simple heuristics to select encoding type, we don't want
+                 * to confuse these tables with dictionaries. When running more careful
+                 * analysis, we don't need to waste time checking both repeating tables
+                 * and default tables.
+                 */
+                *repeatMode = FSE_repeat_none;
+                return set_basic;
+            }
+        }
+    } else {
+        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
+        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
+        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
+        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
+
+        if (isDefaultAllowed) {
+            assert(!ZSTD_isError(basicCost));
+            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
+        }
+        assert(!ZSTD_isError(NCountCost));
+        assert(compressedCost < ERROR(maxCode));
+        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
+                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
+        if (basicCost <= repeatCost && basicCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_basic");
+            assert(isDefaultAllowed);
+            *repeatMode = FSE_repeat_none;
+            return set_basic;
+        }
+        if (repeatCost <= compressedCost) {
+            DEBUGLOG(5, "Selected set_repeat");
+            assert(!ZSTD_isError(repeatCost));
+            return set_repeat;
+        }
+        assert(compressedCost < basicCost && compressedCost < repeatCost);
+    }
+    DEBUGLOG(5, "Selected set_compressed");
+    *repeatMode = FSE_repeat_check;
+    return set_compressed;
+}
+
+typedef struct {
+    S16 norm[MaxSeq + 1];
+    U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)];
+} ZSTD_BuildCTableWksp;
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize)
+{
+    BYTE* op = (BYTE*)dst;
+    const BYTE* const oend = op + dstCapacity;
+    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
+
+    switch (type) {
+    case set_rle:
+        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
+        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
+        *op = codeTable[0];
+        return 1;
+    case set_repeat:
+        ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
+        return 0;
+    case set_basic:
+        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        assert(nbSeq_1 > 1);
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
+        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+            return NCountSize;
+        }
+    }
+    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].matchLength + MINMATCH,
+                        (unsigned)sequences[n].offset);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
+                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+            }
+            BIT_flushBits(&blockStream);                                    /* (7)*/
+            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    }   }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    {   size_t const streamSize = BIT_closeCStream(&blockStream);
+        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+        return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+
+#if DYNAMIC_BMI2
+
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_encodeSequences_bmi2(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+                                    CTable_MatchLength, mlCodeTable,
+                                    CTable_OffsetBits, ofCodeTable,
+                                    CTable_LitLength, llCodeTable,
+                                    sequences, nbSeq, longOffsets);
+}
+
+#endif
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
+{
+    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
+                                         CTable_MatchLength, mlCodeTable,
+                                         CTable_OffsetBits, ofCodeTable,
+                                         CTable_LitLength, llCodeTable,
+                                         sequences, nbSeq, longOffsets);
+    }
+#endif
+    (void)bmi2;
+    return ZSTD_encodeSequences_default(dst, dstCapacity,
+                                        CTable_MatchLength, mlCodeTable,
+                                        CTable_OffsetBits, ofCodeTable,
+                                        CTable_LitLength, llCodeTable,
+                                        sequences, nbSeq, longOffsets);
+}
diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h
new file mode 100644 (file)
index 0000000..7991364
--- /dev/null
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_SEQUENCES_H
+#define ZSTD_COMPRESS_SEQUENCES_H
+
+#include "../common/fse.h" /* FSE_repeat, FSE_CTable */
+#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
+
+typedef enum {
+    ZSTD_defaultDisallowed = 0,
+    ZSTD_defaultAllowed = 1
+} ZSTD_defaultPolicy_e;
+
+symbolEncodingType_e
+ZSTD_selectEncodingType(
+        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
+        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
+        FSE_CTable const* prevCTable,
+        short const* defaultNorm, U32 defaultNormLog,
+        ZSTD_defaultPolicy_e const isDefaultAllowed,
+        ZSTD_strategy const strategy);
+
+size_t
+ZSTD_buildCTable(void* dst, size_t dstCapacity,
+                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
+                unsigned* count, U32 max,
+                const BYTE* codeTable, size_t nbSeq,
+                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                const FSE_CTable* prevCTable, size_t prevCTableSize,
+                void* entropyWorkspace, size_t entropyWorkspaceSize);
+
+size_t ZSTD_encodeSequences(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
+
+size_t ZSTD_fseBitCost(
+    FSE_CTable const* ctable,
+    unsigned const* count,
+    unsigned const max);
+
+size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
+                             unsigned const* count, unsigned const max);
+#endif /* ZSTD_COMPRESS_SEQUENCES_H */
diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
new file mode 100644 (file)
index 0000000..ee03e0a
--- /dev/null
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_superblock.h"
+
+#include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
+#include "hist.h"                     /* HIST_countFast_wksp */
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+
+/*-*************************************
+*  Superblock entropy buffer structs
+***************************************/
+/* ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
+typedef struct {
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/* ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+
+/* ZSTD_buildSuperBlockEntropy_literal() :
+ *  Builds entropy for the super-block literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  @return : size of huffman description table or error code */
+static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int disableLiteralsCompression,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = 255;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (disableLiteralsCompression) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#   define COMPRESS_LITERALS_SIZE_MIN 63
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable_wksp(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+                    nodeWksp, nodeWkspSize);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+/* ZSTD_buildSuperBlockEntropy_sequences() :
+ *  Builds entropy for the super-block sequences.
+ *  Stores symbol compression modes and fse table to fseMetadata.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
+    BYTE* const cTableWksp = countWkspStart + countWkspSize;
+    const size_t cTableWkspSize = wkspEnd-cTableWksp;
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+
+    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
+    ZSTD_memset(workspace, 0, wkspSize);
+
+    fseMetadata->lastCountSize = 0;
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    /* build CTable for Literal Lengths */
+    {   U32 LLtype;
+        unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
+                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
+            if (LLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->llType = (symbolEncodingType_e) LLtype;
+    }   }
+    /* build CTable for Offsets */
+    {   U32 Offtype;
+        unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
+                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
+            if (Offtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
+    }   }
+    /* build CTable for MatchLengths */
+    {   U32 MLtype;
+        unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWksp, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
+                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
+                                                    cTableWksp, cTableWkspSize);
+            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
+            if (MLtype == set_compressed)
+                fseMetadata->lastCountSize = countSize;
+            op += countSize;
+            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
+    }   }
+    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
+    return op-ostart;
+}
+
+
+/* ZSTD_buildSuperBlockEntropy() :
+ *  Builds entropy for the super-block.
+ *  @return : 0 on success or error code */
+static size_t
+ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
+                      const ZSTD_entropyCTables_t* prevEntropy,
+                            ZSTD_entropyCTables_t* nextEntropy,
+                      const ZSTD_CCtx_params* cctxParams,
+                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_disableLiteralsCompression(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_literal() :
+ *  Compresses literals section for a sub-block.
+ *  When we have to write the Huffman table we will sometimes choose a header
+ *  size larger than necessary. This is because we have to pick the header size
+ *  before we know the table size + compressed size, so we have a bound on the
+ *  table size. If we guessed incorrectly, we fall back to uncompressed literals.
+ *
+ *  We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded
+ *  in writing the header, otherwise it is set to 0.
+ *
+ *  hufMetadata->hType has literals block type info.
+ *      If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block.
+ *      If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block.
+ *      If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block
+ *      If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
+ *      and the following sub-blocks' literals sections will be Treeless_Literals_Block.
+ *  @return : compressed size of literals section of a sub-block
+ *            Or 0 if it unable to compress.
+ *            Or error code */
+static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+                                    const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                    const BYTE* literals, size_t litSize,
+                                    void* dst, size_t dstSize,
+                                    const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart + lhSize;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+    size_t cLitSize = 0;
+
+    (void)bmi2; /* TODO bmi2... */
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+
+    *entropyWritten = 0;
+    if (litSize == 0 || hufMetadata->hType == set_basic) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
+      return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+    } else if (hufMetadata->hType == set_rle) {
+      DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
+      return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
+    }
+
+    assert(litSize > 0);
+    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+    if (writeEntropy && hufMetadata->hType == set_compressed) {
+        ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+        op += hufMetadata->hufDesSize;
+        cLitSize += hufMetadata->hufDesSize;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+    }
+
+    /* TODO bmi2 */
+    {   const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
+                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
+        op += cSize;
+        cLitSize += cSize;
+        if (cSize == 0 || ERR_isError(cSize)) {
+            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
+            return 0;
+        }
+        /* If we expand and we aren't writing a header then emit uncompressed */
+        if (!writeEntropy && cLitSize >= litSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        /* If we are writing headers then allow expansion that doesn't change our header size. */
+        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+            assert(cLitSize > litSize);
+            DEBUGLOG(5, "Literals expanded beyond allowed header size");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+    case 3: /* 2 - 2 - 10 - 10 */
+        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+            MEM_writeLE24(ostart, lhc);
+            break;
+        }
+    case 4: /* 2 - 2 - 14 - 14 */
+        {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
+            MEM_writeLE32(ostart, lhc);
+            break;
+        }
+    case 5: /* 2 - 2 - 18 - 18 */
+        {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
+            MEM_writeLE32(ostart, lhc);
+            ostart[4] = (BYTE)(cLitSize >> 10);
+            break;
+        }
+    default:  /* not possible : lhSize is {3,4,5} */
+        assert(0);
+    }
+    *entropyWritten = 1;
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+    return op-ostart;
+}
+
+static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
+    const seqDef* const sstart = sequences;
+    const seqDef* const send = sequences + nbSeq;
+    const seqDef* sp = sstart;
+    size_t matchLengthSum = 0;
+    size_t litLengthSum = 0;
+    while (send-sp > 0) {
+        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+        litLengthSum += seqLen.litLength;
+        matchLengthSum += seqLen.matchLength;
+        sp++;
+    }
+    assert(litLengthSum <= litSize);
+    if (!lastSequence) {
+        assert(litLengthSum == litSize);
+    }
+    return matchLengthSum + litSize;
+}
+
+/* ZSTD_compressSubBlock_sequences() :
+ *  Compresses sequences section for a sub-block.
+ *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
+ *  symbol compression modes for the super-block.
+ *  The first successfully compressed block will have these in its header.
+ *  We set entropyWritten=1 when we succeed in compressing the sequences.
+ *  The following sub-blocks will always have repeat mode.
+ *  @return : compressed size of sequences section of a sub-block
+ *            Or 0 if it is unable to compress
+ *            Or error code. */
+static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+                                              const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                              const seqDef* sequences, size_t nbSeq,
+                                              const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                              void* dst, size_t dstCapacity,
+                                              const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    BYTE* seqHead;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
+
+    *entropyWritten = 0;
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "");
+    if (nbSeq < 0x7F)
+        *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ)
+        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else
+        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    if (nbSeq==0) {
+        return op - ostart;
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
+
+    if (writeEntropy) {
+        const U32 LLtype = fseMetadata->llType;
+        const U32 Offtype = fseMetadata->ofType;
+        const U32 MLtype = fseMetadata->mlType;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
+        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+        ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+        op += fseMetadata->fseTablesSize;
+    } else {
+        const U32 repeat = set_repeat;
+        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
+    }
+
+    {   size_t const bitstreamSize = ZSTD_encodeSequences(
+                                        op, oend - op,
+                                        fseTables->matchlengthCTable, mlCode,
+                                        fseTables->offcodeCTable, ofCode,
+                                        fseTables->litlengthCTable, llCode,
+                                        sequences, nbSeq,
+                                        longOffsets, bmi2);
+        FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed");
+        op += bitstreamSize;
+        /* zstd versions <= 1.3.4 mistakenly report corruption when
+         * FSE_readNCount() receives a buffer < 4 bytes.
+         * Fixed by https://github.com/facebook/zstd/pull/1146.
+         * This can happen when the last set_compressed table present is 2
+         * bytes and the bitstream is only one byte.
+         * In this exceedingly rare case, we will simply emit an uncompressed
+         * block, since it isn't worth optimizing.
+         */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+        if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) {
+            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(fseMetadata->lastCountSize + bitstreamSize == 3);
+            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
+                        "emitting an uncompressed block.");
+            return 0;
+        }
+#endif
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize);
+    }
+
+    /* zstd versions <= 1.4.0 mistakenly report error when
+     * sequences section body size is less than 3 bytes.
+     * Fixed by https://github.com/facebook/zstd/pull/1664.
+     * This can happen when the previous sequences section block is compressed
+     * with rle mode and the current block's sequences section is compressed
+     * with repeat mode where sequences section body size can be 1 byte.
+     */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    if (op-seqHead < 4) {
+        DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting "
+                    "an uncompressed block when sequences are < 4 bytes");
+        return 0;
+    }
+#endif
+
+    *entropyWritten = 1;
+    return op - ostart;
+}
+
+/* ZSTD_compressSubBlock() :
+ *  Compresses a single sub-block.
+ *  @return : compressed size of the sub-block
+ *            Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
+                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                    const seqDef* sequences, size_t nbSeq,
+                                    const BYTE* literals, size_t litSize,
+                                    const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                    const ZSTD_CCtx_params* cctxParams,
+                                    void* dst, size_t dstCapacity,
+                                    const int bmi2,
+                                    int writeLitEntropy, int writeSeqEntropy,
+                                    int* litEntropyWritten, int* seqEntropyWritten,
+                                    U32 lastBlock)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart + ZSTD_blockHeaderSize;
+    DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)",
+                litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
+    {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
+                                                        &entropyMetadata->hufMetadata, literals, litSize,
+                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+        FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
+        if (cLitSize == 0) return 0;
+        op += cLitSize;
+    }
+    {   size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse,
+                                                  &entropyMetadata->fseMetadata,
+                                                  sequences, nbSeq,
+                                                  llCode, mlCode, ofCode,
+                                                  cctxParams,
+                                                  op, oend-op,
+                                                  bmi2, writeSeqEntropy, seqEntropyWritten);
+        FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
+        if (cSeqSize == 0) return 0;
+        op += cSeqSize;
+    }
+    /* Write block header */
+    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+        U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+        MEM_writeLE24(ostart, cBlockHeader24);
+    }
+    return op-ostart;
+}
+
+static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    unsigned maxSymbolValue = 255;
+    size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+
+    if (hufMetadata->hType == set_basic) return litSize;
+    else if (hufMetadata->hType == set_rle) return 1;
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+        if (ZSTD_isError(largest)) return litSize;
+        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+            return cLitSizeEstimate + literalSectionHeaderSize;
+    }   }
+    assert(0); /* impossible */
+    return 0;
+}
+
+static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, unsigned maxCode,
+                        size_t nbSeq, const FSE_CTable* fseCTable,
+                        const U32* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    const BYTE* ctp = codeTable;
+    const BYTE* const ctStart = ctp;
+    const BYTE* const ctEnd = ctStart + nbSeq;
+    size_t cSymbolTypeSizeEstimateInBits = 0;
+    unsigned max = maxCode;
+
+    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+    if (type == set_basic) {
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        cSymbolTypeSizeEstimateInBits = max <= defaultMax
+                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
+                : ERROR(GENERIC);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits / 8;
+}
+
+static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t cSeqSizeEstimate = 0;
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
+                                         nbSeq, fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
+                                         nbSeq, fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
+                                         nbSeq, fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                        const BYTE* ofCodeTable,
+                                        const BYTE* llCodeTable,
+                                        const BYTE* mlCodeTable,
+                                        size_t nbSeq,
+                                        const ZSTD_entropyCTables_t* entropy,
+                                        const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                        void* workspace, size_t wkspSize,
+                                        int writeLitEntropy, int writeSeqEntropy) {
+    size_t cSizeEstimate = 0;
+    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
+                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+                                                         workspace, wkspSize, writeLitEntropy);
+    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                         workspace, wkspSize, writeSeqEntropy);
+    return cSizeEstimate + ZSTD_blockHeaderSize;
+}
+
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+    if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
+        return 1;
+    if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
+        return 1;
+    if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
+        return 1;
+    return 0;
+}
+
+/* ZSTD_compressSubBlock_multi() :
+ *  Breaks super-block into multiple sub-blocks and compresses them.
+ *  Entropy will be written to the first block.
+ *  The following blocks will use repeat mode to compress.
+ *  All sub-blocks are compressed blocks (no raw or rle blocks).
+ *  @return : compressed size of the super block (which is multiple ZSTD blocks)
+ *            Or 0 if it failed to compress. */
+static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
+                            const ZSTD_compressedBlockState_t* prevCBlock,
+                            ZSTD_compressedBlockState_t* nextCBlock,
+                            const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                            const ZSTD_CCtx_params* cctxParams,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const int bmi2, U32 lastBlock,
+                            void* workspace, size_t wkspSize)
+{
+    const seqDef* const sstart = seqStorePtr->sequencesStart;
+    const seqDef* const send = seqStorePtr->sequences;
+    const seqDef* sp = sstart;
+    const BYTE* const lstart = seqStorePtr->litStart;
+    const BYTE* const lend = seqStorePtr->lit;
+    const BYTE* lp = lstart;
+    BYTE const* ip = (BYTE const*)src;
+    BYTE const* const iend = ip + srcSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    const BYTE* llCodePtr = seqStorePtr->llCode;
+    const BYTE* mlCodePtr = seqStorePtr->mlCode;
+    const BYTE* ofCodePtr = seqStorePtr->ofCode;
+    size_t targetCBlockSize = cctxParams->targetCBlockSize;
+    size_t litSize, seqCount;
+    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    int writeSeqEntropy = 1;
+    int lastSequence = 0;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
+                (unsigned)(lend-lp), (unsigned)(send-sstart));
+
+    litSize = 0;
+    seqCount = 0;
+    do {
+        size_t cBlockSizeEstimate = 0;
+        if (sstart == send) {
+            lastSequence = 1;
+        } else {
+            const seqDef* const sequence = sp + seqCount;
+            lastSequence = sequence == send - 1;
+            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
+            seqCount++;
+        }
+        if (lastSequence) {
+            assert(lp <= lend);
+            assert(litSize <= (size_t)(lend - lp));
+            litSize = (size_t)(lend - lp);
+        }
+        /* I think there is an optimization opportunity here.
+         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
+         * since it recalculates estimate from scratch.
+         * For example, it would recount literal distribution and symbol codes everytime.
+         */
+        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
+                                                       &nextCBlock->entropy, entropyMetadata,
+                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
+        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
+            int litEntropyWritten = 0;
+            int seqEntropyWritten = 0;
+            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
+            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                                       sp, seqCount,
+                                                       lp, litSize,
+                                                       llCodePtr, mlCodePtr, ofCodePtr,
+                                                       cctxParams,
+                                                       op, oend-op,
+                                                       bmi2, writeLitEntropy, writeSeqEntropy,
+                                                       &litEntropyWritten, &seqEntropyWritten,
+                                                       lastBlock && lastSequence);
+            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+            if (cSize > 0 && cSize < decompressedSize) {
+                DEBUGLOG(5, "Committed the sub-block");
+                assert(ip + decompressedSize <= iend);
+                ip += decompressedSize;
+                sp += seqCount;
+                lp += litSize;
+                op += cSize;
+                llCodePtr += seqCount;
+                mlCodePtr += seqCount;
+                ofCodePtr += seqCount;
+                litSize = 0;
+                seqCount = 0;
+                /* Entropy only needs to be written once */
+                if (litEntropyWritten) {
+                    writeLitEntropy = 0;
+                }
+                if (seqEntropyWritten) {
+                    writeSeqEntropy = 0;
+                }
+            }
+        }
+    } while (!lastSequence);
+    if (writeLitEntropy) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
+    }
+    if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
+        /* If we haven't written our entropy tables, then we've violated our contract and
+         * must emit an uncompressed block.
+         */
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+        return 0;
+    }
+    if (ip < iend) {
+        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
+        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+        FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+        assert(cSize != 0);
+        op += cSize;
+        /* We have to regenerate the repcodes because we've skipped some sequences */
+        if (sp < send) {
+            seqDef const* seq;
+            repcodes_t rep;
+            ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
+            for (seq = sstart; seq < sp; ++seq) {
+                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+            }
+            ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
+        }
+    }
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
+    return op-ostart;
+}
+
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock) {
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+
+    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+          &zc->blockState.prevCBlock->entropy,
+          &zc->blockState.nextCBlock->entropy,
+          &zc->appliedParams,
+          &entropyMetadata,
+          zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+
+    return ZSTD_compressSubBlock_multi(&zc->seqStore,
+            zc->blockState.prevCBlock,
+            zc->blockState.nextCBlock,
+            &entropyMetadata,
+            &zc->appliedParams,
+            dst, dstCapacity,
+            src, srcSize,
+            zc->bmi2, lastBlock,
+            zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
+}
diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h
new file mode 100644 (file)
index 0000000..224ece7
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+
+#include <linux/zstd.h> /* ZSTD_CCtx */
+
+/*-*************************************
+*  Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+                               void* dst, size_t dstCapacity,
+                               void const* src, size_t srcSize,
+                               unsigned lastBlock);
+
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
new file mode 100644 (file)
index 0000000..98e359a
--- /dev/null
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CWKSP_H
+#define ZSTD_CWKSP_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/zstd_internal.h"
+
+
+/*-*************************************
+*  Constants
+***************************************/
+
+/* Since the workspace is effectively its own little malloc implementation /
+ * arena, when we run under ASAN, we should similarly insert redzones between
+ * each internal element of the workspace, so ASAN will catch overruns that
+ * reach outside an object but that stay inside the workspace.
+ *
+ * This defines the size of that redzone.
+ */
+#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE
+#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
+#endif
+
+/*-*************************************
+*  Structures
+***************************************/
+typedef enum {
+    ZSTD_cwksp_alloc_objects,
+    ZSTD_cwksp_alloc_buffers,
+    ZSTD_cwksp_alloc_aligned
+} ZSTD_cwksp_alloc_phase_e;
+
+/*
+ * Used to describe whether the workspace is statically allocated (and will not
+ * necessarily ever be freed), or if it's dynamically allocated and we can
+ * expect a well-formed caller to free this.
+ */
+typedef enum {
+    ZSTD_cwksp_dynamic_alloc,
+    ZSTD_cwksp_static_alloc
+} ZSTD_cwksp_static_alloc_e;
+
+/*
+ * Zstd fits all its internal datastructures into a single continuous buffer,
+ * so that it only needs to perform a single OS allocation (or so that a buffer
+ * can be provided to it and it can perform no allocations at all). This buffer
+ * is called the workspace.
+ *
+ * Several optimizations complicate that process of allocating memory ranges
+ * from this workspace for each internal datastructure:
+ *
+ * - These different internal datastructures have different setup requirements:
+ *
+ *   - The static objects need to be cleared once and can then be trivially
+ *     reused for each compression.
+ *
+ *   - Various buffers don't need to be initialized at all--they are always
+ *     written into before they're read.
+ *
+ *   - The matchstate tables have a unique requirement that they don't need
+ *     their memory to be totally cleared, but they do need the memory to have
+ *     some bound, i.e., a guarantee that all values in the memory they've been
+ *     allocated is less than some maximum value (which is the starting value
+ *     for the indices that they will then use for compression). When this
+ *     guarantee is provided to them, they can use the memory without any setup
+ *     work. When it can't, they have to clear the area.
+ *
+ * - These buffers also have different alignment requirements.
+ *
+ * - We would like to reuse the objects in the workspace for multiple
+ *   compressions without having to perform any expensive reallocation or
+ *   reinitialization work.
+ *
+ * - We would like to be able to efficiently reuse the workspace across
+ *   multiple compressions **even when the compression parameters change** and
+ *   we need to resize some of the objects (where possible).
+ *
+ * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp
+ * abstraction was created. It works as follows:
+ *
+ * Workspace Layout:
+ *
+ * [                        ... workspace ...                         ]
+ * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers]
+ *
+ * The various objects that live in the workspace are divided into the
+ * following categories, and are allocated separately:
+ *
+ * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict,
+ *   so that literally everything fits in a single buffer. Note: if present,
+ *   this must be the first object in the workspace, since ZSTD_customFree{CCtx,
+ *   CDict}() rely on a pointer comparison to see whether one or two frees are
+ *   required.
+ *
+ * - Fixed size objects: these are fixed-size, fixed-count objects that are
+ *   nonetheless "dynamically" allocated in the workspace so that we can
+ *   control how they're initialized separately from the broader ZSTD_CCtx.
+ *   Examples:
+ *   - Entropy Workspace
+ *   - 2 x ZSTD_compressedBlockState_t
+ *   - CDict dictionary contents
+ *
+ * - Tables: these are any of several different datastructures (hash tables,
+ *   chain tables, binary trees) that all respect a common format: they are
+ *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
+ *   Their sizes depend on the cparams.
+ *
+ * - Aligned: these buffers are used for various purposes that require 4 byte
+ *   alignment, but don't require any initialization before they're used.
+ *
+ * - Buffers: these buffers are used for various purposes that don't require
+ *   any alignment or initialization before they're used. This means they can
+ *   be moved around at no cost for a new compression.
+ *
+ * Allocating Memory:
+ *
+ * The various types of objects must be allocated in order, so they can be
+ * correctly packed into the workspace buffer. That order is:
+ *
+ * 1. Objects
+ * 2. Buffers
+ * 3. Aligned
+ * 4. Tables
+ *
+ * Attempts to reserve objects of different types out of order will fail.
+ */
+typedef struct {
+    void* workspace;
+    void* workspaceEnd;
+
+    void* objectEnd;
+    void* tableEnd;
+    void* tableValidEnd;
+    void* allocStart;
+
+    BYTE allocFailed;
+    int workspaceOversizedDuration;
+    ZSTD_cwksp_alloc_phase_e phase;
+    ZSTD_cwksp_static_alloc_e isStatic;
+} ZSTD_cwksp;
+
+/*-*************************************
+*  Functions
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws);
+
+MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
+    (void)ws;
+    assert(ws->workspace <= ws->objectEnd);
+    assert(ws->objectEnd <= ws->tableEnd);
+    assert(ws->objectEnd <= ws->tableValidEnd);
+    assert(ws->tableEnd <= ws->allocStart);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    assert(ws->allocStart <= ws->workspaceEnd);
+}
+
+/*
+ * Align must be a power of 2.
+ */
+MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
+    size_t const mask = align - 1;
+    assert((align & mask) == 0);
+    return (size + mask) & ~mask;
+}
+
+/*
+ * Use this to determine how much space in the workspace we will consume to
+ * allocate this object. (Normally it should be exactly the size of the object,
+ * but under special conditions, like ASAN, where we pad each object, it might
+ * be larger.)
+ *
+ * Since tables aren't currently redzoned, you don't need to call through this
+ * to figure out how much space you need for the matchState tables. Everything
+ * else is though.
+ */
+MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
+    if (size == 0)
+        return 0;
+    return size;
+}
+
+MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
+        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+    assert(phase >= ws->phase);
+    if (phase > ws->phase) {
+        if (ws->phase < ZSTD_cwksp_alloc_buffers &&
+                phase >= ZSTD_cwksp_alloc_buffers) {
+            ws->tableValidEnd = ws->objectEnd;
+        }
+        if (ws->phase < ZSTD_cwksp_alloc_aligned &&
+                phase >= ZSTD_cwksp_alloc_aligned) {
+            /* If unaligned allocations down from a too-large top have left us
+             * unaligned, we need to realign our alloc ptr. Technically, this
+             * can consume space that is unaccounted for in the neededSpace
+             * calculation. However, I believe this can only happen when the
+             * workspace is too large, and specifically when it is too large
+             * by a larger margin than the space that will be consumed. */
+            /* TODO: cleaner, compiler warning friendly way to do this??? */
+            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
+            if (ws->allocStart < ws->tableValidEnd) {
+                ws->tableValidEnd = ws->allocStart;
+            }
+        }
+        ws->phase = phase;
+    }
+}
+
+/*
+ * Returns whether this object/buffer/etc was allocated in this workspace.
+ */
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+    return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
+}
+
+/*
+ * Internal function. Do not use directly.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_internal(
+        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+    void* alloc;
+    void* bottom = ws->tableEnd;
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    alloc = (BYTE *)ws->allocStart - bytes;
+
+    if (bytes == 0)
+        return NULL;
+
+
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+
+
+    return alloc;
+}
+
+/*
+ * Reserves and returns unaligned memory.
+ */
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+    return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
+}
+
+/*
+ * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+}
+
+/*
+ * Aligned on sizeof(unsigned). These buffers have the special property that
+ * their values remain constrained, allowing us to re-use them without
+ * memset()-ing them.
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+    const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
+    void* alloc = ws->tableEnd;
+    void* end = (BYTE *)alloc + bytes;
+    void* top = ws->allocStart;
+
+    DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    assert((bytes & (sizeof(U32)-1)) == 0);
+    ZSTD_cwksp_internal_advance_phase(ws, phase);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(end <= top);
+    if (end > top) {
+        DEBUGLOG(4, "cwksp: table alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->tableEnd = end;
+
+
+    return alloc;
+}
+
+/*
+ * Aligned on sizeof(void*).
+ */
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
+    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+    void* alloc = ws->objectEnd;
+    void* end = (BYTE*)alloc + roundedBytes;
+
+
+    DEBUGLOG(5,
+        "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
+        alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
+    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
+    assert((bytes & (sizeof(void*)-1)) == 0);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    /* we must be in the first phase, no advance is possible */
+    if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
+        DEBUGLOG(4, "cwksp: object alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    ws->objectEnd = end;
+    ws->tableEnd = end;
+    ws->tableValidEnd = end;
+
+
+    return alloc;
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
+
+
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    ws->tableValidEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ws->tableValidEnd = ws->tableEnd;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * Zero the part of the allocated tables not already marked clean.
+ */
+MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables");
+    assert(ws->tableValidEnd >= ws->objectEnd);
+    assert(ws->tableValidEnd <= ws->allocStart);
+    if (ws->tableValidEnd < ws->tableEnd) {
+        ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd);
+    }
+    ZSTD_cwksp_mark_tables_clean(ws);
+}
+
+/*
+ * Invalidates table allocations.
+ * All other allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing tables!");
+
+
+    ws->tableEnd = ws->objectEnd;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * Invalidates all buffer, aligned, and table allocations.
+ * Object allocations remain valid.
+ */
+MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
+    DEBUGLOG(4, "cwksp: clearing!");
+
+
+
+    ws->tableEnd = ws->objectEnd;
+    ws->allocStart = ws->workspaceEnd;
+    ws->allocFailed = 0;
+    if (ws->phase > ZSTD_cwksp_alloc_buffers) {
+        ws->phase = ZSTD_cwksp_alloc_buffers;
+    }
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+/*
+ * The provided workspace takes ownership of the buffer [start, start+size).
+ * Any existing values in the workspace are ignored (the previously managed
+ * buffer, if present, must be separately freed).
+ */
+MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) {
+    DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size);
+    assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */
+    ws->workspace = start;
+    ws->workspaceEnd = (BYTE*)start + size;
+    ws->objectEnd = ws->workspace;
+    ws->tableValidEnd = ws->objectEnd;
+    ws->phase = ZSTD_cwksp_alloc_objects;
+    ws->isStatic = isStatic;
+    ZSTD_cwksp_clear(ws);
+    ws->workspaceOversizedDuration = 0;
+    ZSTD_cwksp_assert_internal_consistency(ws);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
+    void* workspace = ZSTD_customMalloc(size, customMem);
+    DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
+    RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
+    ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc);
+    return 0;
+}
+
+MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
+    void *ptr = ws->workspace;
+    DEBUGLOG(4, "cwksp: freeing workspace");
+    ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
+    ZSTD_customFree(ptr, customMem);
+}
+
+/*
+ * Moves the management of a workspace from one cwksp to another. The src cwksp
+ * is left in an invalid state (src must be re-init()'ed before it's used again).
+ */
+MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
+    *dst = *src;
+    ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
+}
+
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
+MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
+    return ws->allocFailed;
+}
+
+/*-*************************************
+*  Functions Checking Free Space
+***************************************/
+
+MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace;
+}
+
+MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_available(
+        ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR);
+}
+
+MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)
+        && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+MEM_STATIC void ZSTD_cwksp_bump_oversized_duration(
+        ZSTD_cwksp* ws, size_t additionalNeededSpace) {
+    if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) {
+        ws->workspaceOversizedDuration++;
+    } else {
+        ws->workspaceOversizedDuration = 0;
+    }
+}
+
+
+#endif /* ZSTD_CWKSP_H */
diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
new file mode 100644 (file)
index 0000000..b0424d2
--- /dev/null
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_double_fast.h"
+
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashLarge = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash tables.
+     * Insert the other positions into the large hash table if their entry
+     * is empty.
+     */
+    for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        U32 i;
+        for (i = 0; i < fastHashFillStep; ++i) {
+            size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
+            size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
+            if (i == 0)
+                hashSmall[smHash] = curr + i;
+            if (i == 0 || hashLarge[lgHash] == 0)
+                hashLarge[lgHash] = curr + i;
+            /* Only load extra positions for ZSTD_dtlm_full */
+            if (dtlm == ZSTD_dtlm_fast)
+                break;
+    }   }
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_doubleFast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    const U32 hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    const U32 hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams =
+                                     dictMode == ZSTD_dictMatchState ?
+                                     &dms->cParams : NULL;
+    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
+                                     dms->hashTable : NULL;
+    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
+                                     dms->chainTable : NULL;
+    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.base : NULL;
+    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
+                                     dictBase + dictStartIndex : NULL;
+    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
+                                     dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->hashLog : hBitsL;
+    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
+                                     dictCParams->chainLog : hBitsS;
+    const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
+
+    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+
+    /* if a dictionary is attached, it must be within window range */
+    if (dictMode == ZSTD_dictMatchState) {
+        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
+    }
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+    if (dictMode == ZSTD_dictMatchState) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        U32 offset;
+        size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
+        size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
+        size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
+        size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
+        U32 const curr = (U32)(ip-base);
+        U32 const matchIndexL = hashLong[h2];
+        U32 matchIndexS = hashSmall[h];
+        const BYTE* matchLong = base + matchIndexL;
+        const BYTE* match = base + matchIndexS;
+        const U32 repIndex = curr + 1 - offset_1;
+        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
+                            && repIndex < prefixLowestIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
+
+        /* check dictMatchState repcode */
+        if (dictMode == ZSTD_dictMatchState
+            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        /* check noDict repcode */
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            goto _match_stored;
+        }
+
+        if (matchIndexL > prefixLowestIndex) {
+            /* check prefix long match */
+            if (MEM_read64(matchLong) == MEM_read64(ip)) {
+                mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
+                offset = (U32)(ip-matchLong);
+                while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
+                goto _match_found;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState long match */
+            U32 const dictMatchIndexL = dictHashLong[dictHL];
+            const BYTE* dictMatchL = dictBase + dictMatchIndexL;
+            assert(dictMatchL < dictEnd);
+
+            if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
+                mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
+                offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
+                while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
+                goto _match_found;
+        }   }
+
+        if (matchIndexS > prefixLowestIndex) {
+            /* check prefix short match */
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+            }
+        } else if (dictMode == ZSTD_dictMatchState) {
+            /* check dictMatchState short match */
+            U32 const dictMatchIndexS = dictHashSmall[dictHS];
+            match = dictBase + dictMatchIndexS;
+            matchIndexS = dictMatchIndexS + dictIndexDelta;
+
+            if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
+                goto _search_next_long;
+        }   }
+
+        ip += ((ip-anchor) >> kSearchStrength) + 1;
+#if defined(__aarch64__)
+        PREFETCH_L1(ip+256);
+#endif
+        continue;
+
+_search_next_long:
+
+        {   size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+            size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
+            U32 const matchIndexL3 = hashLong[hl3];
+            const BYTE* matchL3 = base + matchIndexL3;
+            hashLong[hl3] = curr + 1;
+
+            /* check prefix long +1 match */
+            if (matchIndexL3 > prefixLowestIndex) {
+                if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
+                    ip++;
+                    offset = (U32)(ip-matchL3);
+                    while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+                }
+            } else if (dictMode == ZSTD_dictMatchState) {
+                /* check dict long +1 match */
+                U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
+                const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
+                assert(dictMatchL3 < dictEnd);
+                if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
+                    mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
+                    ip++;
+                    offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
+                    while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
+                    goto _match_found;
+        }   }   }
+
+        /* if no long +1 match, explore the short match we found */
+        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
+            mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
+            offset = (U32)(curr - matchIndexS);
+            while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        } else {
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            offset = (U32)(ip - match);
+            while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+        }
+
+_match_found:
+        offset_2 = offset_1;
+        offset_1 = offset;
+
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+_match_stored:
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            if (dictMode == ZSTD_dictMatchState) {
+                while (ip <= ilimit) {
+                    U32 const current2 = (U32)(ip-base);
+                    U32 const repIndex2 = current2 - offset_2;
+                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
+                        && repIndex2 < prefixLowestIndex ?
+                            dictBase + repIndex2 - dictIndexDelta :
+                            base + repIndex2;
+                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                        ip += repLength2;
+                        anchor = ip;
+                        continue;
+                    }
+                    break;
+            }   }
+
+            if (dictMode == ZSTD_noDict) {
+                while ( (ip <= ilimit)
+                     && ( (offset_2>0)
+                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
+                    ip += rLength;
+                    anchor = ip;
+                    continue;   /* faster when present ... (?) */
+        }   }   }
+    }   /* while (ip < ilimit) */
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    const U32 mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
+    }
+}
+
+
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    const U32 mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls /* template */)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    U32  const hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    U32  const hBitsS = cParams->chainLog;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
+
+    /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
+        const U32 matchIndex = hashSmall[hSmall];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* match = matchBase + matchIndex;
+
+        const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
+        const U32 matchLongIndex = hashLong[hLong];
+        const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* matchLong = matchLongBase + matchLongIndex;
+
+        const U32 curr = (U32)(ip-base);
+        const U32 repIndex = curr + 1 - offset_1;   /* offset_1 expected <= curr +1 */
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        size_t mLength;
+        hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
+
+        if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
+            & (repIndex > dictStartIndex))
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else {
+            if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
+                const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 offset;
+                mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
+                offset = curr - matchLongIndex;
+                while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
+                size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
+                U32 const matchIndex3 = hashLong[h3];
+                const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
+                const BYTE* match3 = match3Base + matchIndex3;
+                U32 offset;
+                hashLong[h3] = curr + 1;
+                if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
+                    const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
+                    ip++;
+                    offset = curr+1 - matchIndex3;
+                    while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
+                } else {
+                    const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                    const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                    mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                    offset = curr - matchIndex;
+                    while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                }
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+
+            } else {
+                ip += ((ip-anchor) >> kSearchStrength) + 1;
+                continue;
+        }   }
+
+        /* move to next sequence start */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
+                    & (repIndex2 > dictStartIndex))
+                  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h
new file mode 100644 (file)
index 0000000..6822bde
--- /dev/null
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_DOUBLE_FAST_H
+#define ZSTD_DOUBLE_FAST_H
+
+
+#include "../common/mem.h"      /* U32 */
+#include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
+
+void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
+                              void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_doubleFast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_doubleFast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+
+#endif /* ZSTD_DOUBLE_FAST_H */
diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c
new file mode 100644 (file)
index 0000000..96b7d48
--- /dev/null
@@ -0,0 +1,496 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
+#include "zstd_fast.h"
+
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        const void* const end,
+                        ZSTD_dictTableLoadMethod_e dtlm)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hBits = cParams->hashLog;
+    U32  const mls = cParams->minMatch;
+    const BYTE* const base = ms->window.base;
+    const BYTE* ip = base + ms->nextToUpdate;
+    const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
+    const U32 fastHashFillStep = 3;
+
+    /* Always insert every fastHashFillStep position into the hash table.
+     * Insert the other positions if their hash entry is empty.
+     */
+    for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
+        U32 const curr = (U32)(ip - base);
+        size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
+        hashTable[hash0] = curr;
+        if (dtlm == ZSTD_dtlm_fast) continue;
+        /* Only load extra positions for ZSTD_dtlm_full */
+        {   U32 p;
+            for (p = 1; p < fastHashFillStep; ++p) {
+                size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
+                if (hashTable[hash] == 0) {  /* not yet filled */
+                    hashTable[hash] = curr + p;
+    }   }   }   }
+}
+
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_fast_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize,
+        U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
+    ip0 += (ip0 == prefixStart);
+    ip1 = ip0 + 1;
+    {   U32 const curr = (U32)(ip0 - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Main Search Loop */
+#ifdef __INTEL_COMPILER
+    /* From intel 'The vector pragma indicates that the loop should be
+     * vectorized if it is legal to do so'. Can be used together with
+     * #pragma ivdep (but have opted to exclude that because intel
+     * warns against using it).*/
+    #pragma vector always
+#endif
+    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
+        size_t mLength;
+        BYTE const* ip2 = ip0 + 2;
+        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
+        U32 const val0 = MEM_read32(ip0);
+        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
+        U32 const val1 = MEM_read32(ip1);
+        U32 const current0 = (U32)(ip0-base);
+        U32 const current1 = (U32)(ip1-base);
+        U32 const matchIndex0 = hashTable[h0];
+        U32 const matchIndex1 = hashTable[h1];
+        BYTE const* repMatch = ip2 - offset_1;
+        const BYTE* match0 = base + matchIndex0;
+        const BYTE* match1 = base + matchIndex1;
+        U32 offcode;
+
+#if defined(__aarch64__)
+        PREFETCH_L1(ip0+256);
+#endif
+
+        hashTable[h0] = current0;   /* update hash table */
+        hashTable[h1] = current1;   /* update hash table */
+
+        assert(ip0 + 1 == ip1);
+
+        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
+            mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
+            ip0 = ip2 - mLength;
+            match0 = repMatch - mLength;
+            mLength += 4;
+            offcode = 0;
+            goto _match;
+        }
+        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
+            /* found a regular match */
+            goto _offset;
+        }
+        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
+            /* found a regular match after one literal */
+            ip0 = ip1;
+            match0 = match1;
+            goto _offset;
+        }
+        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
+            assert(step >= 2);
+            ip0 += step;
+            ip1 += step;
+            continue;
+        }
+_offset: /* Requires: ip0, match0 */
+        /* Compute the offset code */
+        offset_2 = offset_1;
+        offset_1 = (U32)(ip0-match0);
+        offcode = offset_1 + ZSTD_REP_MOVE;
+        mLength = 4;
+        /* Count the backwards match length */
+        while (((ip0>anchor) & (match0>prefixStart))
+             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+
+_match: /* Requires: ip0, match0, offcode */
+        /* Count the forward length */
+        mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
+        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
+        /* match found */
+        ip0 += mLength;
+        anchor = ip0;
+
+        if (ip0 <= ilimit) {
+            /* Fill Table */
+            assert(base+current0+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+            if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
+                while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+                    /* store sequence */
+                    size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
+                    { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                    ip0 += rLength;
+                    ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
+                    anchor = ip0;
+                    continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+        }   }   }
+        ip1 = ip0 + 1;
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState == NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_fast_dictMatchState_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   prefixStartIndex = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams ;
+    const U32* const dictHashTable = dms->hashTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixStartIndex - (U32)(dictEnd - dictBase);
+    const U32 dictAndPrefixLength  = (U32)(ip - prefixStart + dictEnd - dictStart);
+    const U32 dictHLog             = dictCParams->hashLog;
+
+    /* if a dictionary is still attached, it necessarily means that
+     * it is within window size. So we just check it. */
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 endIndex = (U32)((size_t)(ip - base) + srcSize);
+    assert(endIndex - prefixStartIndex <= maxDistance);
+    (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
+
+    /* ensure there will be no underflow
+     * when translating a dict index into a local index */
+    assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic");
+    ip += (dictAndPrefixLength == 0);
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
+
+    /* Main Search Loop */
+    while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
+        size_t mLength;
+        size_t const h = ZSTD_hashPtr(ip, hlog, mls);
+        U32 const curr = (U32)(ip-base);
+        U32 const matchIndex = hashTable[h];
+        const BYTE* match = base + matchIndex;
+        const U32 repIndex = curr + 1 - offset_1;
+        const BYTE* repMatch = (repIndex < prefixStartIndex) ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+        hashTable[h] = curr;   /* update hash table */
+
+        if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
+          && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+        } else if ( (matchIndex <= prefixStartIndex) ) {
+            size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
+            U32 const dictMatchIndex = dictHashTable[dictHash];
+            const BYTE* dictMatch = dictBase + dictMatchIndex;
+            if (dictMatchIndex <= dictStartIndex ||
+                MEM_read32(dictMatch) != MEM_read32(ip)) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                continue;
+            } else {
+                /* found a dict match */
+                U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
+                mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
+                while (((ip>anchor) & (dictMatch>dictStart))
+                     && (ip[-1] == dictMatch[-1])) {
+                    ip--; dictMatch--; mLength++;
+                } /* catch up */
+                offset_2 = offset_1;
+                offset_1 = offset;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            }
+        } else if (MEM_read32(match) != MEM_read32(ip)) {
+            /* it's not a match, and we're not going to check the dictionary */
+            assert(stepSize >= 1);
+            ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+            continue;
+        } else {
+            /* found a regular match */
+            U32 const offset = (U32)(ip-match);
+            mLength = ZSTD_count(ip+4, match+4, iend) + 4;
+            while (((ip>anchor) & (match>prefixStart))
+                 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
+            offset_2 = offset_1;
+            offset_1 = offset;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        }
+
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            assert(base+curr+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
+                        dictBase - dictIndexDelta + repIndex2 :
+                        base + repIndex2;
+                if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+    }
+
+    /* save reps for next block */
+    rep[0] = offset_1 ? offset_1 : offsetSaved;
+    rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    assert(ms->dictMatchState != NULL);
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
+
+
+static size_t ZSTD_compressBlock_fast_extDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32 const hlog = cParams->hashLog;
+    /* support stepSize of 0 */
+    U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
+    const U32   lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog);
+    const U32   dictStartIndex = lowLimit;
+    const BYTE* const dictStart = dictBase + dictStartIndex;
+    const U32   dictLimit = ms->window.dictLimit;
+    const U32   prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit;
+    const BYTE* const prefixStart = base + prefixStartIndex;
+    const BYTE* const dictEnd = dictBase + prefixStartIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    U32 offset_1=rep[0], offset_2=rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
+
+    /* switch to "regular" variant if extDict is invalidated due to maxDistance */
+    if (prefixStartIndex == dictStartIndex)
+        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+
+    /* Search Loop */
+    while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
+        const size_t h = ZSTD_hashPtr(ip, hlog, mls);
+        const U32    matchIndex = hashTable[h];
+        const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
+        const BYTE*  match = matchBase + matchIndex;
+        const U32    curr = (U32)(ip-base);
+        const U32    repIndex = curr + 1 - offset_1;
+        const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
+        const BYTE* const repMatch = repBase + repIndex;
+        hashTable[h] = curr;   /* update hash table */
+        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
+        assert(offset_1 <= curr +1);   /* check repIndex */
+
+        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+            const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
+            size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
+            ip++;
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ip += rLength;
+            anchor = ip;
+        } else {
+            if ( (matchIndex < dictStartIndex) ||
+                 (MEM_read32(match) != MEM_read32(ip)) ) {
+                assert(stepSize >= 1);
+                ip += ((ip-anchor) >> kSearchStrength) + stepSize;
+                continue;
+            }
+            {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
+                const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
+                U32 const offset = curr - matchIndex;
+                size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
+                while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
+                offset_2 = offset_1; offset_1 = offset;  /* update offset history */
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ip += mLength;
+                anchor = ip;
+        }   }
+
+        if (ip <= ilimit) {
+            /* Fill Table */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
+            hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
+            /* check immediate repcode */
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
+                    { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                    hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
+                    ip += repLength2;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+    }   }   }
+
+    /* save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    U32 const mls = ms->cParams.minMatch;
+    switch(mls)
+    {
+    default: /* includes case 3 */
+    case 4 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+    case 5 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+    case 6 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+    case 7 :
+        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+    }
+}
diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h
new file mode 100644 (file)
index 0000000..fddc2f5
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_FAST_H
+#define ZSTD_FAST_H
+
+
+#include "../common/mem.h"      /* U32 */
+#include "zstd_compress_internal.h"
+
+void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+                        void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+size_t ZSTD_compressBlock_fast(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_fast_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_FAST_H */
diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c
new file mode 100644 (file)
index 0000000..fb54d4e
--- /dev/null
@@ -0,0 +1,1414 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "zstd_lazy.h"
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+
+static void
+ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+                const BYTE* ip, const BYTE* iend,
+                U32 mls)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const hashTable = ms->hashTable;
+    U32  const hashLog = cParams->hashLog;
+
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    if (idx != target)
+        DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
+                    idx, target, ms->window.dictLimit);
+    assert(ip + 8 <= iend);   /* condition for ZSTD_hashPtr */
+    (void)iend;
+
+    assert(idx >= ms->window.dictLimit);   /* condition for valid base+idx */
+    for ( ; idx < target ; idx++) {
+        size_t const h  = ZSTD_hashPtr(base + idx, hashLog, mls);   /* assumption : ip + 8 <= iend */
+        U32    const matchIndex = hashTable[h];
+
+        U32*   const nextCandidatePtr = bt + 2*(idx&btMask);
+        U32*   const sortMarkPtr  = nextCandidatePtr + 1;
+
+        DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
+        hashTable[h] = idx;   /* Update Hash Table */
+        *nextCandidatePtr = matchIndex;   /* update BT like a chain */
+        *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
+    }
+    ms->nextToUpdate = target;
+}
+
+
+/* ZSTD_insertDUBT1() :
+ *  sort one already inserted but unsorted position
+ *  assumption : curr >= btlow == (curr - btmask)
+ *  doesn't fail */
+static void
+ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
+                 U32 curr, const BYTE* inputEnd,
+                 U32 nbCompares, U32 btLow,
+                 const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const bt = ms->chainTable;
+    U32  const btLog  = cParams->chainLog - 1;
+    U32  const btMask = (1 << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
+    const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 matchIndex = *smallerPtr;   /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowValid = ms->window.lowLimit;
+    U32 const maxDistance = 1U << cParams->windowLog;
+    U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
+
+
+    DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
+                curr, dictLimit, windowLow);
+    assert(curr >= btLow);
+    assert(ip < iend);   /* condition for ZSTD_count */
+
+    for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < curr);
+        /* note : all candidates are now supposed sorted,
+         * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
+         * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
+
+        if ( (dictMode != ZSTD_extDict)
+          || (matchIndex+matchLength >= dictLimit)  /* both in current segment*/
+          || (curr < dictLimit) /* both in extDict */) {
+            const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
+                                     || (matchIndex+matchLength >= dictLimit)) ?
+                                        base : dictBase;
+            assert( (matchIndex+matchLength >= dictLimit)   /* might be wrong if extDict is incorrectly set to 0 */
+                 || (curr < dictLimit) );
+            match = mBase + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* preparation for next read of match[matchLength] */
+        }
+
+        DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
+                    curr, matchIndex, (U32)matchLength);
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
+                        matchIndex, btLow, nextPtr[1]);
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
+                        matchIndex, btLow, nextPtr[0]);
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+}
+
+
+static size_t
+ZSTD_DUBT_findBetterDictMatch (
+        ZSTD_matchState_t* ms,
+        const BYTE* const ip, const BYTE* const iend,
+        size_t* offsetPtr,
+        size_t bestLength,
+        U32 nbCompares,
+        U32 const mls,
+        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_matchState_t * const dms = ms->dictMatchState;
+    const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
+    const U32 * const dictHashTable = dms->hashTable;
+    U32         const hashLog = dmsCParams->hashLog;
+    size_t      const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32               dictMatchIndex = dictHashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    U32         const curr = (U32)(ip-base);
+    const BYTE* const dictBase = dms->window.base;
+    const BYTE* const dictEnd = dms->window.nextSrc;
+    U32         const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
+    U32         const dictLowLimit = dms->window.lowLimit;
+    U32         const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
+
+    U32*        const dictBt = dms->chainTable;
+    U32         const btLog  = dmsCParams->chainLog - 1;
+    U32         const btMask = (1 << btLog) - 1;
+    U32         const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
+
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+
+    (void)dictMode;
+    assert(dictMode == ZSTD_dictMatchState);
+
+    for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) {
+        U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        const BYTE* match = dictBase + dictMatchIndex;
+        matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+        if (dictMatchIndex+matchLength >= dictHighLimit)
+            match = base + dictMatchIndex + dictIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+        if (matchLength > bestLength) {
+            U32 matchIndex = dictMatchIndex + dictIndexDelta;
+            if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
+                DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+            }
+            if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
+                break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+        } else {
+            /* match is larger than current */
+            if (dictMatchIndex <= btLow) { break; }   /* beyond tree size, stop the search */
+            commonLengthLarger = matchLength;
+            dictMatchIndex = nextPtr[0];
+        }
+    }
+
+    if (bestLength >= MINMATCH) {
+        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+        DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                    curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+    }
+    return bestLength;
+
+}
+
+
+static size_t
+ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iend,
+                        size_t* offsetPtr,
+                        U32 const mls,
+                        const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32          matchIndex  = hashTable[h];
+
+    const BYTE* const base = ms->window.base;
+    U32    const curr = (U32)(ip-base);
+    U32    const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32    const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32    const unsortLimit = MAX(btLow, windowLow);
+
+    U32*         nextCandidate = bt + 2*(matchIndex&btMask);
+    U32*         unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+    U32          nbCompares = 1U << cParams->searchLog;
+    U32          nbCandidates = nbCompares;
+    U32          previousCandidate = 0;
+
+    DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
+    assert(ip <= iend-8);   /* required for h calculation */
+    assert(dictMode != ZSTD_dedicatedDictSearch);
+
+    /* reach end of unsorted candidates list */
+    while ( (matchIndex > unsortLimit)
+         && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
+         && (nbCandidates > 1) ) {
+        DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
+                    matchIndex);
+        *unsortedMark = previousCandidate;  /* the unsortedMark becomes a reversed chain, to move up back to original position */
+        previousCandidate = matchIndex;
+        matchIndex = *nextCandidate;
+        nextCandidate = bt + 2*(matchIndex&btMask);
+        unsortedMark = bt + 2*(matchIndex&btMask) + 1;
+        nbCandidates --;
+    }
+
+    /* nullify last candidate if it's still unsorted
+     * simplification, detrimental to compression ratio, beneficial for speed */
+    if ( (matchIndex > unsortLimit)
+      && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
+        DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
+                    matchIndex);
+        *nextCandidate = *unsortedMark = 0;
+    }
+
+    /* batch sort stacked candidates */
+    matchIndex = previousCandidate;
+    while (matchIndex) {  /* will end on matchIndex == 0 */
+        U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
+        U32 const nextCandidateIdx = *nextCandidateIdxPtr;
+        ZSTD_insertDUBT1(ms, matchIndex, iend,
+                         nbCandidates, unsortLimit, dictMode);
+        matchIndex = nextCandidateIdx;
+        nbCandidates++;
+    }
+
+    /* find longest match */
+    {   size_t commonLengthSmaller = 0, commonLengthLarger = 0;
+        const BYTE* const dictBase = ms->window.dictBase;
+        const U32 dictLimit = ms->window.dictLimit;
+        const BYTE* const dictEnd = dictBase + dictLimit;
+        const BYTE* const prefixStart = base + dictLimit;
+        U32* smallerPtr = bt + 2*(curr&btMask);
+        U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+        U32 matchEndIdx = curr + 8 + 1;
+        U32 dummy32;   /* to be nullified at the end */
+        size_t bestLength = 0;
+
+        matchIndex  = hashTable[h];
+        hashTable[h] = curr;   /* Update Hash Table */
+
+        for (; nbCompares && (matchIndex > windowLow); --nbCompares) {
+            U32* const nextPtr = bt + 2*(matchIndex & btMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match;
+
+            if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
+                match = base + matchIndex;
+                matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+            } else {
+                match = dictBase + matchIndex;
+                matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+                if (matchIndex+matchLength >= dictLimit)
+                    match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+            }
+
+            if (matchLength > bestLength) {
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
+                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+                    if (dictMode == ZSTD_dictMatchState) {
+                        nbCompares = 0; /* in addition to avoiding checking any
+                                         * further in this loop, make sure we
+                                         * skip checking in the dictionary. */
+                    }
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (match[matchLength] < ip[matchLength]) {
+                /* match is smaller than current */
+                *smallerPtr = matchIndex;             /* update smaller idx */
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+                matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                *largerPtr = matchIndex;
+                commonLengthLarger = matchLength;
+                if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+                largerPtr = nextPtr;
+                matchIndex = nextPtr[0];
+        }   }
+
+        *smallerPtr = *largerPtr = 0;
+
+        assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+        if (dictMode == ZSTD_dictMatchState && nbCompares) {
+            bestLength = ZSTD_DUBT_findBetterDictMatch(
+                    ms, ip, iend,
+                    offsetPtr, bestLength, nbCompares,
+                    mls, dictMode);
+        }
+
+        assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
+        ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
+        if (bestLength >= MINMATCH) {
+            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
+                        curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
+        }
+        return bestLength;
+    }
+}
+
+
+/* ZSTD_BtFindBestMatch() : Tree updater, providing best match */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iLimit,
+                      size_t* offsetPtr,
+                const U32 mls /* template */,
+                const ZSTD_dictMode_e dictMode)
+{
+    DEBUGLOG(7, "ZSTD_BtFindBestMatch");
+    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateDUBT(ms, ip, iLimit, mls);
+    return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
+}
+
+
+static size_t
+ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
+                            const BYTE* ip, const BYTE* const iLimit,
+                                  size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    }
+}
+
+
+static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    case 7 :
+    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    }
+}
+
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    while(idx < target) { /* catch up */
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
+{
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32* const hashTable = ms->hashTable;
+    U32* const chainTable = ms->chainTable;
+    U32 const chainSize = 1 << ms->cParams.chainLog;
+    U32 idx = ms->nextToUpdate;
+    U32 const minChain = chainSize < target ? target - chainSize : idx;
+    U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
+    U32 const cacheSize = bucketSize - 1;
+    U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
+    U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
+
+    /* We know the hashtable is oversized by a factor of `bucketSize`.
+     * We are going to temporarily pretend `bucketSize == 1`, keeping only a
+     * single entry. We will use the rest of the space to construct a temporary
+     * chaintable.
+     */
+    U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+    U32* const tmpHashTable = hashTable;
+    U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
+    U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
+    U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
+
+    U32 hashIdx;
+
+    assert(ms->cParams.chainLog <= 24);
+    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+    assert(idx != 0);
+    assert(tmpMinChain <= minChain);
+
+    /* fill conventional hash table and conventional chain table */
+    for ( ; idx < target; idx++) {
+        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
+        if (idx >= tmpMinChain) {
+            tmpChainTable[idx - tmpMinChain] = hashTable[h];
+        }
+        tmpHashTable[h] = idx;
+    }
+
+    /* sort chains into ddss chain table */
+    {
+        U32 chainPos = 0;
+        for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
+            U32 count;
+            U32 countBeyondMinChain = 0;
+            U32 i = tmpHashTable[hashIdx];
+            for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
+                /* skip through the chain to the first position that won't be
+                 * in the hash cache bucket */
+                if (i < minChain) {
+                    countBeyondMinChain++;
+                }
+                i = tmpChainTable[i - tmpMinChain];
+            }
+            if (count == cacheSize) {
+                for (count = 0; count < chainLimit;) {
+                    if (i < minChain) {
+                        if (!i || countBeyondMinChain++ > cacheSize) {
+                            /* only allow pulling `cacheSize` number of entries
+                             * into the cache or chainTable beyond `minChain`,
+                             * to replace the entries pulled out of the
+                             * chainTable into the cache. This lets us reach
+                             * back further without increasing the total number
+                             * of entries in the chainTable, guaranteeing the
+                             * DDSS chain table will fit into the space
+                             * allocated for the regular one. */
+                            break;
+                        }
+                    }
+                    chainTable[chainPos++] = i;
+                    count++;
+                    if (i < tmpMinChain) {
+                        break;
+                    }
+                    i = tmpChainTable[i - tmpMinChain];
+                }
+            } else {
+                count = 0;
+            }
+            if (count) {
+                tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
+            } else {
+                tmpHashTable[hashIdx] = 0;
+            }
+        }
+        assert(chainPos <= chainSize); /* I believe this is guaranteed... */
+    }
+
+    /* move chain pointers into the last entry of each hash bucket */
+    for (hashIdx = (1 << hashLog); hashIdx; ) {
+        U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
+        U32 const chainPackedPointer = tmpHashTable[hashIdx];
+        U32 i;
+        for (i = 0; i < cacheSize; i++) {
+            hashTable[bucketIdx + i] = 0;
+        }
+        hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
+    }
+
+    /* fill the buckets of the hash table */
+    for (idx = ms->nextToUpdate; idx < target; idx++) {
+        U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
+                   << ZSTD_LAZY_DDSS_BUCKET_LOG;
+        U32 i;
+        /* Shift hash cache down 1. */
+        for (i = cacheSize - 1; i; i--)
+            hashTable[h + i] = hashTable[h + i - 1];
+        hashTable[h] = idx;
+    }
+
+    ms->nextToUpdate = target;
+}
+
+
+/* inlining is important to hardwire a hot branch (template emulation) */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_HcFindBestMatch_generic (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainSize = (1 << cParams->chainLog);
+    const U32 chainMask = chainSize-1;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 minChain = curr > chainSize ? curr - chainSize : 0;
+    U32 nbAttempts = 1U << cParams->searchLog;
+    size_t ml=4-1;
+
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
+                         ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+    const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
+                        ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
+
+    U32 matchIndex;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32* entry = &dms->hashTable[ddsIdx];
+        PREFETCH_L1(entry);
+    }
+
+    /* HC4 match finder */
+    matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
+
+    for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
+        size_t currentMl=0;
+        if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+            const BYTE* const match = base + matchIndex;
+            assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+            if (match[ml] == ip[ml])   /* potentially better */
+                currentMl = ZSTD_count(ip, match, iLimit);
+        } else {
+            const BYTE* const match = dictBase + matchIndex;
+            assert(match+4 <= dictEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+            if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+        }
+
+        if (matchIndex <= minChain) break;
+        matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
+    }
+
+    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsLowestIndex  = dms->window.dictLimit;
+        const BYTE* const ddsBase = dms->window.base;
+        const BYTE* const ddsEnd  = dms->window.nextSrc;
+        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+        const U32 ddsIndexDelta   = dictLimit - ddsSize;
+        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+        U32 ddsAttempt;
+
+        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+        }
+
+        {
+            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+            U32 const chainIndex = chainPackedPointer >> 8;
+
+            PREFETCH_L1(&dms->chainTable[chainIndex]);
+        }
+
+        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+            match = ddsBase + matchIndex;
+
+            if (!matchIndex) {
+                return ml;
+            }
+
+            /* guaranteed by table construction */
+            (void)ddsLowestIndex;
+            assert(matchIndex >= ddsLowestIndex);
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) {
+                    /* best possible, avoids read overflow on next attempt */
+                    return ml;
+                }
+            }
+        }
+
+        {
+            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+            U32 chainIndex = chainPackedPointer >> 8;
+            U32 const chainLength = chainPackedPointer & 0xFF;
+            U32 const chainAttempts = nbAttempts - ddsAttempt;
+            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+            U32 chainAttempt;
+
+            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+            }
+
+            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+                size_t currentMl=0;
+                const BYTE* match;
+                matchIndex = dms->chainTable[chainIndex];
+                match = ddsBase + matchIndex;
+
+                /* guaranteed by table construction */
+                assert(matchIndex >= ddsLowestIndex);
+                assert(match+4 <= ddsEnd);
+                if (MEM_read32(match) == MEM_read32(ip)) {
+                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+                }
+
+                /* save best solution */
+                if (currentMl > ml) {
+                    ml = currentMl;
+                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
+                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+                }
+            }
+        }
+    } else if (dictMode == ZSTD_dictMatchState) {
+        const U32* const dmsChainTable = dms->chainTable;
+        const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
+        const U32 dmsChainMask         = dmsChainSize - 1;
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;
+        const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
+
+        matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
+
+        for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
+            size_t currentMl=0;
+            const BYTE* const match = dmsBase + matchIndex;
+            assert(match+4 <= dmsEnd);
+            if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+
+            if (matchIndex <= dmsMinChain) break;
+
+            matchIndex = dmsChainTable[matchIndex & dmsChainMask];
+        }
+    }
+
+    return ml;
+}
+
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    }
+}
+
+
+static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    }
+}
+
+
+static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
+    }
+}
+
+
+FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* const iLimit,
+                        size_t* offsetPtr)
+{
+    switch(ms->cParams.minMatch)
+    {
+    default : /* includes case 3 */
+    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
+    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
+    case 7 :
+    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    }
+}
+
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_lazy_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth,
+                        ZSTD_dictMode_e const dictMode)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+
+    /*
+     * This table is indexed first by the four ZSTD_dictMode_e values, and then
+     * by the two searchMethod_e values. NULLs are placed for configurations
+     * that should never occur (extDict modes go to the other implementation
+     * below and there is no DDSS for binary tree search yet).
+     */
+    const searchMax_f searchFuncs[4][2] = {
+        {
+            ZSTD_HcFindBestMatch_selectMLS,
+            ZSTD_BtFindBestMatch_selectMLS
+        },
+        {
+            NULL,
+            NULL
+        },
+        {
+            ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
+            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
+        },
+        {
+            ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
+            NULL
+        }
+    };
+
+    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
+    U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
+
+    const int isDMS = dictMode == ZSTD_dictMatchState;
+    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
+    const int isDxS = isDMS || isDDS;
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 dictLowestIndex      = isDxS ? dms->window.dictLimit : 0;
+    const BYTE* const dictBase     = isDxS ? dms->window.base : NULL;
+    const BYTE* const dictLowest   = isDxS ? dictBase + dictLowestIndex : NULL;
+    const BYTE* const dictEnd      = isDxS ? dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta       = isDxS ?
+                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                                     0;
+    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+    assert(searchMax != NULL);
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
+
+    /* init */
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
+    }
+    if (isDxS) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Match Loop */
+#if defined(__x86_64__)
+    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+
+        /* check repCode */
+        if (isDxS) {
+            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
+            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
+                                && repIndex < prefixLowestIndex) ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                if (depth==0) goto _storeSequence;
+            }
+        }
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            if (depth==0) goto _storeSequence;
+        }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            if ( (dictMode == ZSTD_noDict)
+              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                int const gain2 = (int)(mlRep * 3);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                if ((mlRep >= 4) && (gain2 > gain1))
+                    matchLength = mlRep, offset = 0, start = ip;
+            }
+            if (isDxS) {
+                const U32 repIndex = (U32)(ip - base) - offset_1;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                    int const gain2 = (int)(mlRep * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+            }
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                if ( (dictMode == ZSTD_noDict)
+                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                    int const gain2 = (int)(mlRep * 4);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offset = 0, start = ip;
+                }
+                if (isDxS) {
+                    const U32 repIndex = (U32)(ip - base) - offset_1;
+                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                        int const gain2 = (int)(mlRep * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((mlRep >= 4) && (gain2 > gain1))
+                            matchLength = mlRep, offset = 0, start = ip;
+                    }
+                }
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* NOTE:
+         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
+         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
+         * overflows the pointer, which is undefined behavior.
+         */
+        /* catch up */
+        if (offset) {
+            if (dictMode == ZSTD_noDict) {
+                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
+                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
+                    { start--; matchLength++; }
+            }
+            if (isDxS) {
+                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
+                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
+                while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            }
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        if (isDxS) {
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex = current2 - offset_2;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                        dictBase - dictIndexDelta + repIndex :
+                        base + repIndex;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
+                    matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
+                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                    ip += matchLength;
+                    anchor = ip;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        if (dictMode == ZSTD_noDict) {
+            while ( ((ip <= ilimit) & (offset_2>0))
+                 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
+                /* store sequence */
+                matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+    }   }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1 ? offset_1 : savedOffset;
+    rep[1] = offset_2 ? offset_2 : savedOffset;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictEnd  = dictBase + dictLimit;
+    const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
+    const U32 windowLog = ms->cParams.windowLog;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+    U32 offset_1 = rep[0], offset_2 = rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
+    /* init */
+    ip += (ip == prefixStart);
+
+    /* Match Loop */
+#if defined(__x86_64__)
+    /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+        U32 curr = (U32)(ip-base);
+
+        /* check repCode */
+        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
+            const U32 repIndex = (U32)(curr+1 - offset_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                if (depth==0) goto _storeSequence;
+        }   }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            curr++;
+            /* check repCode */
+            if (offset) {
+                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                const U32 repIndex = (U32)(curr - offset_1);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                    int const gain2 = (int)(repLength * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((repLength >= 4) && (gain2 > gain1))
+                        matchLength = repLength, offset = 0, start = ip;
+            }   }
+
+            /* search match, depth 1 */
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                curr++;
+                /* check repCode */
+                if (offset) {
+                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                    const U32 repIndex = (U32)(curr - offset_1);
+                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                    const BYTE* const repMatch = repBase + repIndex;
+                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                        /* repcode detected */
+                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
+                            matchLength = repLength, offset = 0, start = ip;
+                }   }
+
+                /* search match, depth 2 */
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* catch up */
+        if (offset) {
+            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        while (ip <= ilimit) {
+            const U32 repCurrent = (U32)(ip-base);
+            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+            const U32 repIndex = repCurrent - offset_2;
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+            if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                /* repcode detected we should take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+            break;
+    }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+}
+
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+}
diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
new file mode 100644 (file)
index 0000000..2fc5a61
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LAZY_H
+#define ZSTD_LAZY_H
+
+
+#include "zstd_compress_internal.h"
+
+/*
+ * Dedicated Dictionary Search Structure bucket log. In the
+ * ZSTD_dedicatedDictSearch mode, the hashTable has
+ * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just
+ * one.
+ */
+#define ZSTD_LAZY_DDSS_BUCKET_LOG 2
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+
+void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
+
+void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+#endif /* ZSTD_LAZY_H */
diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
new file mode 100644 (file)
index 0000000..8ef7e88
--- /dev/null
@@ -0,0 +1,686 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_ldm.h"
+
+#include "../common/debug.h"
+#include <linux/xxhash.h>
+#include "zstd_fast.h"          /* ZSTD_fillHashTable() */
+#include "zstd_double_fast.h"   /* ZSTD_fillDoubleHashTable() */
+#include "zstd_ldm_geartab.h"
+
+#define LDM_BUCKET_SIZE_LOG 3
+#define LDM_MIN_MATCH_LENGTH 64
+#define LDM_HASH_RLOG 7
+
+typedef struct {
+    U64 rolling;
+    U64 stopMask;
+} ldmRollingHashState_t;
+
+/* ZSTD_ldm_gear_init():
+ *
+ * Initializes the rolling hash state such that it will honor the
+ * settings in params. */
+static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params)
+{
+    unsigned maxBitsInMask = MIN(params->minMatchLength, 64);
+    unsigned hashRateLog = params->hashRateLog;
+
+    state->rolling = ~(U32)0;
+
+    /* The choice of the splitting criterion is subject to two conditions:
+     *   1. it has to trigger on average every 2^(hashRateLog) bytes;
+     *   2. ideally, it has to depend on a window of minMatchLength bytes.
+     *
+     * In the gear hash algorithm, bit n depends on the last n bytes;
+     * so in order to obtain a good quality splitting criterion it is
+     * preferable to use bits with high weight.
+     *
+     * To match condition 1 we use a mask with hashRateLog bits set
+     * and, because of the previous remark, we make sure these bits
+     * have the highest possible weight while still respecting
+     * condition 2.
+     */
+    if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) {
+        state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog);
+    } else {
+        /* In this degenerate case we simply honor the hash rate. */
+        state->stopMask = ((U64)1 << hashRateLog) - 1;
+    }
+}
+
+/* ZSTD_ldm_gear_feed():
+ *
+ * Registers in the splits array all the split points found in the first
+ * size bytes following the data pointer. This function terminates when
+ * either all the data has been processed or LDM_BATCH_SIZE splits are
+ * present in the splits array.
+ *
+ * Precondition: The splits array must not be full.
+ * Returns: The number of bytes processed. */
+static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state,
+                                 BYTE const* data, size_t size,
+                                 size_t* splits, unsigned* numSplits)
+{
+    size_t n;
+    U64 hash, mask;
+
+    hash = state->rolling;
+    mask = state->stopMask;
+    n = 0;
+
+#define GEAR_ITER_ONCE() do { \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1; \
+        if (UNLIKELY((hash & mask) == 0)) { \
+            splits[*numSplits] = n; \
+            *numSplits += 1; \
+            if (*numSplits == LDM_BATCH_SIZE) \
+                goto done; \
+        } \
+    } while (0)
+
+    while (n + 3 < size) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < size) {
+        GEAR_ITER_ONCE();
+    }
+
+#undef GEAR_ITER_ONCE
+
+done:
+    state->rolling = hash;
+    return n;
+}
+
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams)
+{
+    params->windowLog = cParams->windowLog;
+    ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
+    DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
+    if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
+    if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
+    if (params->hashLog == 0) {
+        params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
+        assert(params->hashLog <= ZSTD_HASHLOG_MAX);
+    }
+    if (params->hashRateLog == 0) {
+        params->hashRateLog = params->windowLog < params->hashLog
+                                   ? 0
+                                   : params->windowLog - params->hashLog;
+    }
+    params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
+}
+
+size_t ZSTD_ldm_getTableSize(ldmParams_t params)
+{
+    size_t const ldmHSize = ((size_t)1) << params.hashLog;
+    size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
+    size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
+    size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
+                           + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
+    return params.enableLdm ? totalSize : 0;
+}
+
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
+{
+    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+}
+
+/* ZSTD_ldm_getBucket() :
+ *  Returns a pointer to the start of the bucket associated with hash. */
+static ldmEntry_t* ZSTD_ldm_getBucket(
+        ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
+{
+    return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
+}
+
+/* ZSTD_ldm_insertEntry() :
+ *  Insert the entry with corresponding hash into the hash table */
+static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
+                                 size_t const hash, const ldmEntry_t entry,
+                                 ldmParams_t const ldmParams)
+{
+    BYTE* const pOffset = ldmState->bucketOffsets + hash;
+    unsigned const offset = *pOffset;
+
+    *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry;
+    *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1));
+
+}
+
+/* ZSTD_ldm_countBackwardsMatch() :
+ *  Returns the number of bytes that match backwards before pIn and pMatch.
+ *
+ *  We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
+static size_t ZSTD_ldm_countBackwardsMatch(
+            const BYTE* pIn, const BYTE* pAnchor,
+            const BYTE* pMatch, const BYTE* pMatchBase)
+{
+    size_t matchLength = 0;
+    while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) {
+        pIn--;
+        pMatch--;
+        matchLength++;
+    }
+    return matchLength;
+}
+
+/* ZSTD_ldm_countBackwardsMatch_2segments() :
+ *  Returns the number of bytes that match backwards from pMatch,
+ *  even with the backwards match spanning 2 different segments.
+ *
+ *  On reaching `pMatchBase`, start counting from mEnd */
+static size_t ZSTD_ldm_countBackwardsMatch_2segments(
+                    const BYTE* pIn, const BYTE* pAnchor,
+                    const BYTE* pMatch, const BYTE* pMatchBase,
+                    const BYTE* pExtDictStart, const BYTE* pExtDictEnd)
+{
+    size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase);
+    if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) {
+        /* If backwards match is entirely in the extDict or prefix, immediately return */
+        return matchLength;
+    }
+    DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength);
+    matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart);
+    DEBUGLOG(7, "final backwards match length = %zu", matchLength);
+    return matchLength;
+}
+
+/* ZSTD_ldm_fillFastTables() :
+ *
+ *  Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
+ *  This is similar to ZSTD_loadDictionaryContent.
+ *
+ *  The tables for the other strategies are filled within their
+ *  block compressors. */
+static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
+                                      void const* end)
+{
+    const BYTE* const iend = (const BYTE*)end;
+
+    switch(ms->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+    case ZSTD_btlazy2:
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        break;
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    return 0;
+}
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* ldmState, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params)
+{
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const istart = ip;
+    ldmRollingHashState_t hashState;
+    size_t* const splits = ldmState->splitIndices;
+    unsigned numSplits;
+
+    DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
+
+    ZSTD_ldm_gear_init(&hashState, params);
+    while (ip < iend) {
+        size_t hashed;
+        unsigned n;
+        
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            if (ip + splits[n] >= istart + minMatchLength) {
+                BYTE const* const split = ip + splits[n] - minMatchLength;
+                U64 const xxhash = xxh64(split, minMatchLength, 0);
+                U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+                ldmEntry_t entry;
+
+                entry.offset = (U32)(split - base);
+                entry.checksum = (U32)(xxhash >> 32);
+                ZSTD_ldm_insertEntry(ldmState, hash, entry, *params);
+            }
+        }
+
+        ip += hashed;
+    }
+}
+
+
+/* ZSTD_ldm_limitTableUpdate() :
+ *
+ *  Sets cctx->nextToUpdate to a position corresponding closer to anchor
+ *  if it is far way
+ *  (after a long match, only update tables a limited amount). */
+static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
+{
+    U32 const curr = (U32)(anchor - ms->window.base);
+    if (curr > ms->nextToUpdate + 1024) {
+        ms->nextToUpdate =
+            curr - MIN(512, curr - ms->nextToUpdate - 1024);
+    }
+}
+
+static size_t ZSTD_ldm_generateSequences_internal(
+        ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    /* LDM parameters */
+    int const extDict = ZSTD_window_hasExtDict(ldmState->window);
+    U32 const minMatchLength = params->minMatchLength;
+    U32 const entsPerBucket = 1U << params->bucketSizeLog;
+    U32 const hBits = params->hashLog - params->bucketSizeLog;
+    /* Prefix and extDict parameters */
+    U32 const dictLimit = ldmState->window.dictLimit;
+    U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
+    BYTE const* const base = ldmState->window.base;
+    BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
+    BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
+    BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
+    BYTE const* const lowPrefixPtr = base + dictLimit;
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    BYTE const* const ilimit = iend - HASH_READ_SIZE;
+    /* Input positions */
+    BYTE const* anchor = istart;
+    BYTE const* ip = istart;
+    /* Rolling hash state */
+    ldmRollingHashState_t hashState;
+    /* Arrays for staged-processing */
+    size_t* const splits = ldmState->splitIndices;
+    ldmMatchCandidate_t* const candidates = ldmState->matchCandidates;
+    unsigned numSplits;
+
+    if (srcSize < minMatchLength)
+        return iend - anchor;
+
+    /* Initialize the rolling hash state with the first minMatchLength bytes */
+    ZSTD_ldm_gear_init(&hashState, params);
+    {
+        size_t n = 0;
+
+        while (n < minMatchLength) {
+            numSplits = 0;
+            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
+                                    splits, &numSplits);
+        }
+        ip += minMatchLength;
+    }
+
+    while (ip < ilimit) {
+        size_t hashed;
+        unsigned n;
+
+        numSplits = 0;
+        hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip,
+                                    splits, &numSplits);
+
+        for (n = 0; n < numSplits; n++) {
+            BYTE const* const split = ip + splits[n] - minMatchLength;
+            U64 const xxhash = xxh64(split, minMatchLength, 0);
+            U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1));
+
+            candidates[n].split = split;
+            candidates[n].hash = hash;
+            candidates[n].checksum = (U32)(xxhash >> 32);
+            candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params);
+            PREFETCH_L1(candidates[n].bucket);
+        }
+
+        for (n = 0; n < numSplits; n++) {
+            size_t forwardMatchLength = 0, backwardMatchLength = 0,
+                   bestMatchLength = 0, mLength;
+            BYTE const* const split = candidates[n].split;
+            U32 const checksum = candidates[n].checksum;
+            U32 const hash = candidates[n].hash;
+            ldmEntry_t* const bucket = candidates[n].bucket;
+            ldmEntry_t const* cur;
+            ldmEntry_t const* bestEntry = NULL;
+            ldmEntry_t newEntry;
+
+            newEntry.offset = (U32)(split - base);
+            newEntry.checksum = checksum;
+
+            /* If a split point would generate a sequence overlapping with
+             * the previous one, we merely register it in the hash table and
+             * move on */
+            if (split < anchor) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            for (cur = bucket; cur < bucket + entsPerBucket; cur++) {
+                size_t curForwardMatchLength, curBackwardMatchLength,
+                       curTotalMatchLength;
+                if (cur->checksum != checksum || cur->offset <= lowestIndex) {
+                    continue;
+                }
+                if (extDict) {
+                    BYTE const* const curMatchBase =
+                        cur->offset < dictLimit ? dictBase : base;
+                    BYTE const* const pMatch = curMatchBase + cur->offset;
+                    BYTE const* const matchEnd =
+                        cur->offset < dictLimit ? dictEnd : iend;
+                    BYTE const* const lowMatchPtr =
+                        cur->offset < dictLimit ? dictStart : lowPrefixPtr;
+                    curForwardMatchLength =
+                        ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments(
+                            split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd);
+                } else { /* !extDict */
+                    BYTE const* const pMatch = base + cur->offset;
+                    curForwardMatchLength = ZSTD_count(split, pMatch, iend);
+                    if (curForwardMatchLength < minMatchLength) {
+                        continue;
+                    }
+                    curBackwardMatchLength =
+                        ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr);
+                }
+                curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength;
+
+                if (curTotalMatchLength > bestMatchLength) {
+                    bestMatchLength = curTotalMatchLength;
+                    forwardMatchLength = curForwardMatchLength;
+                    backwardMatchLength = curBackwardMatchLength;
+                    bestEntry = cur;
+                }
+            }
+
+            /* No match found -- insert an entry into the hash table
+             * and process the next candidate match */
+            if (bestEntry == NULL) {
+                ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+                continue;
+            }
+
+            /* Match found */
+            mLength = forwardMatchLength + backwardMatchLength;
+            {
+                U32 const offset = (U32)(split - base) - bestEntry->offset;
+                rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
+
+                /* Out of sequence storage */
+                if (rawSeqStore->size == rawSeqStore->capacity)
+                    return ERROR(dstSize_tooSmall);
+                seq->litLength = (U32)(split - backwardMatchLength - anchor);
+                seq->matchLength = (U32)mLength;
+                seq->offset = offset;
+                rawSeqStore->size++;
+            }
+
+            /* Insert the current entry into the hash table --- it must be
+             * done after the previous block to avoid clobbering bestEntry */
+            ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
+
+            anchor = split + forwardMatchLength;
+        }
+
+        ip += hashed;
+    }
+
+    return iend - anchor;
+}
+
+/*! ZSTD_ldm_reduceTable() :
+ *  reduce table indexes by `reducerValue` */
+static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
+                                 U32 const reducerValue)
+{
+    U32 u;
+    for (u = 0; u < size; u++) {
+        if (table[u].offset < reducerValue) table[u].offset = 0;
+        else table[u].offset -= reducerValue;
+    }
+}
+
+size_t ZSTD_ldm_generateSequences(
+        ldmState_t* ldmState, rawSeqStore_t* sequences,
+        ldmParams_t const* params, void const* src, size_t srcSize)
+{
+    U32 const maxDist = 1U << params->windowLog;
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    size_t const kMaxChunkSize = 1 << 20;
+    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
+    size_t chunk;
+    size_t leftoverSize = 0;
+
+    assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
+    /* Check that ZSTD_window_update() has been called for this chunk prior
+     * to passing it to this function.
+     */
+    assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
+    /* The input could be very large (in zstdmt), so it must be broken up into
+     * chunks to enforce the maximum distance and handle overflow correction.
+     */
+    assert(sequences->pos <= sequences->size);
+    assert(sequences->size <= sequences->capacity);
+    for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
+        BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
+        size_t const remaining = (size_t)(iend - chunkStart);
+        BYTE const *const chunkEnd =
+            (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
+        size_t const chunkSize = chunkEnd - chunkStart;
+        size_t newLeftoverSize;
+        size_t const prevSize = sequences->size;
+
+        assert(chunkStart < iend);
+        /* 1. Perform overflow correction if necessary. */
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+            U32 const ldmHSize = 1U << params->hashLog;
+            U32 const correction = ZSTD_window_correctOverflow(
+                &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
+            ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
+            /* invalidate dictionaries on overflow correction */
+            ldmState->loadedDictEnd = 0;
+        }
+        /* 2. We enforce the maximum offset allowed.
+         *
+         * kMaxChunkSize should be small enough that we don't lose too much of
+         * the window through early invalidation.
+         * TODO: * Test the chunk size.
+         *       * Try invalidation after the sequence generation and test the
+         *         the offset against maxDist directly.
+         *
+         * NOTE: Because of dictionaries + sequence splitting we MUST make sure
+         * that any offset used is valid at the END of the sequence, since it may
+         * be split into two sequences. This condition holds when using
+         * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
+         * against maxDist directly, we'll have to carefully handle that case.
+         */
+        ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
+        /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
+        newLeftoverSize = ZSTD_ldm_generateSequences_internal(
+            ldmState, sequences, params, chunkStart, chunkSize);
+        if (ZSTD_isError(newLeftoverSize))
+            return newLeftoverSize;
+        /* 4. We add the leftover literals from previous iterations to the first
+         *    newly generated sequence, or add the `newLeftoverSize` if none are
+         *    generated.
+         */
+        /* Prepend the leftover literals from the last call */
+        if (prevSize < sequences->size) {
+            sequences->seq[prevSize].litLength += (U32)leftoverSize;
+            leftoverSize = newLeftoverSize;
+        } else {
+            assert(newLeftoverSize == chunkSize);
+            leftoverSize += chunkSize;
+        }
+    }
+    return 0;
+}
+
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+    while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
+        if (srcSize <= seq->litLength) {
+            /* Skip past srcSize literals */
+            seq->litLength -= (U32)srcSize;
+            return;
+        }
+        srcSize -= seq->litLength;
+        seq->litLength = 0;
+        if (srcSize < seq->matchLength) {
+            /* Skip past the first srcSize of the match */
+            seq->matchLength -= (U32)srcSize;
+            if (seq->matchLength < minMatch) {
+                /* The match is too short, omit it */
+                if (rawSeqStore->pos + 1 < rawSeqStore->size) {
+                    seq[1].litLength += seq[0].matchLength;
+                }
+                rawSeqStore->pos++;
+            }
+            return;
+        }
+        srcSize -= seq->matchLength;
+        seq->matchLength = 0;
+        rawSeqStore->pos++;
+    }
+}
+
+/*
+ * If the sequence length is longer than remaining then the sequence is split
+ * between this block and the next.
+ *
+ * Returns the current sequence to handle, or if the rest of the block should
+ * be literals, it returns a sequence with offset == 0.
+ */
+static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
+                                 U32 const remaining, U32 const minMatch)
+{
+    rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
+    assert(sequence.offset > 0);
+    /* Likely: No partial sequence */
+    if (remaining >= sequence.litLength + sequence.matchLength) {
+        rawSeqStore->pos++;
+        return sequence;
+    }
+    /* Cut the sequence short (offset == 0 ==> rest is literals). */
+    if (remaining <= sequence.litLength) {
+        sequence.offset = 0;
+    } else if (remaining < sequence.litLength + sequence.matchLength) {
+        sequence.matchLength = remaining - sequence.litLength;
+        if (sequence.matchLength < minMatch) {
+            sequence.offset = 0;
+        }
+    }
+    /* Skip past `remaining` bytes for the future sequences. */
+    ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
+    return sequence;
+}
+
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+    ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    void const* src, size_t srcSize)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    unsigned const minMatch = cParams->minMatch;
+    ZSTD_blockCompressor const blockCompressor =
+        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+    /* Input bounds */
+    BYTE const* const istart = (BYTE const*)src;
+    BYTE const* const iend = istart + srcSize;
+    /* Input positions */
+    BYTE const* ip = istart;
+
+    DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
+    /* If using opt parser, use LDMs only as candidates rather than always accepting them */
+    if (cParams->strategy >= ZSTD_btopt) {
+        size_t lastLLSize;
+        ms->ldmSeqStore = rawSeqStore;
+        lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);
+        ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize);
+        return lastLLSize;
+    }
+
+    assert(rawSeqStore->pos <= rawSeqStore->size);
+    assert(rawSeqStore->size <= rawSeqStore->capacity);
+    /* Loop through each sequence and apply the block compressor to the literals */
+    while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
+        /* maybeSplitSequence updates rawSeqStore->pos */
+        rawSeq const sequence = maybeSplitSequence(rawSeqStore,
+                                                   (U32)(iend - ip), minMatch);
+        int i;
+        /* End signal */
+        if (sequence.offset == 0)
+            break;
+
+        assert(ip + sequence.litLength + sequence.matchLength <= iend);
+
+        /* Fill tables for block compressor */
+        ZSTD_ldm_limitTableUpdate(ms, ip);
+        ZSTD_ldm_fillFastTables(ms, ip);
+        /* Run the block compressor */
+        DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
+        {
+            size_t const newLitLength =
+                blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
+            ip += sequence.litLength;
+            /* Update the repcodes */
+            for (i = ZSTD_REP_NUM - 1; i > 0; i--)
+                rep[i] = rep[i-1];
+            rep[0] = sequence.offset;
+            /* Store the sequence */
+            ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
+                          sequence.offset + ZSTD_REP_MOVE,
+                          sequence.matchLength - MINMATCH);
+            ip += sequence.matchLength;
+        }
+    }
+    /* Fill the tables for the block compressor */
+    ZSTD_ldm_limitTableUpdate(ms, ip);
+    ZSTD_ldm_fillFastTables(ms, ip);
+    /* Compress the last literals */
+    return blockCompressor(ms, seqStore, rep, ip, iend - ip);
+}
diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
new file mode 100644 (file)
index 0000000..25b2527
--- /dev/null
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_H
+#define ZSTD_LDM_H
+
+
+#include "zstd_compress_internal.h"   /* ldmParams_t, U32 */
+#include <linux/zstd.h>   /* ZSTD_CCtx, size_t */
+
+/*-*************************************
+*  Long distance matching
+***************************************/
+
+#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT
+
+void ZSTD_ldm_fillHashTable(
+            ldmState_t* state, const BYTE* ip,
+            const BYTE* iend, ldmParams_t const* params);
+
+/*
+ * ZSTD_ldm_generateSequences():
+ *
+ * Generates the sequences using the long distance match finder.
+ * Generates long range matching sequences in `sequences`, which parse a prefix
+ * of the source. `sequences` must be large enough to store every sequence,
+ * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
+ * @returns 0 or an error code.
+ *
+ * NOTE: The user must have called ZSTD_window_update() for all of the input
+ * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
+ * NOTE: This function returns an error if it runs out of space to store
+ *       sequences.
+ */
+size_t ZSTD_ldm_generateSequences(
+            ldmState_t* ldms, rawSeqStore_t* sequences,
+            ldmParams_t const* params, void const* src, size_t srcSize);
+
+/*
+ * ZSTD_ldm_blockCompress():
+ *
+ * Compresses a block using the predefined sequences, along with a secondary
+ * block compressor. The literals section of every sequence is passed to the
+ * secondary block compressor, and those sequences are interspersed with the
+ * predefined sequences. Returns the length of the last literals.
+ * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
+ * `rawSeqStore.seq` may also be updated to split the last sequence between two
+ * blocks.
+ * @return The length of the last literals.
+ *
+ * NOTE: The source must be at most the maximum block size, but the predefined
+ * sequences can be any size, and may be longer than the block. In the case that
+ * they are longer than the block, the last sequences may need to be split into
+ * two. We handle that case correctly, and update `rawSeqStore` appropriately.
+ * NOTE: This function does not return any errors.
+ */
+size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            void const* src, size_t srcSize);
+
+/*
+ * ZSTD_ldm_skipSequences():
+ *
+ * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`.
+ * Avoids emitting matches less than `minMatch` bytes.
+ * Must be called for data that is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize,
+    U32 const minMatch);
+
+/* ZSTD_ldm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'.
+ * Not to be used in conjunction with ZSTD_ldm_skipSequences().
+ * Must be called for data with is not passed to ZSTD_ldm_blockCompress().
+ */
+void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes);
+
+/* ZSTD_ldm_getTableSize() :
+ *  Estimate the space needed for long distance matching tables or 0 if LDM is
+ *  disabled.
+ */
+size_t ZSTD_ldm_getTableSize(ldmParams_t params);
+
+/* ZSTD_ldm_getSeqSpace() :
+ *  Return an upper bound on the number of sequences that can be produced by
+ *  the long distance matcher, or 0 if LDM is disabled.
+ */
+size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize);
+
+/* ZSTD_ldm_adjustParameters() :
+ *  If the params->hashRateLog is not set, set it to its default value based on
+ *  windowLog and params->hashLog.
+ *
+ *  Ensures that params->bucketSizeLog is <= params->hashLog (setting it to
+ *  params->hashLog if it is not).
+ *
+ *  Ensures that the minMatchLength >= targetLength during optimal parsing.
+ */
+void ZSTD_ldm_adjustParameters(ldmParams_t* params,
+                               ZSTD_compressionParameters const* cParams);
+
+
+#endif /* ZSTD_FAST_H */
diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
new file mode 100644 (file)
index 0000000..e5c24d8
--- /dev/null
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_LDM_GEARTAB_H
+#define ZSTD_LDM_GEARTAB_H
+
+static U64 ZSTD_ldm_gearTab[256] = {
+    0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
+    0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
+    0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
+    0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889,
+    0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e,
+    0x37b628620b628,    0x49a8d455d88caf5,  0x8556d711e6958140,
+    0x4f7ae74fc605c1f,  0x829f0c3468bd3a20, 0x4ffdc885c625179e,
+    0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f,
+    0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391,
+    0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210,
+    0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be,
+    0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a,
+    0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b,
+    0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4,
+    0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb,
+    0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312,
+    0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01,
+    0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc,
+    0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967,
+    0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553,
+    0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f,
+    0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2,
+    0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d,
+    0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a,
+    0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74,
+    0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3,
+    0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1,
+    0xff452823dbb010a,  0x9d42ed614f3dd267, 0x5b9313c06257c57b,
+    0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568,
+    0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a,
+    0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1,
+    0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9,
+    0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463,
+    0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba,
+    0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9,
+    0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61,
+    0x24a5483879c453e3, 0x88026889192b4b9,  0x28da96671782dbec,
+    0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6,
+    0xbc135a0a704b70ba, 0x69cd868f7622ada,  0xbc37ba89e0b9c0ab,
+    0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5,
+    0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59,
+    0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7,
+    0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc,
+    0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb,
+    0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be,
+    0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312,
+    0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1,
+    0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc,
+    0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d,
+    0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445,
+    0x820d471e20b348e,  0x1874383cb83d46dc, 0x97edeec7a1efe11c,
+    0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5,
+    0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5,
+    0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28,
+    0xaf846af6ab7d0bf4, 0xe5af208eb666e49,  0x5e6622f73534cd6a,
+    0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9,
+    0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15,
+    0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef,
+    0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2,
+    0x9f90e4c5fd508d8,  0xa34e5956fbaf3385, 0x2e2f8e151d3ef375,
+    0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3,
+    0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595,
+    0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389,
+    0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4,
+    0x4228e364c5b5ed7,  0x9d7a3edf0da43911, 0x8edcfeda24686756,
+    0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc,
+    0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45,
+    0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea,
+    0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f,
+    0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc,
+    0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c,
+    0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a,
+    0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17,
+    0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3,
+    0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4,
+    0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91,
+    0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40,
+    0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741,
+    0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f,
+    0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4,
+    0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad,
+    0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047,
+    0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2,
+    0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e,
+    0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b,
+    0x2b4da14f2613d8f4
+};
+
+#endif /* ZSTD_LDM_GEARTAB_H */
diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c
new file mode 100644 (file)
index 0000000..0433705
--- /dev/null
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include "zstd_compress_internal.h"
+#include "hist.h"
+#include "zstd_opt.h"
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE     (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */
+static void
+ZSTD_rescaleFreqs(optState_t* const optPtr,
+            const BYTE* const src, size_t const srcSize,
+                  int const optLevel)
+{
+    int const compressedLiterals = ZSTD_compressedLiterals(optPtr);
+    DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize);
+    optPtr->priceType = zop_dynamic;
+
+    if (optPtr->litLengthSum == 0) {  /* first block : init */
+        if (srcSize <= ZSTD_PREDEF_THRESHOLD) {  /* heuristic */
+            DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef");
+            optPtr->priceType = zop_predef;
+        }
+
+        assert(optPtr->symbolCosts != NULL);
+        if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) {
+            /* huffman table presumed generated by dictionary */
+            optPtr->priceType = zop_dynamic;
+
+            if (compressedLiterals) {
+                unsigned lit;
+                assert(optPtr->litFreq != NULL);
+                optPtr->litSum = 0;
+                for (lit=0; lit<=MaxLit; lit++) {
+                    U32 const scaleLog = 11;   /* scale to 2K */
+                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    assert(bitCost <= scaleLog);
+                    optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litSum += optPtr->litFreq[lit];
+            }   }
+
+            {   unsigned ll;
+                FSE_CState_t llstate;
+                FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
+                optPtr->litLengthSum = 0;
+                for (ll=0; ll<=MaxLL; ll++) {
+                    U32 const scaleLog = 10;   /* scale to 1K */
+                    U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
+                    assert(bitCost < scaleLog);
+                    optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->litLengthSum += optPtr->litLengthFreq[ll];
+            }   }
+
+            {   unsigned ml;
+                FSE_CState_t mlstate;
+                FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
+                optPtr->matchLengthSum = 0;
+                for (ml=0; ml<=MaxML; ml++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
+                    assert(bitCost < scaleLog);
+                    optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
+            }   }
+
+            {   unsigned of;
+                FSE_CState_t ofstate;
+                FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
+                optPtr->offCodeSum = 0;
+                for (of=0; of<=MaxOff; of++) {
+                    U32 const scaleLog = 10;
+                    U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
+                    assert(bitCost < scaleLog);
+                    optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
+                    optPtr->offCodeSum += optPtr->offCodeFreq[of];
+            }   }
+
+        } else {  /* not a dictionary */
+
+            assert(optPtr->litFreq != NULL);
+            if (compressedLiterals) {
+                unsigned lit = MaxLit;
+                HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
+                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+            }
+
+            {   unsigned ll;
+                for (ll=0; ll<=MaxLL; ll++)
+                    optPtr->litLengthFreq[ll] = 1;
+            }
+            optPtr->litLengthSum = MaxLL+1;
+
+            {   unsigned ml;
+                for (ml=0; ml<=MaxML; ml++)
+                    optPtr->matchLengthFreq[ml] = 1;
+            }
+            optPtr->matchLengthSum = MaxML+1;
+
+            {   unsigned of;
+                for (of=0; of<=MaxOff; of++)
+                    optPtr->offCodeFreq[of] = 1;
+            }
+            optPtr->offCodeSum = MaxOff+1;
+
+        }
+
+    } else {   /* new block : re-use previous statistics, scaled down */
+
+        if (compressedLiterals)
+            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+    }
+
+    ZSTD_setBasePrices(optPtr, optLevel);
+}
+
+/* ZSTD_rawLiteralsCost() :
+ * price of literals (only) in specified segment (which length can be 0).
+ * does not include price of literalLength symbol */
+static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
+                                const optState_t* const optPtr,
+                                int optLevel)
+{
+    if (litLength == 0) return 0;
+
+    if (!ZSTD_compressedLiterals(optPtr))
+        return (litLength << 3) * BITCOST_MULTIPLIER;  /* Uncompressed - 8 bytes per literal. */
+
+    if (optPtr->priceType == zop_predef)
+        return (litLength*6) * BITCOST_MULTIPLIER;  /* 6 bit per literal - no statistic used */
+
+    /* dynamic statistics */
+    {   U32 price = litLength * optPtr->litSumBasePrice;
+        U32 u;
+        for (u=0; u < litLength; u++) {
+            assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice);   /* literal cost should never be negative */
+            price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
+        }
+        return price;
+    }
+}
+
+/* ZSTD_litLengthPrice() :
+ * cost of literalLength symbol */
+static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
+{
+    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+
+    /* dynamic statistics */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        return (LL_bits[llCode] * BITCOST_MULTIPLIER)
+             + optPtr->litLengthSumBasePrice
+             - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
+    }
+}
+
+/* ZSTD_getMatchPrice() :
+ * Provides the cost of the match part (offset + matchLength) of a sequence
+ * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_getMatchPrice(U32 const offset,
+                   U32 const matchLength,
+             const optState_t* const optPtr,
+                   int const optLevel)
+{
+    U32 price;
+    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const mlBase = matchLength - MINMATCH;
+    assert(matchLength >= MINMATCH);
+
+    if (optPtr->priceType == zop_predef)  /* fixed scheme, do not use statistics */
+        return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
+
+    /* dynamic statistics */
+    price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
+    if ((optLevel<2) /*static*/ && offCode >= 20)
+        price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
+
+    /* match Length */
+    {   U32 const mlCode = ZSTD_MLcode(mlBase);
+        price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
+    }
+
+    price += BITCOST_MULTIPLIER / 5;   /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
+
+    DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
+    return price;
+}
+
+/* ZSTD_updateStats() :
+ * assumption : literals + litLengtn <= iend */
+static void ZSTD_updateStats(optState_t* const optPtr,
+                             U32 litLength, const BYTE* literals,
+                             U32 offsetCode, U32 matchLength)
+{
+    /* literals */
+    if (ZSTD_compressedLiterals(optPtr)) {
+        U32 u;
+        for (u=0; u < litLength; u++)
+            optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
+        optPtr->litSum += litLength*ZSTD_LITFREQ_ADD;
+    }
+
+    /* literal Length */
+    {   U32 const llCode = ZSTD_LLcode(litLength);
+        optPtr->litLengthFreq[llCode]++;
+        optPtr->litLengthSum++;
+    }
+
+    /* match offset code (0-2=>repCode; 3+=>offset+2) */
+    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+        assert(offCode <= MaxOff);
+        optPtr->offCodeFreq[offCode]++;
+        optPtr->offCodeSum++;
+    }
+
+    /* match Length */
+    {   U32 const mlBase = matchLength - MINMATCH;
+        U32 const mlCode = ZSTD_MLcode(mlBase);
+        optPtr->matchLengthFreq[mlCode]++;
+        optPtr->matchLengthSum++;
+    }
+}
+
+
+/* ZSTD_readMINMATCH() :
+ * function safe only for comparisons
+ * assumption : memPtr must be at least 4 bytes before end of buffer */
+MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
+{
+    switch (length)
+    {
+    default :
+    case 4 : return MEM_read32(memPtr);
+    case 3 : if (MEM_isLittleEndian())
+                return MEM_read32(memPtr)<<8;
+             else
+                return MEM_read32(memPtr)>>8;
+    }
+}
+
+
+/* Update hashTable3 up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+                                              U32* nextToUpdate3,
+                                              const BYTE* const ip)
+{
+    U32* const hashTable3 = ms->hashTable3;
+    U32 const hashLog3 = ms->hashLog3;
+    const BYTE* const base = ms->window.base;
+    U32 idx = *nextToUpdate3;
+    U32 const target = (U32)(ip - base);
+    size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3);
+    assert(hashLog3 > 0);
+
+    while(idx < target) {
+        hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
+        idx++;
+    }
+
+    *nextToUpdate3 = target;
+    return hashTable3[hash3];
+}
+
+
+/*-*************************************
+*  Binary Tree search
+***************************************/
+/* ZSTD_insertBt1() : add one or multiple positions to tree.
+ *  ip : assumed <= iend-8 .
+ * @return : nb of positions added */
+static U32 ZSTD_insertBt1(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                U32 const mls, const int extDict)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32*   const hashTable = ms->hashTable;
+    U32    const hashLog = cParams->hashLog;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32*   const bt = ms->chainTable;
+    U32    const btLog  = cParams->chainLog - 1;
+    U32    const btMask = (1 << btLog) - 1;
+    U32 matchIndex = hashTable[h];
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* match;
+    const U32 curr = (U32)(ip-base);
+    const U32 btLow = btMask >= curr ? 0 : curr - btMask;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = smallerPtr + 1;
+    U32 dummy32;   /* to be nullified at the end */
+    U32 const windowLow = ms->window.lowLimit;
+    U32 matchEndIdx = curr+8+1;
+    size_t bestLength = 8;
+    U32 nbCompares = 1U << cParams->searchLog;
+#ifdef ZSTD_C_PREDICT
+    U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0);
+    U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1);
+    predictedSmall += (predictedSmall>0);
+    predictedLarge += (predictedLarge>0);
+#endif /* ZSTD_C_PREDICT */
+
+    DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
+
+    assert(ip <= iend-8);   /* required for h calculation */
+    hashTable[h] = curr;   /* Update Hash Table */
+
+    assert(windowLow > 0);
+    for (; nbCompares && (matchIndex >= windowLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(matchIndex < curr);
+
+#ifdef ZSTD_C_PREDICT   /* note : can create issues when hlog small <= 11 */
+        const U32* predictPtr = bt + 2*((matchIndex-1) & btMask);   /* written this way, as bt is a roll buffer */
+        if (matchIndex == predictedSmall) {
+            /* no need to check length, result known */
+            *smallerPtr = matchIndex;
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new "smaller" => larger of match */
+            matchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            predictedSmall = predictPtr[1] + (predictPtr[1]>0);
+            continue;
+        }
+        if (matchIndex == predictedLarge) {
+            *largerPtr = matchIndex;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+            predictedLarge = predictPtr[0] + (predictPtr[0]>0);
+            continue;
+        }
+#endif
+
+        if (!extDict || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);   /* might be wrong if actually extDict */
+            match = base + matchIndex;
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
+        } else {
+            match = dictBase + matchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* to prepare for next usage of match[matchLength] */
+        }
+
+        if (matchLength > bestLength) {
+            bestLength = matchLength;
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+        }
+
+        if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
+            break;   /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
+        }
+
+        if (match[matchLength] < ip[matchLength]) {  /* necessarily within buffer */
+            /* match is smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            smallerPtr = nextPtr+1;               /* new "candidate" => larger than match, which was smaller than target */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous and closer to current */
+        } else {
+            /* match is larger than current */
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop searching */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+    {   U32 positions = 0;
+        if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384));   /* speed optimization */
+        assert(matchEndIdx > curr + 8);
+        return MAX(positions, matchEndIdx - (curr + 8));
+    }
+}
+
+FORCE_INLINE_TEMPLATE
+void ZSTD_updateTree_internal(
+                ZSTD_matchState_t* ms,
+                const BYTE* const ip, const BYTE* const iend,
+                const U32 mls, const ZSTD_dictMode_e dictMode)
+{
+    const BYTE* const base = ms->window.base;
+    U32 const target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+                idx, target, dictMode);
+
+    while(idx < target) {
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        assert(idx < (U32)(idx + forward));
+        idx += forward;
+    }
+    assert((size_t)(ip - base) <= (size_t)(U32)(-1));
+    assert((size_t)(iend - base) <= (size_t)(U32)(-1));
+    ms->nextToUpdate = target;
+}
+
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
+    ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
+}
+
+FORCE_INLINE_TEMPLATE
+U32 ZSTD_insertBtAndGetAllMatches (
+                    ZSTD_match_t* matches,   /* store result (found matches) in this table (presumed large enough) */
+                    ZSTD_matchState_t* ms,
+                    U32* nextToUpdate3,
+                    const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
+                    const U32 rep[ZSTD_REP_NUM],
+                    U32 const ll0,   /* tells if associated literal length is 0 or not. This value must be 0 or 1 */
+                    const U32 lengthToBeat,
+                    U32 const mls /* template */)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    const BYTE* const base = ms->window.base;
+    U32 const curr = (U32)(ip-base);
+    U32 const hashLog = cParams->hashLog;
+    U32 const minMatch = (mls==3) ? 3 : 4;
+    U32* const hashTable = ms->hashTable;
+    size_t const h  = ZSTD_hashPtr(ip, hashLog, mls);
+    U32 matchIndex  = hashTable[h];
+    U32* const bt   = ms->chainTable;
+    U32 const btLog = cParams->chainLog - 1;
+    U32 const btMask= (1U << btLog) - 1;
+    size_t commonLengthSmaller=0, commonLengthLarger=0;
+    const BYTE* const dictBase = ms->window.dictBase;
+    U32 const dictLimit = ms->window.dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
+    U32 const matchLow = windowLow ? windowLow : 1;
+    U32* smallerPtr = bt + 2*(curr&btMask);
+    U32* largerPtr  = bt + 2*(curr&btMask) + 1;
+    U32 matchEndIdx = curr+8+1;   /* farthest referenced position of any match => detects repetitive patterns */
+    U32 dummy32;   /* to be nullified at the end */
+    U32 mnum = 0;
+    U32 nbCompares = 1U << cParams->searchLog;
+
+    const ZSTD_matchState_t* dms    = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
+    const ZSTD_compressionParameters* const dmsCParams =
+                                      dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
+    const BYTE* const dmsBase       = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
+    const BYTE* const dmsEnd        = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
+    U32         const dmsHighLimit  = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
+    U32         const dmsLowLimit   = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
+    U32         const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
+    U32         const dmsHashLog    = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
+    U32         const dmsBtLog      = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
+    U32         const dmsBtMask     = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
+    U32         const dmsBtLow      = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
+
+    size_t bestLength = lengthToBeat-1;
+    DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr);
+
+    /* check repCode */
+    assert(ll0 <= 1);   /* necessarily 1 or 0 */
+    {   U32 const lastR = ZSTD_REP_NUM + ll0;
+        U32 repCode;
+        for (repCode = ll0; repCode < lastR; repCode++) {
+            U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            U32 const repIndex = curr - repOffset;
+            U32 repLen = 0;
+            assert(curr >= dictLimit);
+            if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) {  /* equivalent to `curr > repIndex >= dictLimit` */
+                /* We must validate the repcode offset because when we're using a dictionary the
+                 * valid offset range shrinks when the dictionary goes out of bounds.
+                 */
+                if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) {
+                    repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
+                }
+            } else {  /* repIndex < dictLimit || repIndex >= curr */
+                const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
+                                             dmsBase + repIndex - dmsIndexDelta :
+                                             dictBase + repIndex;
+                assert(curr >= windowLow);
+                if ( dictMode == ZSTD_extDict
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow)  /* equivalent to `curr > repIndex >= windowLow` */
+                     & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
+                }
+                if (dictMode == ZSTD_dictMatchState
+                  && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta))  /* equivalent to `curr > repIndex >= dmsLowLimit` */
+                     & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
+                  && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
+                    repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
+            }   }
+            /* save longer solution */
+            if (repLen > bestLength) {
+                DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
+                            repCode, ll0, repOffset, repLen);
+                bestLength = repLen;
+                matches[mnum].off = repCode - ll0;
+                matches[mnum].len = (U32)repLen;
+                mnum++;
+                if ( (repLen > sufficient_len)
+                   | (ip+repLen == iLimit) ) {  /* best possible */
+                    return mnum;
+    }   }   }   }
+
+    /* HC3 match finder */
+    if ((mls == 3) /*static*/ && (bestLength < mls)) {
+        U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip);
+        if ((matchIndex3 >= matchLow)
+          & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
+            size_t mlen;
+            if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
+                const BYTE* const match = base + matchIndex3;
+                mlen = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex3;
+                mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart);
+            }
+
+            /* save best solution */
+            if (mlen >= mls /* == 3 > bestLength */) {
+                DEBUGLOG(8, "found small match with hlog3, of length %u",
+                            (U32)mlen);
+                bestLength = mlen;
+                assert(curr > matchIndex3);
+                assert(mnum==0);  /* no prior solution */
+                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                matches[0].len = (U32)mlen;
+                mnum = 1;
+                if ( (mlen > sufficient_len) |
+                     (ip+mlen == iLimit) ) {  /* best possible length */
+                    ms->nextToUpdate = curr+1;  /* skip insertion */
+                    return 1;
+        }   }   }
+        /* no dictMatchState lookup: dicts don't have a populated HC3 table */
+    }
+
+    hashTable[h] = curr;   /* Update Hash Table */
+
+    for (; nbCompares && (matchIndex >= matchLow); --nbCompares) {
+        U32* const nextPtr = bt + 2*(matchIndex & btMask);
+        const BYTE* match;
+        size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+        assert(curr > matchIndex);
+
+        if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
+            assert(matchIndex+matchLength >= dictLimit);  /* ensure the condition is correct when !extDict */
+            match = base + matchIndex;
+            if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
+        } else {
+            match = dictBase + matchIndex;
+            assert(memcmp(match, ip, matchLength) == 0);  /* ensure early section of match is equal as expected */
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
+            if (matchIndex+matchLength >= dictLimit)
+                match = base + matchIndex;   /* prepare for match[matchLength] read */
+        }
+
+        if (matchLength > bestLength) {
+            DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
+                    (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+            assert(matchEndIdx > matchIndex);
+            if (matchLength > matchEndIdx - matchIndex)
+                matchEndIdx = matchIndex + (U32)matchLength;
+            bestLength = matchLength;
+            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].len = (U32)matchLength;
+            mnum++;
+            if ( (matchLength > ZSTD_OPT_NUM)
+               | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
+                break; /* drop, to preserve bt consistency (miss a little bit of compression) */
+            }
+        }
+
+        if (match[matchLength] < ip[matchLength]) {
+            /* match smaller than current */
+            *smallerPtr = matchIndex;             /* update smaller idx */
+            commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+            if (matchIndex <= btLow) { smallerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            smallerPtr = nextPtr+1;               /* new candidate => larger than match, which was smaller than current */
+            matchIndex = nextPtr[1];              /* new matchIndex, larger than previous, closer to current */
+        } else {
+            *largerPtr = matchIndex;
+            commonLengthLarger = matchLength;
+            if (matchIndex <= btLow) { largerPtr=&dummy32; break; }   /* beyond tree size, stop the search */
+            largerPtr = nextPtr;
+            matchIndex = nextPtr[0];
+    }   }
+
+    *smallerPtr = *largerPtr = 0;
+
+    assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dictMatchState && nbCompares) {
+        size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
+        U32 dictMatchIndex = dms->hashTable[dmsH];
+        const U32* const dmsBt = dms->chainTable;
+        commonLengthSmaller = commonLengthLarger = 0;
+        for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) {
+            const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
+            size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger);   /* guaranteed minimum nb of common bytes */
+            const BYTE* match = dmsBase + dictMatchIndex;
+            matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
+            if (dictMatchIndex+matchLength >= dmsHighLimit)
+                match = base + dictMatchIndex + dmsIndexDelta;   /* to prepare for next usage of match[matchLength] */
+
+            if (matchLength > bestLength) {
+                matchIndex = dictMatchIndex + dmsIndexDelta;
+                DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
+                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                if (matchLength > matchEndIdx - matchIndex)
+                    matchEndIdx = matchIndex + (U32)matchLength;
+                bestLength = matchLength;
+                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].len = (U32)matchLength;
+                mnum++;
+                if ( (matchLength > ZSTD_OPT_NUM)
+                   | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
+                    break;   /* drop, to guarantee consistency (miss a little bit of compression) */
+                }
+            }
+
+            if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
+            if (match[matchLength] < ip[matchLength]) {
+                commonLengthSmaller = matchLength;    /* all smaller will now have at least this guaranteed common length */
+                dictMatchIndex = nextPtr[1];              /* new matchIndex larger than previous (closer to current) */
+            } else {
+                /* match is larger than current */
+                commonLengthLarger = matchLength;
+                dictMatchIndex = nextPtr[0];
+            }
+        }
+    }
+
+    assert(matchEndIdx > curr+8);
+    ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
+    return mnum;
+}
+
+
+FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
+                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
+                        ZSTD_matchState_t* ms,
+                        U32* nextToUpdate3,
+                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
+                        const U32 rep[ZSTD_REP_NUM],
+                        U32 const ll0,
+                        U32 const lengthToBeat)
+{
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    U32 const matchLengthSearch = cParams->minMatch;
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
+    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
+    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
+    switch(matchLengthSearch)
+    {
+    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
+    default :
+    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
+    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
+    case 7 :
+    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+    }
+}
+
+/* ***********************
+*  LDM helper functions  *
+*************************/
+
+/* Struct containing info needed to make decision about ldm inclusion */
+typedef struct {
+    rawSeqStore_t seqStore;         /* External match candidates store for this block */
+    U32 startPosInBlock;            /* Start position of the current match candidate */
+    U32 endPosInBlock;              /* End position of the current match candidate */
+    U32 offset;                     /* Offset of the match candidate */
+} ZSTD_optLdm_t;
+
+/* ZSTD_optLdm_skipRawSeqStoreBytes():
+ * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+ */
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+    U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
+    while (currPos && rawSeqStore->pos < rawSeqStore->size) {
+        rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
+        if (currPos >= currSeq.litLength + currSeq.matchLength) {
+            currPos -= currSeq.litLength + currSeq.matchLength;
+            rawSeqStore->pos++;
+        } else {
+            rawSeqStore->posInSequence = currPos;
+            break;
+        }
+    }
+    if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) {
+        rawSeqStore->posInSequence = 0;
+    }
+}
+
+/* ZSTD_opt_getNextMatchAndUpdateSeqStore():
+ * Calculates the beginning and end of the next match in the current block.
+ * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
+ */
+static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                                   U32 blockBytesRemaining) {
+    rawSeq currSeq;
+    U32 currBlockEndPos;
+    U32 literalsBytesRemaining;
+    U32 matchBytesRemaining;
+
+    /* Setting match end position to MAX to ensure we never use an LDM during this block */
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        return;
+    }
+    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
+       based on ldmSeqStore->posInSequence */
+    currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
+    assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
+    currBlockEndPos = currPosInBlock + blockBytesRemaining;
+    literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ?
+            currSeq.litLength - (U32)optLdm->seqStore.posInSequence :
+            0;
+    matchBytesRemaining = (literalsBytesRemaining == 0) ?
+            currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) :
+            currSeq.matchLength;
+
+    /* If there are more literal bytes than bytes remaining in block, no ldm is possible */
+    if (literalsBytesRemaining >= blockBytesRemaining) {
+        optLdm->startPosInBlock = UINT_MAX;
+        optLdm->endPosInBlock = UINT_MAX;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining);
+        return;
+    }
+
+    /* Matches may be < MINMATCH by this process. In that case, we will reject them
+       when we are deciding whether or not to add the ldm */
+    optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining;
+    optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining;
+    optLdm->offset = currSeq.offset;
+
+    if (optLdm->endPosInBlock > currBlockEndPos) {
+        /* Match ends after the block ends, we can't use the whole match */
+        optLdm->endPosInBlock = currBlockEndPos;
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock);
+    } else {
+        /* Consume nb of bytes equal to size of sequence left */
+        ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining);
+    }
+}
+
+/* ZSTD_optLdm_maybeAddMatch():
+ * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
+ * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
+ */
+static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
+                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
+    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
+    /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
+    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
+    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+
+    /* Ensure that current block position is not outside of the match */
+    if (currPosInBlock < optLdm->startPosInBlock
+      || currPosInBlock >= optLdm->endPosInBlock
+      || candidateMatchLength < MINMATCH) {
+        return;
+    }
+
+    if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+        DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
+                 candidateOffCode, candidateMatchLength, currPosInBlock);
+        matches[*nbMatches].len = candidateMatchLength;
+        matches[*nbMatches].off = candidateOffCode;
+        (*nbMatches)++;
+    }
+}
+
+/* ZSTD_optLdm_processMatchCandidate():
+ * Wrapper function to update ldm seq store and call ldm functions as necessary.
+ */
+static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
+                                              U32 currPosInBlock, U32 remainingBytes) {
+    if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
+        return;
+    }
+
+    if (currPosInBlock >= optLdm->endPosInBlock) {
+        if (currPosInBlock > optLdm->endPosInBlock) {
+            /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily
+             * at the end of a match from the ldm seq store, and will often be some bytes
+             * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
+             */
+            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+            ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
+        } 
+        ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
+    }
+    ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
+}
+
+/*-*******************************
+*  Optimal parser
+*********************************/
+
+
+static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
+{
+    return sol.litlen + sol.mlen;
+}
+
+#if 0 /* debug */
+
+static void
+listStats(const U32* table, int lastEltID)
+{
+    int const nbElts = lastEltID + 1;
+    int enb;
+    for (enb=0; enb < nbElts; enb++) {
+        (void)table;
+        /* RAWLOG(2, "%3i:%3i,  ", enb, table[enb]); */
+        RAWLOG(2, "%4i,", table[enb]);
+    }
+    RAWLOG(2, " \n");
+}
+
+#endif
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
+                               seqStore_t* seqStore,
+                               U32 rep[ZSTD_REP_NUM],
+                         const void* src, size_t srcSize,
+                         const int optLevel,
+                         const ZSTD_dictMode_e dictMode)
+{
+    optState_t* const optStatePtr = &ms->opt;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const prefixStart = base + ms->window.dictLimit;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+
+    U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
+    U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+    ZSTD_match_t* const matches = optStatePtr->matchTable;
+    ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
+    assert(optLevel <= 2);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
+    ip += (ip==prefixStart);
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
+            if (!nbMatches) { ip++; continue; }
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 0;  /* means is_a_literal */
+            opt[0].litlen = litlen;
+            /* We don't need to include the actual price of the literals because
+             * it is static for the duration of the forward pass, and is included
+             * in every price. We include the literal length to avoid negative
+             * prices when we subtract the previous literal length.
+             */
+            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
+                    cur = 0;
+                    last_pos = ZSTD_totalLen(lastSequence);
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    for ( ; pos <= end ; pos++ ) {
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = sequencePrice;
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+                int const price = opt[cur-1].price
+                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                assert(price < 1000000000); /* overflow check */
+                if (price <= opt[cur].price) {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
+                                opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
+                    opt[cur].mlen = 0;
+                    opt[cur].off = 0;
+                    opt[cur].litlen = litlen;
+                    opt[cur].price = price;
+                } else {
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
+                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                }
+            }
+
+            /* Set the repcodes of the current position. We must do it here
+             * because we rely on the repcodes of the 2nd to last sequence being
+             * correct to set the next chunks repcodes during the backward
+             * traversal.
+             */
+            ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
+            assert(cur >= opt[cur].mlen);
+            if (opt[cur].mlen != 0) {
+                U32 const prev = cur - opt[cur].mlen;
+                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
+            } else {
+                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
+            }
+
+            /* last match must start at a minimum distance of 8 from oend */
+            if (inr > ilimit) continue;
+
+            if (cur == last_pos) break;
+
+            if ( (optLevel==0) /*static_test*/
+              && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
+                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+                continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
+            }
+
+            {   U32 const ll0 = (opt[cur].mlen != 0);
+                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
+                U32 const previousPrice = opt[cur].price;
+                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 matchNb;
+
+                ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                                  (U32)(inr-istart), (U32)(iend-inr));
+
+                if (!nbMatches) {
+                    DEBUGLOG(7, "rPos:%u : no match found", cur);
+                    continue;
+                }
+
+                {   U32 const maxML = matches[nbMatches-1].len;
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
+                                inr-istart, cur, nbMatches, maxML);
+
+                    if ( (maxML > sufficient_len)
+                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
+                        lastSequence.mlen = maxML;
+                        lastSequence.off = matches[nbMatches-1].off;
+                        lastSequence.litlen = litlen;
+                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
+                        last_pos = cur + ZSTD_totalLen(lastSequence);
+                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
+                        goto _shortestPath;
+                }   }
+
+                /* set prices using matches found at position == cur */
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const lastML = matches[matchNb].len;
+                    U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
+                    U32 mlen;
+
+                    DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
+                                matchNb, matches[matchNb].off, lastML, litlen);
+
+                    for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
+                        U32 const pos = cur + mlen;
+                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+
+                        if ((pos > last_pos) || (price < opt[pos].price)) {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
+                            opt[pos].mlen = mlen;
+                            opt[pos].off = offset;
+                            opt[pos].litlen = litlen;
+                            opt[pos].price = price;
+                        } else {
+                            DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
+                                        pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
+                            if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
+                        }
+            }   }   }
+        }  /* for (cur = 1; cur <= last_pos; cur++) */
+
+        lastSequence = opt[last_pos];
+        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
+        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
+
+_shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
+        assert(opt[0].mlen == 0);
+
+        /* Set the next chunk's repcodes based on the repcodes of the beginning
+         * of the last match, and the last sequence. This avoids us having to
+         * update them while traversing the sequences.
+         */
+        if (lastSequence.mlen != 0) {
+            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+            ZSTD_memcpy(rep, &reps, sizeof(reps));
+        } else {
+            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+        }
+
+        {   U32 const storeEnd = cur + 1;
+            U32 storeStart = storeEnd;
+            U32 seqPos = cur;
+
+            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                        last_pos, cur); (void)last_pos;
+            assert(storeEnd < ZSTD_OPT_NUM);
+            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
+            opt[storeEnd] = lastSequence;
+            while (seqPos > 0) {
+                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+                storeStart--;
+                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
+                opt[storeStart] = opt[seqPos];
+                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */
+static void
+ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                     seqStore_t* seqStore,
+                     U32 rep[ZSTD_REP_NUM],
+               const void* src, size_t srcSize)
+{
+    U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
+    ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
+
+    DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize);
+    assert(ms->opt.litLengthSum == 0);    /* first block */
+    assert(seqStore->sequences == seqStore->sequencesStart);   /* no ldm */
+    assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
+    assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
+
+    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+
+    /* invalidate first scan from history */
+    ZSTD_resetSeqStore(seqStore);
+    ms->window.base -= srcSize;
+    ms->window.dictLimit += (U32)srcSize;
+    ms->window.lowLimit = ms->window.dictLimit;
+    ms->nextToUpdate = ms->window.dictLimit;
+
+    /* re-inforce weight of collected statistics */
+    ZSTD_upscaleStats(&ms->opt);
+}
+
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    U32 const curr = (U32)((const BYTE*)src - ms->window.base);
+    DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize);
+
+    /* 2-pass strategy:
+     * this strategy makes a first pass over first block to collect statistics
+     * and seed next round's statistics with it.
+     * After 1st pass, function forgets everything, and starts a new block.
+     * Consequently, this can only work if no data has been previously loaded in tables,
+     * aka, no dictionary, no prefix, no ldm preprocessing.
+     * The compression ratio gain is generally small (~0.5% on first block),
+     * the cost is 2x cpu time on first block. */
+    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
+    if ( (ms->opt.litLengthSum==0)   /* first block */
+      && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
+      && (ms->window.dictLimit == ms->window.lowLimit)   /* no dictionary */
+      && (curr == ms->window.dictLimit)   /* start of frame, nothing already loaded nor skipped */
+      && (srcSize > ZSTD_PREDEF_THRESHOLD)
+      ) {
+        ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
+    }
+
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+}
+
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+}
+
+/* note : no btultra2 variant for extDict nor dictMatchState,
+ * because btultra2 is not meant to work with dictionaries
+ * and is only specific for the first block (no prefix) */
diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h
new file mode 100644 (file)
index 0000000..22b8628
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_OPT_H
+#define ZSTD_OPT_H
+
+
+#include "zstd_compress_internal.h"
+
+/* used in ZSTD_loadDictionaryContent() */
+void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+
+size_t ZSTD_compressBlock_btopt_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+size_t ZSTD_compressBlock_btopt_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_btultra_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+        /* note : no btultra2 variant for extDict nor dictMatchState,
+         * because btultra2 is not meant to work with dictionaries
+         * and is only specific for the first block (no prefix) */
+
+
+#endif /* ZSTD_OPT_H */
diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c
deleted file mode 100644 (file)
index 66cd487..0000000
+++ /dev/null
@@ -1,2531 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* ***************************************************************
-*  Tuning parameters
-*****************************************************************/
-/*!
-*  MAXWINDOWSIZE_DEFAULT :
-*  maximum window size accepted by DStream, by default.
-*  Frames requiring more memory will be rejected.
-*/
-#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
-#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */
-#endif
-
-/*-*******************************************************
-*  Dependencies
-*********************************************************/
-#include "fse.h"
-#include "huf.h"
-#include "mem.h" /* low level memory routines */
-#include "zstd_internal.h"
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h> /* memcpy, memmove, memset */
-
-#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
-
-/*-*************************************
-*  Macros
-***************************************/
-#define ZSTD_isError ERR_isError /* for inlining */
-#define FSE_isError ERR_isError
-#define HUF_isError ERR_isError
-
-/*_*******************************************************
-*  Memory operations
-**********************************************************/
-static void ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); }
-
-/*-*************************************************************
-*   Context management
-***************************************************************/
-typedef enum {
-       ZSTDds_getFrameHeaderSize,
-       ZSTDds_decodeFrameHeader,
-       ZSTDds_decodeBlockHeader,
-       ZSTDds_decompressBlock,
-       ZSTDds_decompressLastBlock,
-       ZSTDds_checkChecksum,
-       ZSTDds_decodeSkippableHeader,
-       ZSTDds_skipFrame
-} ZSTD_dStage;
-
-typedef struct {
-       FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
-       FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
-       FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
-       HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
-       U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2];
-       U32 rep[ZSTD_REP_NUM];
-} ZSTD_entropyTables_t;
-
-struct ZSTD_DCtx_s {
-       const FSE_DTable *LLTptr;
-       const FSE_DTable *MLTptr;
-       const FSE_DTable *OFTptr;
-       const HUF_DTable *HUFptr;
-       ZSTD_entropyTables_t entropy;
-       const void *previousDstEnd; /* detect continuity */
-       const void *base;          /* start of curr segment */
-       const void *vBase;        /* virtual start of previous segment if it was just before curr one */
-       const void *dictEnd;    /* end of previous segment */
-       size_t expected;
-       ZSTD_frameParams fParams;
-       blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */
-       ZSTD_dStage stage;
-       U32 litEntropy;
-       U32 fseEntropy;
-       struct xxh64_state xxhState;
-       size_t headerSize;
-       U32 dictID;
-       const BYTE *litPtr;
-       ZSTD_customMem customMem;
-       size_t litSize;
-       size_t rleSize;
-       BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
-       BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
-}; /* typedef'd to ZSTD_DCtx within "zstd.h" */
-
-size_t ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); }
-
-size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx)
-{
-       dctx->expected = ZSTD_frameHeaderSize_prefix;
-       dctx->stage = ZSTDds_getFrameHeaderSize;
-       dctx->previousDstEnd = NULL;
-       dctx->base = NULL;
-       dctx->vBase = NULL;
-       dctx->dictEnd = NULL;
-       dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
-       dctx->litEntropy = dctx->fseEntropy = 0;
-       dctx->dictID = 0;
-       ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
-       memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
-       dctx->LLTptr = dctx->entropy.LLTable;
-       dctx->MLTptr = dctx->entropy.MLTable;
-       dctx->OFTptr = dctx->entropy.OFTable;
-       dctx->HUFptr = dctx->entropy.hufTable;
-       return 0;
-}
-
-ZSTD_DCtx *ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
-{
-       ZSTD_DCtx *dctx;
-
-       if (!customMem.customAlloc || !customMem.customFree)
-               return NULL;
-
-       dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem);
-       if (!dctx)
-               return NULL;
-       memcpy(&dctx->customMem, &customMem, sizeof(customMem));
-       ZSTD_decompressBegin(dctx);
-       return dctx;
-}
-
-ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-       return ZSTD_createDCtx_advanced(stackMem);
-}
-
-size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx)
-{
-       if (dctx == NULL)
-               return 0; /* support free on NULL */
-       ZSTD_free(dctx, dctx->customMem);
-       return 0; /* reserved as a potential error code in the future */
-}
-
-void ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx)
-{
-       size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max;
-       memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */
-}
-
-static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict);
-
-/*-*************************************************************
-*   Decompression section
-***************************************************************/
-
-/*! ZSTD_isFrame() :
- *  Tells if the content of `buffer` starts with a valid Frame Identifier.
- *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
- *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
- *  Note 3 : Skippable Frame Identifiers are considered valid. */
-unsigned ZSTD_isFrame(const void *buffer, size_t size)
-{
-       if (size < 4)
-               return 0;
-       {
-               U32 const magic = ZSTD_readLE32(buffer);
-               if (magic == ZSTD_MAGICNUMBER)
-                       return 1;
-               if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START)
-                       return 1;
-       }
-       return 0;
-}
-
-/** ZSTD_frameHeaderSize() :
-*   srcSize must be >= ZSTD_frameHeaderSize_prefix.
-*   @return : size of the Frame Header */
-static size_t ZSTD_frameHeaderSize(const void *src, size_t srcSize)
-{
-       if (srcSize < ZSTD_frameHeaderSize_prefix)
-               return ERROR(srcSize_wrong);
-       {
-               BYTE const fhd = ((const BYTE *)src)[4];
-               U32 const dictID = fhd & 3;
-               U32 const singleSegment = (fhd >> 5) & 1;
-               U32 const fcsId = fhd >> 6;
-               return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId);
-       }
-}
-
-/** ZSTD_getFrameParams() :
-*   decode Frame Header, or require larger `srcSize`.
-*   @return : 0, `fparamsPtr` is correctly filled,
-*            >0, `srcSize` is too small, result is expected `srcSize`,
-*             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize)
-{
-       const BYTE *ip = (const BYTE *)src;
-
-       if (srcSize < ZSTD_frameHeaderSize_prefix)
-               return ZSTD_frameHeaderSize_prefix;
-       if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) {
-               if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-                       if (srcSize < ZSTD_skippableHeaderSize)
-                               return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */
-                       memset(fparamsPtr, 0, sizeof(*fparamsPtr));
-                       fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4);
-                       fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */
-                       return 0;
-               }
-               return ERROR(prefix_unknown);
-       }
-
-       /* ensure there is enough `srcSize` to fully read/decode frame header */
-       {
-               size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize);
-               if (srcSize < fhsize)
-                       return fhsize;
-       }
-
-       {
-               BYTE const fhdByte = ip[4];
-               size_t pos = 5;
-               U32 const dictIDSizeCode = fhdByte & 3;
-               U32 const checksumFlag = (fhdByte >> 2) & 1;
-               U32 const singleSegment = (fhdByte >> 5) & 1;
-               U32 const fcsID = fhdByte >> 6;
-               U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;
-               U32 windowSize = 0;
-               U32 dictID = 0;
-               U64 frameContentSize = 0;
-               if ((fhdByte & 0x08) != 0)
-                       return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */
-               if (!singleSegment) {
-                       BYTE const wlByte = ip[pos++];
-                       U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
-                       if (windowLog > ZSTD_WINDOWLOG_MAX)
-                               return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */
-                       windowSize = (1U << windowLog);
-                       windowSize += (windowSize >> 3) * (wlByte & 7);
-               }
-
-               switch (dictIDSizeCode) {
-               default: /* impossible */
-               case 0: break;
-               case 1:
-                       dictID = ip[pos];
-                       pos++;
-                       break;
-               case 2:
-                       dictID = ZSTD_readLE16(ip + pos);
-                       pos += 2;
-                       break;
-               case 3:
-                       dictID = ZSTD_readLE32(ip + pos);
-                       pos += 4;
-                       break;
-               }
-               switch (fcsID) {
-               default: /* impossible */
-               case 0:
-                       if (singleSegment)
-                               frameContentSize = ip[pos];
-                       break;
-               case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break;
-               case 2: frameContentSize = ZSTD_readLE32(ip + pos); break;
-               case 3: frameContentSize = ZSTD_readLE64(ip + pos); break;
-               }
-               if (!windowSize)
-                       windowSize = (U32)frameContentSize;
-               if (windowSize > windowSizeMax)
-                       return ERROR(frameParameter_windowTooLarge);
-               fparamsPtr->frameContentSize = frameContentSize;
-               fparamsPtr->windowSize = windowSize;
-               fparamsPtr->dictID = dictID;
-               fparamsPtr->checksumFlag = checksumFlag;
-       }
-       return 0;
-}
-
-/** ZSTD_getFrameContentSize() :
-*   compatible with legacy mode
-*   @return : decompressed size of the single frame pointed to be `src` if known, otherwise
-*             - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
-*             - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
-unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
-{
-       {
-               ZSTD_frameParams fParams;
-               if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0)
-                       return ZSTD_CONTENTSIZE_ERROR;
-               if (fParams.windowSize == 0) {
-                       /* Either skippable or empty frame, size == 0 either way */
-                       return 0;
-               } else if (fParams.frameContentSize != 0) {
-                       return fParams.frameContentSize;
-               } else {
-                       return ZSTD_CONTENTSIZE_UNKNOWN;
-               }
-       }
-}
-
-/** ZSTD_findDecompressedSize() :
- *  compatible with legacy mode
- *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
- *      skippable frames
- *  @return : decompressed size of the frames contained */
-unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize)
-{
-       {
-               unsigned long long totalDstSize = 0;
-               while (srcSize >= ZSTD_frameHeaderSize_prefix) {
-                       const U32 magicNumber = ZSTD_readLE32(src);
-
-                       if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-                               size_t skippableSize;
-                               if (srcSize < ZSTD_skippableHeaderSize)
-                                       return ERROR(srcSize_wrong);
-                               skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
-                               if (srcSize < skippableSize) {
-                                       return ZSTD_CONTENTSIZE_ERROR;
-                               }
-
-                               src = (const BYTE *)src + skippableSize;
-                               srcSize -= skippableSize;
-                               continue;
-                       }
-
-                       {
-                               unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
-                               if (ret >= ZSTD_CONTENTSIZE_ERROR)
-                                       return ret;
-
-                               /* check for overflow */
-                               if (totalDstSize + ret < totalDstSize)
-                                       return ZSTD_CONTENTSIZE_ERROR;
-                               totalDstSize += ret;
-                       }
-                       {
-                               size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
-                               if (ZSTD_isError(frameSrcSize)) {
-                                       return ZSTD_CONTENTSIZE_ERROR;
-                               }
-
-                               src = (const BYTE *)src + frameSrcSize;
-                               srcSize -= frameSrcSize;
-                       }
-               }
-
-               if (srcSize) {
-                       return ZSTD_CONTENTSIZE_ERROR;
-               }
-
-               return totalDstSize;
-       }
-}
-
-/** ZSTD_decodeFrameHeader() :
-*   `headerSize` must be the size provided by ZSTD_frameHeaderSize().
-*   @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
-static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize)
-{
-       size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize);
-       if (ZSTD_isError(result))
-               return result; /* invalid header */
-       if (result > 0)
-               return ERROR(srcSize_wrong); /* headerSize too small */
-       if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
-               return ERROR(dictionary_wrong);
-       if (dctx->fParams.checksumFlag)
-               xxh64_reset(&dctx->xxhState, 0);
-       return 0;
-}
-
-typedef struct {
-       blockType_e blockType;
-       U32 lastBlock;
-       U32 origSize;
-} blockProperties_t;
-
-/*! ZSTD_getcBlockSize() :
-*   Provides the size of compressed block from block header `src` */
-size_t ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr)
-{
-       if (srcSize < ZSTD_blockHeaderSize)
-               return ERROR(srcSize_wrong);
-       {
-               U32 const cBlockHeader = ZSTD_readLE24(src);
-               U32 const cSize = cBlockHeader >> 3;
-               bpPtr->lastBlock = cBlockHeader & 1;
-               bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
-               bpPtr->origSize = cSize; /* only useful for RLE */
-               if (bpPtr->blockType == bt_rle)
-                       return 1;
-               if (bpPtr->blockType == bt_reserved)
-                       return ERROR(corruption_detected);
-               return cSize;
-       }
-}
-
-static size_t ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       if (srcSize > dstCapacity)
-               return ERROR(dstSize_tooSmall);
-       memcpy(dst, src, srcSize);
-       return srcSize;
-}
-
-static size_t ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize)
-{
-       if (srcSize != 1)
-               return ERROR(srcSize_wrong);
-       if (regenSize > dstCapacity)
-               return ERROR(dstSize_tooSmall);
-       memset(dst, *(const BYTE *)src, regenSize);
-       return regenSize;
-}
-
-/*! ZSTD_decodeLiteralsBlock() :
-       @return : nb of bytes read from src (< srcSize ) */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
-{
-       if (srcSize < MIN_CBLOCK_SIZE)
-               return ERROR(corruption_detected);
-
-       {
-               const BYTE *const istart = (const BYTE *)src;
-               symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
-
-               switch (litEncType) {
-               case set_repeat:
-                       if (dctx->litEntropy == 0)
-                               return ERROR(dictionary_corrupted);
-                       fallthrough;
-               case set_compressed:
-                       if (srcSize < 5)
-                               return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
-                       {
-                               size_t lhSize, litSize, litCSize;
-                               U32 singleStream = 0;
-                               U32 const lhlCode = (istart[0] >> 2) & 3;
-                               U32 const lhc = ZSTD_readLE32(istart);
-                               switch (lhlCode) {
-                               case 0:
-                               case 1:
-                               default: /* note : default is impossible, since lhlCode into [0..3] */
-                                       /* 2 - 2 - 10 - 10 */
-                                       singleStream = !lhlCode;
-                                       lhSize = 3;
-                                       litSize = (lhc >> 4) & 0x3FF;
-                                       litCSize = (lhc >> 14) & 0x3FF;
-                                       break;
-                               case 2:
-                                       /* 2 - 2 - 14 - 14 */
-                                       lhSize = 4;
-                                       litSize = (lhc >> 4) & 0x3FFF;
-                                       litCSize = lhc >> 18;
-                                       break;
-                               case 3:
-                                       /* 2 - 2 - 18 - 18 */
-                                       lhSize = 5;
-                                       litSize = (lhc >> 4) & 0x3FFFF;
-                                       litCSize = (lhc >> 22) + (istart[4] << 10);
-                                       break;
-                               }
-                               if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-                                       return ERROR(corruption_detected);
-                               if (litCSize + lhSize > srcSize)
-                                       return ERROR(corruption_detected);
-
-                               if (HUF_isError(
-                                       (litEncType == set_repeat)
-                                           ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)
-                                                           : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr))
-                                           : (singleStream
-                                                  ? HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
-                                                                                dctx->entropy.workspace, sizeof(dctx->entropy.workspace))
-                                                  : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize,
-                                                                                  dctx->entropy.workspace, sizeof(dctx->entropy.workspace)))))
-                                       return ERROR(corruption_detected);
-
-                               dctx->litPtr = dctx->litBuffer;
-                               dctx->litSize = litSize;
-                               dctx->litEntropy = 1;
-                               if (litEncType == set_compressed)
-                                       dctx->HUFptr = dctx->entropy.hufTable;
-                               memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
-                               return litCSize + lhSize;
-                       }
-
-               case set_basic: {
-                       size_t litSize, lhSize;
-                       U32 const lhlCode = ((istart[0]) >> 2) & 3;
-                       switch (lhlCode) {
-                       case 0:
-                       case 2:
-                       default: /* note : default is impossible, since lhlCode into [0..3] */
-                               lhSize = 1;
-                               litSize = istart[0] >> 3;
-                               break;
-                       case 1:
-                               lhSize = 2;
-                               litSize = ZSTD_readLE16(istart) >> 4;
-                               break;
-                       case 3:
-                               lhSize = 3;
-                               litSize = ZSTD_readLE24(istart) >> 4;
-                               break;
-                       }
-
-                       if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
-                               if (litSize + lhSize > srcSize)
-                                       return ERROR(corruption_detected);
-                               memcpy(dctx->litBuffer, istart + lhSize, litSize);
-                               dctx->litPtr = dctx->litBuffer;
-                               dctx->litSize = litSize;
-                               memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
-                               return lhSize + litSize;
-                       }
-                       /* direct reference into compressed stream */
-                       dctx->litPtr = istart + lhSize;
-                       dctx->litSize = litSize;
-                       return lhSize + litSize;
-               }
-
-               case set_rle: {
-                       U32 const lhlCode = ((istart[0]) >> 2) & 3;
-                       size_t litSize, lhSize;
-                       switch (lhlCode) {
-                       case 0:
-                       case 2:
-                       default: /* note : default is impossible, since lhlCode into [0..3] */
-                               lhSize = 1;
-                               litSize = istart[0] >> 3;
-                               break;
-                       case 1:
-                               lhSize = 2;
-                               litSize = ZSTD_readLE16(istart) >> 4;
-                               break;
-                       case 3:
-                               lhSize = 3;
-                               litSize = ZSTD_readLE24(istart) >> 4;
-                               if (srcSize < 4)
-                                       return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */
-                               break;
-                       }
-                       if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-                               return ERROR(corruption_detected);
-                       memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
-                       dctx->litPtr = dctx->litBuffer;
-                       dctx->litSize = litSize;
-                       return lhSize + 1;
-               }
-               default:
-                       return ERROR(corruption_detected); /* impossible */
-               }
-       }
-}
-
-typedef union {
-       FSE_decode_t realData;
-       U32 alignedBy4;
-} FSE_decode_t4;
-
-static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = {
-    {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 4}},                /* 0 : base, symbol, bits */
-    {{16, 0, 4}},
-    {{32, 1, 5}},
-    {{0, 3, 5}},
-    {{0, 4, 5}},
-    {{0, 6, 5}},
-    {{0, 7, 5}},
-    {{0, 9, 5}},
-    {{0, 10, 5}},
-    {{0, 12, 5}},
-    {{0, 14, 6}},
-    {{0, 16, 5}},
-    {{0, 18, 5}},
-    {{0, 19, 5}},
-    {{0, 21, 5}},
-    {{0, 22, 5}},
-    {{0, 24, 5}},
-    {{32, 25, 5}},
-    {{0, 26, 5}},
-    {{0, 27, 6}},
-    {{0, 29, 6}},
-    {{0, 31, 6}},
-    {{32, 0, 4}},
-    {{0, 1, 4}},
-    {{0, 2, 5}},
-    {{32, 4, 5}},
-    {{0, 5, 5}},
-    {{32, 7, 5}},
-    {{0, 8, 5}},
-    {{32, 10, 5}},
-    {{0, 11, 5}},
-    {{0, 13, 6}},
-    {{32, 16, 5}},
-    {{0, 17, 5}},
-    {{32, 19, 5}},
-    {{0, 20, 5}},
-    {{32, 22, 5}},
-    {{0, 23, 5}},
-    {{0, 25, 4}},
-    {{16, 25, 4}},
-    {{32, 26, 5}},
-    {{0, 28, 6}},
-    {{0, 30, 6}},
-    {{48, 0, 4}},
-    {{16, 1, 4}},
-    {{32, 2, 5}},
-    {{32, 3, 5}},
-    {{32, 5, 5}},
-    {{32, 6, 5}},
-    {{32, 8, 5}},
-    {{32, 9, 5}},
-    {{32, 11, 5}},
-    {{32, 12, 5}},
-    {{0, 15, 6}},
-    {{32, 17, 5}},
-    {{32, 18, 5}},
-    {{32, 20, 5}},
-    {{32, 21, 5}},
-    {{32, 23, 5}},
-    {{32, 24, 5}},
-    {{0, 35, 6}},
-    {{0, 34, 6}},
-    {{0, 33, 6}},
-    {{0, 32, 6}},
-}; /* LL_defaultDTable */
-
-static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = {
-    {{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 6}},                /* 0 : base, symbol, bits */
-    {{0, 1, 4}},
-    {{32, 2, 5}},
-    {{0, 3, 5}},
-    {{0, 5, 5}},
-    {{0, 6, 5}},
-    {{0, 8, 5}},
-    {{0, 10, 6}},
-    {{0, 13, 6}},
-    {{0, 16, 6}},
-    {{0, 19, 6}},
-    {{0, 22, 6}},
-    {{0, 25, 6}},
-    {{0, 28, 6}},
-    {{0, 31, 6}},
-    {{0, 33, 6}},
-    {{0, 35, 6}},
-    {{0, 37, 6}},
-    {{0, 39, 6}},
-    {{0, 41, 6}},
-    {{0, 43, 6}},
-    {{0, 45, 6}},
-    {{16, 1, 4}},
-    {{0, 2, 4}},
-    {{32, 3, 5}},
-    {{0, 4, 5}},
-    {{32, 6, 5}},
-    {{0, 7, 5}},
-    {{0, 9, 6}},
-    {{0, 12, 6}},
-    {{0, 15, 6}},
-    {{0, 18, 6}},
-    {{0, 21, 6}},
-    {{0, 24, 6}},
-    {{0, 27, 6}},
-    {{0, 30, 6}},
-    {{0, 32, 6}},
-    {{0, 34, 6}},
-    {{0, 36, 6}},
-    {{0, 38, 6}},
-    {{0, 40, 6}},
-    {{0, 42, 6}},
-    {{0, 44, 6}},
-    {{32, 1, 4}},
-    {{48, 1, 4}},
-    {{16, 2, 4}},
-    {{32, 4, 5}},
-    {{32, 5, 5}},
-    {{32, 7, 5}},
-    {{32, 8, 5}},
-    {{0, 11, 6}},
-    {{0, 14, 6}},
-    {{0, 17, 6}},
-    {{0, 20, 6}},
-    {{0, 23, 6}},
-    {{0, 26, 6}},
-    {{0, 29, 6}},
-    {{0, 52, 6}},
-    {{0, 51, 6}},
-    {{0, 50, 6}},
-    {{0, 49, 6}},
-    {{0, 48, 6}},
-    {{0, 47, 6}},
-    {{0, 46, 6}},
-}; /* ML_defaultDTable */
-
-static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = {
-    {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */
-    {{0, 0, 5}},                /* 0 : base, symbol, bits */
-    {{0, 6, 4}},
-    {{0, 9, 5}},
-    {{0, 15, 5}},
-    {{0, 21, 5}},
-    {{0, 3, 5}},
-    {{0, 7, 4}},
-    {{0, 12, 5}},
-    {{0, 18, 5}},
-    {{0, 23, 5}},
-    {{0, 5, 5}},
-    {{0, 8, 4}},
-    {{0, 14, 5}},
-    {{0, 20, 5}},
-    {{0, 2, 5}},
-    {{16, 7, 4}},
-    {{0, 11, 5}},
-    {{0, 17, 5}},
-    {{0, 22, 5}},
-    {{0, 4, 5}},
-    {{16, 8, 4}},
-    {{0, 13, 5}},
-    {{0, 19, 5}},
-    {{0, 1, 5}},
-    {{16, 6, 4}},
-    {{0, 10, 5}},
-    {{0, 16, 5}},
-    {{0, 28, 5}},
-    {{0, 27, 5}},
-    {{0, 26, 5}},
-    {{0, 25, 5}},
-    {{0, 24, 5}},
-}; /* OF_defaultDTable */
-
-/*! ZSTD_buildSeqTable() :
-       @return : nb bytes read from src,
-                         or an error code if it fails, testable with ZSTD_isError()
-*/
-static size_t ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src,
-                                size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize)
-{
-       const void *const tmpPtr = defaultTable; /* bypass strict aliasing */
-       switch (type) {
-       case set_rle:
-               if (!srcSize)
-                       return ERROR(srcSize_wrong);
-               if ((*(const BYTE *)src) > max)
-                       return ERROR(corruption_detected);
-               FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src);
-               *DTablePtr = DTableSpace;
-               return 1;
-       case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0;
-       case set_repeat:
-               if (!flagRepeatTable)
-                       return ERROR(corruption_detected);
-               return 0;
-       default: /* impossible */
-       case set_compressed: {
-               U32 tableLog;
-               S16 *norm = (S16 *)workspace;
-               size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2;
-
-               if ((spaceUsed32 << 2) > workspaceSize)
-                       return ERROR(GENERIC);
-               workspace = (U32 *)workspace + spaceUsed32;
-               workspaceSize -= (spaceUsed32 << 2);
-               {
-                       size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
-                       if (FSE_isError(headerSize))
-                               return ERROR(corruption_detected);
-                       if (tableLog > maxLog)
-                               return ERROR(corruption_detected);
-                       FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize);
-                       *DTablePtr = DTableSpace;
-                       return headerSize;
-               }
-       }
-       }
-}
-
-size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize)
-{
-       const BYTE *const istart = (const BYTE *const)src;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *ip = istart;
-
-       /* check */
-       if (srcSize < MIN_SEQUENCES_SIZE)
-               return ERROR(srcSize_wrong);
-
-       /* SeqHead */
-       {
-               int nbSeq = *ip++;
-               if (!nbSeq) {
-                       *nbSeqPtr = 0;
-                       return 1;
-               }
-               if (nbSeq > 0x7F) {
-                       if (nbSeq == 0xFF) {
-                               if (ip + 2 > iend)
-                                       return ERROR(srcSize_wrong);
-                               nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2;
-                       } else {
-                               if (ip >= iend)
-                                       return ERROR(srcSize_wrong);
-                               nbSeq = ((nbSeq - 0x80) << 8) + *ip++;
-                       }
-               }
-               *nbSeqPtr = nbSeq;
-       }
-
-       /* FSE table descriptors */
-       if (ip + 4 > iend)
-               return ERROR(srcSize_wrong); /* minimum possible size */
-       {
-               symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
-               symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
-               symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
-               ip++;
-
-               /* Build DTables */
-               {
-                       size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip,
-                                                                 LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-                       if (ZSTD_isError(llhSize))
-                               return ERROR(corruption_detected);
-                       ip += llhSize;
-               }
-               {
-                       size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip,
-                                                                 OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-                       if (ZSTD_isError(ofhSize))
-                               return ERROR(corruption_detected);
-                       ip += ofhSize;
-               }
-               {
-                       size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip,
-                                                                 ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace));
-                       if (ZSTD_isError(mlhSize))
-                               return ERROR(corruption_detected);
-                       ip += mlhSize;
-               }
-       }
-
-       return ip - istart;
-}
-
-typedef struct {
-       size_t litLength;
-       size_t matchLength;
-       size_t offset;
-       const BYTE *match;
-} seq_t;
-
-typedef struct {
-       BIT_DStream_t DStream;
-       FSE_DState_t stateLL;
-       FSE_DState_t stateOffb;
-       FSE_DState_t stateML;
-       size_t prevOffset[ZSTD_REP_NUM];
-       const BYTE *base;
-       size_t pos;
-       uPtrDiff gotoDict;
-} seqState_t;
-
-FORCE_NOINLINE
-size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-                             const BYTE *const vBase, const BYTE *const dictEnd)
-{
-       BYTE *const oLitEnd = op + sequence.litLength;
-       size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-       BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-       BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-       const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-       const BYTE *match = oLitEnd - sequence.offset;
-
-       /* check */
-       if (oMatchEnd > oend)
-               return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-       if (iLitEnd > litLimit)
-               return ERROR(corruption_detected); /* over-read beyond lit buffer */
-       if (oLitEnd <= oend_w)
-               return ERROR(GENERIC); /* Precondition */
-
-       /* copy literals */
-       if (op < oend_w) {
-               ZSTD_wildcopy(op, *litPtr, oend_w - op);
-               *litPtr += oend_w - op;
-               op = oend_w;
-       }
-       while (op < oLitEnd)
-               *op++ = *(*litPtr)++;
-
-       /* copy Match */
-       if (sequence.offset > (size_t)(oLitEnd - base)) {
-               /* offset beyond prefix */
-               if (sequence.offset > (size_t)(oLitEnd - vBase))
-                       return ERROR(corruption_detected);
-               match = dictEnd - (base - match);
-               if (match + sequence.matchLength <= dictEnd) {
-                       memmove(oLitEnd, match, sequence.matchLength);
-                       return sequenceLength;
-               }
-               /* span extDict & currPrefixSegment */
-               {
-                       size_t const length1 = dictEnd - match;
-                       memmove(oLitEnd, match, length1);
-                       op = oLitEnd + length1;
-                       sequence.matchLength -= length1;
-                       match = base;
-               }
-       }
-       while (op < oMatchEnd)
-               *op++ = *match++;
-       return sequenceLength;
-}
-
-static seq_t ZSTD_decodeSequence(seqState_t *seqState)
-{
-       seq_t seq;
-
-       U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-       U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-       U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
-
-       U32 const llBits = LL_bits[llCode];
-       U32 const mlBits = ML_bits[mlCode];
-       U32 const ofBits = ofCode;
-       U32 const totalBits = llBits + mlBits + ofBits;
-
-       static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
-                                              20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
-
-       static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
-                                              21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
-                                              43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
-
-       static const U32 OF_base[MaxOff + 1] = {0,       1,     1,      5,      0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
-                                               0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
-                                               0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
-
-       /* sequence */
-       {
-               size_t offset;
-               if (!ofCode)
-                       offset = 0;
-               else {
-                       offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-                       if (ZSTD_32bits())
-                               BIT_reloadDStream(&seqState->DStream);
-               }
-
-               if (ofCode <= 1) {
-                       offset += (llCode == 0);
-                       if (offset) {
-                               size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                               temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-                               if (offset != 1)
-                                       seqState->prevOffset[2] = seqState->prevOffset[1];
-                               seqState->prevOffset[1] = seqState->prevOffset[0];
-                               seqState->prevOffset[0] = offset = temp;
-                       } else {
-                               offset = seqState->prevOffset[0];
-                       }
-               } else {
-                       seqState->prevOffset[2] = seqState->prevOffset[1];
-                       seqState->prevOffset[1] = seqState->prevOffset[0];
-                       seqState->prevOffset[0] = offset;
-               }
-               seq.offset = offset;
-       }
-
-       seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
-       if (ZSTD_32bits() && (mlBits + llBits > 24))
-               BIT_reloadDStream(&seqState->DStream);
-
-       seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
-       if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-               BIT_reloadDStream(&seqState->DStream);
-
-       /* ANS state update */
-       FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
-       FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
-       if (ZSTD_32bits())
-               BIT_reloadDStream(&seqState->DStream);             /* <= 18 bits */
-       FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
-
-       seq.match = NULL;
-
-       return seq;
-}
-
-FORCE_INLINE
-size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-                        const BYTE *const vBase, const BYTE *const dictEnd)
-{
-       BYTE *const oLitEnd = op + sequence.litLength;
-       size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-       BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-       BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-       const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-       const BYTE *match = oLitEnd - sequence.offset;
-
-       /* check */
-       if (oMatchEnd > oend)
-               return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-       if (iLitEnd > litLimit)
-               return ERROR(corruption_detected); /* over-read beyond lit buffer */
-       if (oLitEnd > oend_w)
-               return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
-
-       /* copy Literals */
-       ZSTD_copy8(op, *litPtr);
-       if (sequence.litLength > 8)
-               ZSTD_wildcopy(op + 8, (*litPtr) + 8,
-                             sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-       op = oLitEnd;
-       *litPtr = iLitEnd; /* update for next sequence */
-
-       /* copy Match */
-       if (sequence.offset > (size_t)(oLitEnd - base)) {
-               /* offset beyond prefix */
-               if (sequence.offset > (size_t)(oLitEnd - vBase))
-                       return ERROR(corruption_detected);
-               match = dictEnd + (match - base);
-               if (match + sequence.matchLength <= dictEnd) {
-                       memmove(oLitEnd, match, sequence.matchLength);
-                       return sequenceLength;
-               }
-               /* span extDict & currPrefixSegment */
-               {
-                       size_t const length1 = dictEnd - match;
-                       memmove(oLitEnd, match, length1);
-                       op = oLitEnd + length1;
-                       sequence.matchLength -= length1;
-                       match = base;
-                       if (op > oend_w || sequence.matchLength < MINMATCH) {
-                               U32 i;
-                               for (i = 0; i < sequence.matchLength; ++i)
-                                       op[i] = match[i];
-                               return sequenceLength;
-                       }
-               }
-       }
-       /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-       /* match within prefix */
-       if (sequence.offset < 8) {
-               /* close range match, overlap */
-               static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-               static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
-               int const sub2 = dec64table[sequence.offset];
-               op[0] = match[0];
-               op[1] = match[1];
-               op[2] = match[2];
-               op[3] = match[3];
-               match += dec32table[sequence.offset];
-               ZSTD_copy4(op + 4, match);
-               match -= sub2;
-       } else {
-               ZSTD_copy8(op, match);
-       }
-       op += 8;
-       match += 8;
-
-       if (oMatchEnd > oend - (16 - MINMATCH)) {
-               if (op < oend_w) {
-                       ZSTD_wildcopy(op, match, oend_w - op);
-                       match += oend_w - op;
-                       op = oend_w;
-               }
-               while (op < oMatchEnd)
-                       *op++ = *match++;
-       } else {
-               ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
-       }
-       return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
-{
-       const BYTE *ip = (const BYTE *)seqStart;
-       const BYTE *const iend = ip + seqSize;
-       BYTE *const ostart = (BYTE * const)dst;
-       BYTE *const oend = ostart + maxDstSize;
-       BYTE *op = ostart;
-       const BYTE *litPtr = dctx->litPtr;
-       const BYTE *const litEnd = litPtr + dctx->litSize;
-       const BYTE *const base = (const BYTE *)(dctx->base);
-       const BYTE *const vBase = (const BYTE *)(dctx->vBase);
-       const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
-       int nbSeq;
-
-       /* Build Decoding Tables */
-       {
-               size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
-               if (ZSTD_isError(seqHSize))
-                       return seqHSize;
-               ip += seqHSize;
-       }
-
-       /* Regen sequences */
-       if (nbSeq) {
-               seqState_t seqState;
-               dctx->fseEntropy = 1;
-               {
-                       U32 i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               seqState.prevOffset[i] = dctx->entropy.rep[i];
-               }
-               CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
-               FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-               FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-               FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-               for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) {
-                       nbSeq--;
-                       {
-                               seq_t const sequence = ZSTD_decodeSequence(&seqState);
-                               size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
-                               if (ZSTD_isError(oneSeqSize))
-                                       return oneSeqSize;
-                               op += oneSeqSize;
-                       }
-               }
-
-               /* check if reached exact end */
-               if (nbSeq)
-                       return ERROR(corruption_detected);
-               /* save reps for next block */
-               {
-                       U32 i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
-               }
-       }
-
-       /* last literal segment */
-       {
-               size_t const lastLLSize = litEnd - litPtr;
-               if (lastLLSize > (size_t)(oend - op))
-                       return ERROR(dstSize_tooSmall);
-               memcpy(op, litPtr, lastLLSize);
-               op += lastLLSize;
-       }
-
-       return op - ostart;
-}
-
-FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets)
-{
-       seq_t seq;
-
-       U32 const llCode = FSE_peekSymbol(&seqState->stateLL);
-       U32 const mlCode = FSE_peekSymbol(&seqState->stateML);
-       U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */
-
-       U32 const llBits = LL_bits[llCode];
-       U32 const mlBits = ML_bits[mlCode];
-       U32 const ofBits = ofCode;
-       U32 const totalBits = llBits + mlBits + ofBits;
-
-       static const U32 LL_base[MaxLL + 1] = {0,  1,  2,  3,  4,  5,  6,  7,  8,    9,     10,    11,    12,    13,     14,     15,     16,     18,
-                                              20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000};
-
-       static const U32 ML_base[MaxML + 1] = {3,  4,  5,  6,  7,  8,  9,  10,   11,    12,    13,    14,    15,     16,     17,     18,     19,     20,
-                                              21, 22, 23, 24, 25, 26, 27, 28,   29,    30,    31,    32,    33,     34,     35,     37,     39,     41,
-                                              43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003};
-
-       static const U32 OF_base[MaxOff + 1] = {0,       1,     1,      5,      0xD,      0x1D,      0x3D,      0x7D,      0xFD,     0x1FD,
-                                               0x3FD,   0x7FD,    0xFFD,    0x1FFD,   0x3FFD,   0x7FFD,    0xFFFD,    0x1FFFD,   0x3FFFD,  0x7FFFD,
-                                               0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD};
-
-       /* sequence */
-       {
-               size_t offset;
-               if (!ofCode)
-                       offset = 0;
-               else {
-                       if (longOffsets) {
-                               int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN);
-                               offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                               if (ZSTD_32bits() || extraBits)
-                                       BIT_reloadDStream(&seqState->DStream);
-                               if (extraBits)
-                                       offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-                       } else {
-                               offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-                               if (ZSTD_32bits())
-                                       BIT_reloadDStream(&seqState->DStream);
-                       }
-               }
-
-               if (ofCode <= 1) {
-                       offset += (llCode == 0);
-                       if (offset) {
-                               size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                               temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
-                               if (offset != 1)
-                                       seqState->prevOffset[2] = seqState->prevOffset[1];
-                               seqState->prevOffset[1] = seqState->prevOffset[0];
-                               seqState->prevOffset[0] = offset = temp;
-                       } else {
-                               offset = seqState->prevOffset[0];
-                       }
-               } else {
-                       seqState->prevOffset[2] = seqState->prevOffset[1];
-                       seqState->prevOffset[1] = seqState->prevOffset[0];
-                       seqState->prevOffset[0] = offset;
-               }
-               seq.offset = offset;
-       }
-
-       seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <=  16 bits */
-       if (ZSTD_32bits() && (mlBits + llBits > 24))
-               BIT_reloadDStream(&seqState->DStream);
-
-       seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <=  16 bits */
-       if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog)))
-               BIT_reloadDStream(&seqState->DStream);
-
-       {
-               size_t const pos = seqState->pos + seq.litLength;
-               seq.match = seqState->base + pos - seq.offset; /* single memory segment */
-               if (seq.offset > pos)
-                       seq.match += seqState->gotoDict; /* separate memory segment */
-               seqState->pos = pos + seq.matchLength;
-       }
-
-       /* ANS state update */
-       FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <=  9 bits */
-       FSE_updateState(&seqState->stateML, &seqState->DStream); /* <=  9 bits */
-       if (ZSTD_32bits())
-               BIT_reloadDStream(&seqState->DStream);             /* <= 18 bits */
-       FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <=  8 bits */
-
-       return seq;
-}
-
-static seq_t ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize)
-{
-       if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) {
-               return ZSTD_decodeSequenceLong_generic(seqState, 1);
-       } else {
-               return ZSTD_decodeSequenceLong_generic(seqState, 0);
-       }
-}
-
-FORCE_INLINE
-size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base,
-                            const BYTE *const vBase, const BYTE *const dictEnd)
-{
-       BYTE *const oLitEnd = op + sequence.litLength;
-       size_t const sequenceLength = sequence.litLength + sequence.matchLength;
-       BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
-       BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH;
-       const BYTE *const iLitEnd = *litPtr + sequence.litLength;
-       const BYTE *match = sequence.match;
-
-       /* check */
-       if (oMatchEnd > oend)
-               return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
-       if (iLitEnd > litLimit)
-               return ERROR(corruption_detected); /* over-read beyond lit buffer */
-       if (oLitEnd > oend_w)
-               return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
-
-       /* copy Literals */
-       ZSTD_copy8(op, *litPtr);
-       if (sequence.litLength > 8)
-               ZSTD_wildcopy(op + 8, (*litPtr) + 8,
-                             sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */
-       op = oLitEnd;
-       *litPtr = iLitEnd; /* update for next sequence */
-
-       /* copy Match */
-       if (sequence.offset > (size_t)(oLitEnd - base)) {
-               /* offset beyond prefix */
-               if (sequence.offset > (size_t)(oLitEnd - vBase))
-                       return ERROR(corruption_detected);
-               if (match + sequence.matchLength <= dictEnd) {
-                       memmove(oLitEnd, match, sequence.matchLength);
-                       return sequenceLength;
-               }
-               /* span extDict & currPrefixSegment */
-               {
-                       size_t const length1 = dictEnd - match;
-                       memmove(oLitEnd, match, length1);
-                       op = oLitEnd + length1;
-                       sequence.matchLength -= length1;
-                       match = base;
-                       if (op > oend_w || sequence.matchLength < MINMATCH) {
-                               U32 i;
-                               for (i = 0; i < sequence.matchLength; ++i)
-                                       op[i] = match[i];
-                               return sequenceLength;
-                       }
-               }
-       }
-       /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */
-
-       /* match within prefix */
-       if (sequence.offset < 8) {
-               /* close range match, overlap */
-               static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
-               static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */
-               int const sub2 = dec64table[sequence.offset];
-               op[0] = match[0];
-               op[1] = match[1];
-               op[2] = match[2];
-               op[3] = match[3];
-               match += dec32table[sequence.offset];
-               ZSTD_copy4(op + 4, match);
-               match -= sub2;
-       } else {
-               ZSTD_copy8(op, match);
-       }
-       op += 8;
-       match += 8;
-
-       if (oMatchEnd > oend - (16 - MINMATCH)) {
-               if (op < oend_w) {
-                       ZSTD_wildcopy(op, match, oend_w - op);
-                       match += oend_w - op;
-                       op = oend_w;
-               }
-               while (op < oMatchEnd)
-                       *op++ = *match++;
-       } else {
-               ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */
-       }
-       return sequenceLength;
-}
-
-static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize)
-{
-       const BYTE *ip = (const BYTE *)seqStart;
-       const BYTE *const iend = ip + seqSize;
-       BYTE *const ostart = (BYTE * const)dst;
-       BYTE *const oend = ostart + maxDstSize;
-       BYTE *op = ostart;
-       const BYTE *litPtr = dctx->litPtr;
-       const BYTE *const litEnd = litPtr + dctx->litSize;
-       const BYTE *const base = (const BYTE *)(dctx->base);
-       const BYTE *const vBase = (const BYTE *)(dctx->vBase);
-       const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd);
-       unsigned const windowSize = dctx->fParams.windowSize;
-       int nbSeq;
-
-       /* Build Decoding Tables */
-       {
-               size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize);
-               if (ZSTD_isError(seqHSize))
-                       return seqHSize;
-               ip += seqHSize;
-       }
-
-       /* Regen sequences */
-       if (nbSeq) {
-#define STORED_SEQS 4
-#define STOSEQ_MASK (STORED_SEQS - 1)
-#define ADVANCED_SEQS 4
-               seq_t *sequences = (seq_t *)dctx->entropy.workspace;
-               int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
-               seqState_t seqState;
-               int seqNb;
-               ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS);
-               dctx->fseEntropy = 1;
-               {
-                       U32 i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               seqState.prevOffset[i] = dctx->entropy.rep[i];
-               }
-               seqState.base = base;
-               seqState.pos = (size_t)(op - base);
-               seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */
-               CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected);
-               FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
-               FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
-               FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
-
-               /* prepare in advance */
-               for (seqNb = 0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) {
-                       sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize);
-               }
-               if (seqNb < seqAdvance)
-                       return ERROR(corruption_detected);
-
-               /* decode and decompress */
-               for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) {
-                       seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize);
-                       size_t const oneSeqSize =
-                           ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
-                       if (ZSTD_isError(oneSeqSize))
-                               return oneSeqSize;
-                       ZSTD_PREFETCH(sequence.match);
-                       sequences[seqNb & STOSEQ_MASK] = sequence;
-                       op += oneSeqSize;
-               }
-               if (seqNb < nbSeq)
-                       return ERROR(corruption_detected);
-
-               /* finish queue */
-               seqNb -= seqAdvance;
-               for (; seqNb < nbSeq; seqNb++) {
-                       size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd);
-                       if (ZSTD_isError(oneSeqSize))
-                               return oneSeqSize;
-                       op += oneSeqSize;
-               }
-
-               /* save reps for next block */
-               {
-                       U32 i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]);
-               }
-       }
-
-       /* last literal segment */
-       {
-               size_t const lastLLSize = litEnd - litPtr;
-               if (lastLLSize > (size_t)(oend - op))
-                       return ERROR(dstSize_tooSmall);
-               memcpy(op, litPtr, lastLLSize);
-               op += lastLLSize;
-       }
-
-       return op - ostart;
-}
-
-static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{ /* blockType == blockCompressed */
-       const BYTE *ip = (const BYTE *)src;
-
-       if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX)
-               return ERROR(srcSize_wrong);
-
-       /* Decode literals section */
-       {
-               size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
-               if (ZSTD_isError(litCSize))
-                       return litCSize;
-               ip += litCSize;
-               srcSize -= litCSize;
-       }
-       if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */
-                               /* likely because of register pressure */
-                               /* if that's the correct cause, then 32-bits ARM should be affected differently */
-                               /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */
-               if (dctx->fParams.windowSize > (1 << 23))
-                       return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize);
-       return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize);
-}
-
-static void ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst)
-{
-       if (dst != dctx->previousDstEnd) { /* not contiguous */
-               dctx->dictEnd = dctx->previousDstEnd;
-               dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
-               dctx->base = dst;
-               dctx->previousDstEnd = dst;
-       }
-}
-
-size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       size_t dSize;
-       ZSTD_checkContinuity(dctx, dst);
-       dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize);
-       dctx->previousDstEnd = (char *)dst + dSize;
-       return dSize;
-}
-
-/** ZSTD_insertBlock() :
-       insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
-size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize)
-{
-       ZSTD_checkContinuity(dctx, blockStart);
-       dctx->previousDstEnd = (const char *)blockStart + blockSize;
-       return blockSize;
-}
-
-size_t ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length)
-{
-       if (length > dstCapacity)
-               return ERROR(dstSize_tooSmall);
-       memset(dst, byte, length);
-       return length;
-}
-
-/** ZSTD_findFrameCompressedSize() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the compressed size of the frame starting at `src` */
-size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
-{
-       if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-               return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4);
-       } else {
-               const BYTE *ip = (const BYTE *)src;
-               const BYTE *const ipstart = ip;
-               size_t remainingSize = srcSize;
-               ZSTD_frameParams fParams;
-
-               size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize);
-               if (ZSTD_isError(headerSize))
-                       return headerSize;
-
-               /* Frame Header */
-               {
-                       size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize);
-                       if (ZSTD_isError(ret))
-                               return ret;
-                       if (ret > 0)
-                               return ERROR(srcSize_wrong);
-               }
-
-               ip += headerSize;
-               remainingSize -= headerSize;
-
-               /* Loop on each block */
-               while (1) {
-                       blockProperties_t blockProperties;
-                       size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-                       if (ZSTD_isError(cBlockSize))
-                               return cBlockSize;
-
-                       if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
-                               return ERROR(srcSize_wrong);
-
-                       ip += ZSTD_blockHeaderSize + cBlockSize;
-                       remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
-
-                       if (blockProperties.lastBlock)
-                               break;
-               }
-
-               if (fParams.checksumFlag) { /* Frame content checksum */
-                       if (remainingSize < 4)
-                               return ERROR(srcSize_wrong);
-                       ip += 4;
-                       remainingSize -= 4;
-               }
-
-               return ip - ipstart;
-       }
-}
-
-/*! ZSTD_decompressFrame() :
-*   @dctx must be properly initialized */
-static size_t ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr)
-{
-       const BYTE *ip = (const BYTE *)(*srcPtr);
-       BYTE *const ostart = (BYTE * const)dst;
-       BYTE *const oend = ostart + dstCapacity;
-       BYTE *op = ostart;
-       size_t remainingSize = *srcSizePtr;
-
-       /* check */
-       if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize)
-               return ERROR(srcSize_wrong);
-
-       /* Frame Header */
-       {
-               size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix);
-               if (ZSTD_isError(frameHeaderSize))
-                       return frameHeaderSize;
-               if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize)
-                       return ERROR(srcSize_wrong);
-               CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize));
-               ip += frameHeaderSize;
-               remainingSize -= frameHeaderSize;
-       }
-
-       /* Loop on each block */
-       while (1) {
-               size_t decodedSize;
-               blockProperties_t blockProperties;
-               size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
-               if (ZSTD_isError(cBlockSize))
-                       return cBlockSize;
-
-               ip += ZSTD_blockHeaderSize;
-               remainingSize -= ZSTD_blockHeaderSize;
-               if (cBlockSize > remainingSize)
-                       return ERROR(srcSize_wrong);
-
-               switch (blockProperties.blockType) {
-               case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break;
-               case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break;
-               case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break;
-               case bt_reserved:
-               default: return ERROR(corruption_detected);
-               }
-
-               if (ZSTD_isError(decodedSize))
-                       return decodedSize;
-               if (dctx->fParams.checksumFlag)
-                       xxh64_update(&dctx->xxhState, op, decodedSize);
-               op += decodedSize;
-               ip += cBlockSize;
-               remainingSize -= cBlockSize;
-               if (blockProperties.lastBlock)
-                       break;
-       }
-
-       if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
-               U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
-               U32 checkRead;
-               if (remainingSize < 4)
-                       return ERROR(checksum_wrong);
-               checkRead = ZSTD_readLE32(ip);
-               if (checkRead != checkCalc)
-                       return ERROR(checksum_wrong);
-               ip += 4;
-               remainingSize -= 4;
-       }
-
-       /* Allow caller to get size read */
-       *srcPtr = ip;
-       *srcSizePtr = remainingSize;
-       return op - ostart;
-}
-
-static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict);
-static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict);
-
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize,
-                                       const ZSTD_DDict *ddict)
-{
-       void *const dststart = dst;
-
-       if (ddict) {
-               if (dict) {
-                       /* programmer error, these two cases should be mutually exclusive */
-                       return ERROR(GENERIC);
-               }
-
-               dict = ZSTD_DDictDictContent(ddict);
-               dictSize = ZSTD_DDictDictSize(ddict);
-       }
-
-       while (srcSize >= ZSTD_frameHeaderSize_prefix) {
-               U32 magicNumber;
-
-               magicNumber = ZSTD_readLE32(src);
-               if (magicNumber != ZSTD_MAGICNUMBER) {
-                       if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
-                               size_t skippableSize;
-                               if (srcSize < ZSTD_skippableHeaderSize)
-                                       return ERROR(srcSize_wrong);
-                               skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize;
-                               if (srcSize < skippableSize) {
-                                       return ERROR(srcSize_wrong);
-                               }
-
-                               src = (const BYTE *)src + skippableSize;
-                               srcSize -= skippableSize;
-                               continue;
-                       } else {
-                               return ERROR(prefix_unknown);
-                       }
-               }
-
-               if (ddict) {
-                       /* we were called from ZSTD_decompress_usingDDict */
-                       ZSTD_refDDict(dctx, ddict);
-               } else {
-                       /* this will initialize correctly with no dict if dict == NULL, so
-                        * use this in all cases but ddict */
-                       CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize));
-               }
-               ZSTD_checkContinuity(dctx, dst);
-
-               {
-                       const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize);
-                       if (ZSTD_isError(res))
-                               return res;
-                       /* don't need to bounds check this, ZSTD_decompressFrame will have
-                        * already */
-                       dst = (BYTE *)dst + res;
-                       dstCapacity -= res;
-               }
-       }
-
-       if (srcSize)
-               return ERROR(srcSize_wrong); /* input not entirely consumed */
-
-       return (BYTE *)dst - (BYTE *)dststart;
-}
-
-size_t ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize)
-{
-       return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
-}
-
-size_t ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0);
-}
-
-/*-**************************************
-*   Advanced Streaming Decompression API
-*   Bufferless and synchronous
-****************************************/
-size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; }
-
-ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx)
-{
-       switch (dctx->stage) {
-       default: /* should not happen */
-       case ZSTDds_getFrameHeaderSize:
-       case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader;
-       case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader;
-       case ZSTDds_decompressBlock: return ZSTDnit_block;
-       case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock;
-       case ZSTDds_checkChecksum: return ZSTDnit_checksum;
-       case ZSTDds_decodeSkippableHeader:
-       case ZSTDds_skipFrame: return ZSTDnit_skippableFrame;
-       }
-}
-
-int ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */
-
-/** ZSTD_decompressContinue() :
-*   @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
-*             or an error code, which can be tested using ZSTD_isError() */
-size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       /* Sanity check */
-       if (srcSize != dctx->expected)
-               return ERROR(srcSize_wrong);
-       if (dstCapacity)
-               ZSTD_checkContinuity(dctx, dst);
-
-       switch (dctx->stage) {
-       case ZSTDds_getFrameHeaderSize:
-               if (srcSize != ZSTD_frameHeaderSize_prefix)
-                       return ERROR(srcSize_wrong);                                    /* impossible */
-               if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
-                       memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
-                       dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */
-                       dctx->stage = ZSTDds_decodeSkippableHeader;
-                       return 0;
-               }
-               dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix);
-               if (ZSTD_isError(dctx->headerSize))
-                       return dctx->headerSize;
-               memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix);
-               if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) {
-                       dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix;
-                       dctx->stage = ZSTDds_decodeFrameHeader;
-                       return 0;
-               }
-               dctx->expected = 0; /* not necessary to copy more */
-               fallthrough;
-
-       case ZSTDds_decodeFrameHeader:
-               memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
-               CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize));
-               dctx->expected = ZSTD_blockHeaderSize;
-               dctx->stage = ZSTDds_decodeBlockHeader;
-               return 0;
-
-       case ZSTDds_decodeBlockHeader: {
-               blockProperties_t bp;
-               size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
-               if (ZSTD_isError(cBlockSize))
-                       return cBlockSize;
-               dctx->expected = cBlockSize;
-               dctx->bType = bp.blockType;
-               dctx->rleSize = bp.origSize;
-               if (cBlockSize) {
-                       dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
-                       return 0;
-               }
-               /* empty block */
-               if (bp.lastBlock) {
-                       if (dctx->fParams.checksumFlag) {
-                               dctx->expected = 4;
-                               dctx->stage = ZSTDds_checkChecksum;
-                       } else {
-                               dctx->expected = 0; /* end of frame */
-                               dctx->stage = ZSTDds_getFrameHeaderSize;
-                       }
-               } else {
-                       dctx->expected = 3; /* go directly to next header */
-                       dctx->stage = ZSTDds_decodeBlockHeader;
-               }
-               return 0;
-       }
-       case ZSTDds_decompressLastBlock:
-       case ZSTDds_decompressBlock: {
-               size_t rSize;
-               switch (dctx->bType) {
-               case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break;
-               case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break;
-               case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break;
-               case bt_reserved: /* should never happen */
-               default: return ERROR(corruption_detected);
-               }
-               if (ZSTD_isError(rSize))
-                       return rSize;
-               if (dctx->fParams.checksumFlag)
-                       xxh64_update(&dctx->xxhState, dst, rSize);
-
-               if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
-                       if (dctx->fParams.checksumFlag) {       /* another round for frame checksum */
-                               dctx->expected = 4;
-                               dctx->stage = ZSTDds_checkChecksum;
-                       } else {
-                               dctx->expected = 0; /* ends here */
-                               dctx->stage = ZSTDds_getFrameHeaderSize;
-                       }
-               } else {
-                       dctx->stage = ZSTDds_decodeBlockHeader;
-                       dctx->expected = ZSTD_blockHeaderSize;
-                       dctx->previousDstEnd = (char *)dst + rSize;
-               }
-               return rSize;
-       }
-       case ZSTDds_checkChecksum: {
-               U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
-               U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */
-               if (check32 != h32)
-                       return ERROR(checksum_wrong);
-               dctx->expected = 0;
-               dctx->stage = ZSTDds_getFrameHeaderSize;
-               return 0;
-       }
-       case ZSTDds_decodeSkippableHeader: {
-               memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected);
-               dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4);
-               dctx->stage = ZSTDds_skipFrame;
-               return 0;
-       }
-       case ZSTDds_skipFrame: {
-               dctx->expected = 0;
-               dctx->stage = ZSTDds_getFrameHeaderSize;
-               return 0;
-       }
-       default:
-               return ERROR(GENERIC); /* impossible */
-       }
-}
-
-static size_t ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-       dctx->dictEnd = dctx->previousDstEnd;
-       dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base));
-       dctx->base = dict;
-       dctx->previousDstEnd = (const char *)dict + dictSize;
-       return 0;
-}
-
-/* ZSTD_loadEntropy() :
- * dict : must point at beginning of a valid zstd dictionary
- * @return : size of entropy tables read */
-static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize)
-{
-       const BYTE *dictPtr = (const BYTE *)dict;
-       const BYTE *const dictEnd = dictPtr + dictSize;
-
-       if (dictSize <= 8)
-               return ERROR(dictionary_corrupted);
-       dictPtr += 8; /* skip header = magic + dictID */
-
-       {
-               size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace));
-               if (HUF_isError(hSize))
-                       return ERROR(dictionary_corrupted);
-               dictPtr += hSize;
-       }
-
-       {
-               short offcodeNCount[MaxOff + 1];
-               U32 offcodeMaxValue = MaxOff, offcodeLog;
-               size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr);
-               if (FSE_isError(offcodeHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               if (offcodeLog > OffFSELog)
-                       return ERROR(dictionary_corrupted);
-               CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-               dictPtr += offcodeHeaderSize;
-       }
-
-       {
-               short matchlengthNCount[MaxML + 1];
-               unsigned matchlengthMaxValue = MaxML, matchlengthLog;
-               size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr);
-               if (FSE_isError(matchlengthHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               if (matchlengthLog > MLFSELog)
-                       return ERROR(dictionary_corrupted);
-               CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-               dictPtr += matchlengthHeaderSize;
-       }
-
-       {
-               short litlengthNCount[MaxLL + 1];
-               unsigned litlengthMaxValue = MaxLL, litlengthLog;
-               size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr);
-               if (FSE_isError(litlengthHeaderSize))
-                       return ERROR(dictionary_corrupted);
-               if (litlengthLog > LLFSELog)
-                       return ERROR(dictionary_corrupted);
-               CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted);
-               dictPtr += litlengthHeaderSize;
-       }
-
-       if (dictPtr + 12 > dictEnd)
-               return ERROR(dictionary_corrupted);
-       {
-               int i;
-               size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12));
-               for (i = 0; i < 3; i++) {
-                       U32 const rep = ZSTD_readLE32(dictPtr);
-                       dictPtr += 4;
-                       if (rep == 0 || rep >= dictContentSize)
-                               return ERROR(dictionary_corrupted);
-                       entropy->rep[i] = rep;
-               }
-       }
-
-       return dictPtr - (const BYTE *)dict;
-}
-
-static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-       if (dictSize < 8)
-               return ZSTD_refDictContent(dctx, dict, dictSize);
-       {
-               U32 const magic = ZSTD_readLE32(dict);
-               if (magic != ZSTD_DICT_MAGIC) {
-                       return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
-               }
-       }
-       dctx->dictID = ZSTD_readLE32((const char *)dict + 4);
-
-       /* load entropy tables */
-       {
-               size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
-               if (ZSTD_isError(eSize))
-                       return ERROR(dictionary_corrupted);
-               dict = (const char *)dict + eSize;
-               dictSize -= eSize;
-       }
-       dctx->litEntropy = dctx->fseEntropy = 1;
-
-       /* reference dictionary content */
-       return ZSTD_refDictContent(dctx, dict, dictSize);
-}
-
-size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize)
-{
-       CHECK_F(ZSTD_decompressBegin(dctx));
-       if (dict && dictSize)
-               CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted);
-       return 0;
-}
-
-/* ======   ZSTD_DDict   ====== */
-
-struct ZSTD_DDict_s {
-       void *dictBuffer;
-       const void *dictContent;
-       size_t dictSize;
-       ZSTD_entropyTables_t entropy;
-       U32 dictID;
-       U32 entropyPresent;
-       ZSTD_customMem cMem;
-}; /* typedef'd to ZSTD_DDict within "zstd.h" */
-
-size_t ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); }
-
-static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; }
-
-static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; }
-
-static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict)
-{
-       ZSTD_decompressBegin(dstDCtx); /* init */
-       if (ddict) {                   /* support refDDict on NULL */
-               dstDCtx->dictID = ddict->dictID;
-               dstDCtx->base = ddict->dictContent;
-               dstDCtx->vBase = ddict->dictContent;
-               dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize;
-               dstDCtx->previousDstEnd = dstDCtx->dictEnd;
-               if (ddict->entropyPresent) {
-                       dstDCtx->litEntropy = 1;
-                       dstDCtx->fseEntropy = 1;
-                       dstDCtx->LLTptr = ddict->entropy.LLTable;
-                       dstDCtx->MLTptr = ddict->entropy.MLTable;
-                       dstDCtx->OFTptr = ddict->entropy.OFTable;
-                       dstDCtx->HUFptr = ddict->entropy.hufTable;
-                       dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
-                       dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
-                       dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
-               } else {
-                       dstDCtx->litEntropy = 0;
-                       dstDCtx->fseEntropy = 0;
-               }
-       }
-}
-
-static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict)
-{
-       ddict->dictID = 0;
-       ddict->entropyPresent = 0;
-       if (ddict->dictSize < 8)
-               return 0;
-       {
-               U32 const magic = ZSTD_readLE32(ddict->dictContent);
-               if (magic != ZSTD_DICT_MAGIC)
-                       return 0; /* pure content mode */
-       }
-       ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4);
-
-       /* load entropy tables */
-       CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted);
-       ddict->entropyPresent = 1;
-       return 0;
-}
-
-static ZSTD_DDict *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem)
-{
-       if (!customMem.customAlloc || !customMem.customFree)
-               return NULL;
-
-       {
-               ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
-               if (!ddict)
-                       return NULL;
-               ddict->cMem = customMem;
-
-               if ((byReference) || (!dict) || (!dictSize)) {
-                       ddict->dictBuffer = NULL;
-                       ddict->dictContent = dict;
-               } else {
-                       void *const internalBuffer = ZSTD_malloc(dictSize, customMem);
-                       if (!internalBuffer) {
-                               ZSTD_freeDDict(ddict);
-                               return NULL;
-                       }
-                       memcpy(internalBuffer, dict, dictSize);
-                       ddict->dictBuffer = internalBuffer;
-                       ddict->dictContent = internalBuffer;
-               }
-               ddict->dictSize = dictSize;
-               ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
-               /* parse dictionary content */
-               {
-                       size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict);
-                       if (ZSTD_isError(errorCode)) {
-                               ZSTD_freeDDict(ddict);
-                               return NULL;
-                       }
-               }
-
-               return ddict;
-       }
-}
-
-/*! ZSTD_initDDict() :
-*   Create a digested dictionary, to start decompression without startup delay.
-*   `dict` content is copied inside DDict.
-*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
-ZSTD_DDict *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-       return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem);
-}
-
-size_t ZSTD_freeDDict(ZSTD_DDict *ddict)
-{
-       if (ddict == NULL)
-               return 0; /* support free on NULL */
-       {
-               ZSTD_customMem const cMem = ddict->cMem;
-               ZSTD_free(ddict->dictBuffer, cMem);
-               ZSTD_free(ddict, cMem);
-               return 0;
-       }
-}
-
-/*! ZSTD_getDictID_fromDict() :
- *  Provides the dictID stored within dictionary.
- *  if @return == 0, the dictionary is not conformant with Zstandard specification.
- *  It can still be loaded, but as a content-only dictionary. */
-unsigned ZSTD_getDictID_fromDict(const void *dict, size_t dictSize)
-{
-       if (dictSize < 8)
-               return 0;
-       if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC)
-               return 0;
-       return ZSTD_readLE32((const char *)dict + 4);
-}
-
-/*! ZSTD_getDictID_fromDDict() :
- *  Provides the dictID of the dictionary loaded into `ddict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict)
-{
-       if (ddict == NULL)
-               return 0;
-       return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
-}
-
-/*! ZSTD_getDictID_fromFrame() :
- *  Provides the dictID required to decompressed the frame stored within `src`.
- *  If @return == 0, the dictID could not be decoded.
- *  This could for one of the following reasons :
- *  - The frame does not require a dictionary to be decoded (most common case).
- *  - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information.
- *    Note : this use case also happens when using a non-conformant dictionary.
- *  - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
- *  - This is not a Zstandard frame.
- *  When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */
-unsigned ZSTD_getDictID_fromFrame(const void *src, size_t srcSize)
-{
-       ZSTD_frameParams zfp = {0, 0, 0, 0};
-       size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize);
-       if (ZSTD_isError(hError))
-               return 0;
-       return zfp.dictID;
-}
-
-/*! ZSTD_decompress_usingDDict() :
-*   Decompression using a pre-digested Dictionary
-*   Use dictionary without significant overhead. */
-size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict)
-{
-       /* pass content and size in case legacy frames are encountered */
-       return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict);
-}
-
-/*=====================================
-*   Streaming decompression
-*====================================*/
-
-typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
-
-/* *** Resource management *** */
-struct ZSTD_DStream_s {
-       ZSTD_DCtx *dctx;
-       ZSTD_DDict *ddictLocal;
-       const ZSTD_DDict *ddict;
-       ZSTD_frameParams fParams;
-       ZSTD_dStreamStage stage;
-       char *inBuff;
-       size_t inBuffSize;
-       size_t inPos;
-       size_t maxWindowSize;
-       char *outBuff;
-       size_t outBuffSize;
-       size_t outStart;
-       size_t outEnd;
-       size_t blockSize;
-       BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */
-       size_t lhSize;
-       ZSTD_customMem customMem;
-       void *legacyContext;
-       U32 previousLegacyVersion;
-       U32 legacyVersion;
-       U32 hostageByte;
-}; /* typedef'd to ZSTD_DStream within "zstd.h" */
-
-size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize)
-{
-       size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-       size_t const inBuffSize = blockSize;
-       size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-       return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize);
-}
-
-static ZSTD_DStream *ZSTD_createDStream_advanced(ZSTD_customMem customMem)
-{
-       ZSTD_DStream *zds;
-
-       if (!customMem.customAlloc || !customMem.customFree)
-               return NULL;
-
-       zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem);
-       if (zds == NULL)
-               return NULL;
-       memset(zds, 0, sizeof(ZSTD_DStream));
-       memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem));
-       zds->dctx = ZSTD_createDCtx_advanced(customMem);
-       if (zds->dctx == NULL) {
-               ZSTD_freeDStream(zds);
-               return NULL;
-       }
-       zds->stage = zdss_init;
-       zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
-       return zds;
-}
-
-ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize);
-       ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem);
-       if (!zds) {
-               return NULL;
-       }
-
-       zds->maxWindowSize = maxWindowSize;
-       zds->stage = zdss_loadHeader;
-       zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-       ZSTD_freeDDict(zds->ddictLocal);
-       zds->ddictLocal = NULL;
-       zds->ddict = zds->ddictLocal;
-       zds->legacyVersion = 0;
-       zds->hostageByte = 0;
-
-       {
-               size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-               size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-
-               zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem);
-               zds->inBuffSize = blockSize;
-               zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem);
-               zds->outBuffSize = neededOutSize;
-               if (zds->inBuff == NULL || zds->outBuff == NULL) {
-                       ZSTD_freeDStream(zds);
-                       return NULL;
-               }
-       }
-       return zds;
-}
-
-ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize)
-{
-       ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize);
-       if (zds) {
-               zds->ddict = ddict;
-       }
-       return zds;
-}
-
-size_t ZSTD_freeDStream(ZSTD_DStream *zds)
-{
-       if (zds == NULL)
-               return 0; /* support free on null */
-       {
-               ZSTD_customMem const cMem = zds->customMem;
-               ZSTD_freeDCtx(zds->dctx);
-               zds->dctx = NULL;
-               ZSTD_freeDDict(zds->ddictLocal);
-               zds->ddictLocal = NULL;
-               ZSTD_free(zds->inBuff, cMem);
-               zds->inBuff = NULL;
-               ZSTD_free(zds->outBuff, cMem);
-               zds->outBuff = NULL;
-               ZSTD_free(zds, cMem);
-               return 0;
-       }
-}
-
-/* *** Initialization *** */
-
-size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; }
-size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
-
-size_t ZSTD_resetDStream(ZSTD_DStream *zds)
-{
-       zds->stage = zdss_loadHeader;
-       zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-       zds->legacyVersion = 0;
-       zds->hostageByte = 0;
-       return ZSTD_frameHeaderSize_prefix;
-}
-
-/* *****   Decompression   ***** */
-
-ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize)
-{
-       size_t const length = MIN(dstCapacity, srcSize);
-       memcpy(dst, src, length);
-       return length;
-}
-
-size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input)
-{
-       const char *const istart = (const char *)(input->src) + input->pos;
-       const char *const iend = (const char *)(input->src) + input->size;
-       const char *ip = istart;
-       char *const ostart = (char *)(output->dst) + output->pos;
-       char *const oend = (char *)(output->dst) + output->size;
-       char *op = ostart;
-       U32 someMoreWork = 1;
-
-       while (someMoreWork) {
-               switch (zds->stage) {
-               case zdss_init:
-                       ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */
-                       fallthrough;
-
-               case zdss_loadHeader: {
-                       size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize);
-                       if (ZSTD_isError(hSize))
-                               return hSize;
-                       if (hSize != 0) {                                  /* need more input */
-                               size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */
-                               if (toLoad > (size_t)(iend - ip)) {     /* not enough input to load full header */
-                                       memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip);
-                                       zds->lhSize += iend - ip;
-                                       input->pos = input->size;
-                                       return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) +
-                                              ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
-                               }
-                               memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad);
-                               zds->lhSize = hSize;
-                               ip += toLoad;
-                               break;
-                       }
-
-                       /* check for single-pass mode opportunity */
-                       if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */
-                           && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) {
-                               size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart);
-                               if (cSize <= (size_t)(iend - istart)) {
-                                       size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict);
-                                       if (ZSTD_isError(decompressedSize))
-                                               return decompressedSize;
-                                       ip = istart + cSize;
-                                       op += decompressedSize;
-                                       zds->dctx->expected = 0;
-                                       zds->stage = zdss_init;
-                                       someMoreWork = 0;
-                                       break;
-                               }
-                       }
-
-                       /* Consume header */
-                       ZSTD_refDDict(zds->dctx, zds->ddict);
-                       {
-                               size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */
-                               CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size));
-                               {
-                                       size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-                                       CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size));
-                               }
-                       }
-
-                       zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
-                       if (zds->fParams.windowSize > zds->maxWindowSize)
-                               return ERROR(frameParameter_windowTooLarge);
-
-                       /* Buffers are preallocated, but double check */
-                       {
-                               size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
-                               size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2;
-                               if (zds->inBuffSize < blockSize) {
-                                       return ERROR(GENERIC);
-                               }
-                               if (zds->outBuffSize < neededOutSize) {
-                                       return ERROR(GENERIC);
-                               }
-                               zds->blockSize = blockSize;
-                       }
-                       zds->stage = zdss_read;
-               }
-                       fallthrough;
-
-               case zdss_read: {
-                       size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-                       if (neededInSize == 0) { /* end of frame */
-                               zds->stage = zdss_init;
-                               someMoreWork = 0;
-                               break;
-                       }
-                       if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */
-                               const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-                               size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart,
-                                                                                  (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize);
-                               if (ZSTD_isError(decodedSize))
-                                       return decodedSize;
-                               ip += neededInSize;
-                               if (!decodedSize && !isSkipFrame)
-                                       break; /* this was just a header */
-                               zds->outEnd = zds->outStart + decodedSize;
-                               zds->stage = zdss_flush;
-                               break;
-                       }
-                       if (ip == iend) {
-                               someMoreWork = 0;
-                               break;
-                       } /* no more input */
-                       zds->stage = zdss_load;
-                       /* pass-through */
-               }
-                       fallthrough;
-
-               case zdss_load: {
-                       size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-                       size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */
-                       size_t loadedSize;
-                       if (toLoad > zds->inBuffSize - zds->inPos)
-                               return ERROR(corruption_detected); /* should never happen */
-                       loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip);
-                       ip += loadedSize;
-                       zds->inPos += loadedSize;
-                       if (loadedSize < toLoad) {
-                               someMoreWork = 0;
-                               break;
-                       } /* not enough input, wait for more */
-
-                       /* decode loaded input */
-                       {
-                               const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx);
-                               size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart,
-                                                                                  zds->inBuff, neededInSize);
-                               if (ZSTD_isError(decodedSize))
-                                       return decodedSize;
-                               zds->inPos = 0; /* input is consumed */
-                               if (!decodedSize && !isSkipFrame) {
-                                       zds->stage = zdss_read;
-                                       break;
-                               } /* this was just a header */
-                               zds->outEnd = zds->outStart + decodedSize;
-                               zds->stage = zdss_flush;
-                               /* pass-through */
-                       }
-               }
-                       fallthrough;
-
-               case zdss_flush: {
-                       size_t const toFlushSize = zds->outEnd - zds->outStart;
-                       size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize);
-                       op += flushedSize;
-                       zds->outStart += flushedSize;
-                       if (flushedSize == toFlushSize) { /* flush completed */
-                               zds->stage = zdss_read;
-                               if (zds->outStart + zds->blockSize > zds->outBuffSize)
-                                       zds->outStart = zds->outEnd = 0;
-                               break;
-                       }
-                       /* cannot complete flush */
-                       someMoreWork = 0;
-                       break;
-               }
-               default:
-                       return ERROR(GENERIC); /* impossible */
-               }
-       }
-
-       /* result */
-       input->pos += (size_t)(ip - istart);
-       output->pos += (size_t)(op - ostart);
-       {
-               size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx);
-               if (!nextSrcSizeHint) {                     /* frame fully decoded */
-                       if (zds->outEnd == zds->outStart) { /* output fully flushed */
-                               if (zds->hostageByte) {
-                                       if (input->pos >= input->size) {
-                                               zds->stage = zdss_read;
-                                               return 1;
-                                       }            /* can't release hostage (not present) */
-                                       input->pos++; /* release hostage */
-                               }
-                               return 0;
-                       }
-                       if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
-                               input->pos--;    /* note : pos > 0, otherwise, impossible to finish reading last block */
-                               zds->hostageByte = 1;
-                       }
-                       return 1;
-               }
-               nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */
-               if (zds->inPos > nextSrcSizeHint)
-                       return ERROR(GENERIC); /* should never happen */
-               nextSrcSizeHint -= zds->inPos; /* already loaded*/
-               return nextSrcSizeHint;
-       }
-}
-
-EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDCtx);
-EXPORT_SYMBOL(ZSTD_decompressDCtx);
-EXPORT_SYMBOL(ZSTD_decompress_usingDict);
-
-EXPORT_SYMBOL(ZSTD_DDictWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDDict);
-EXPORT_SYMBOL(ZSTD_decompress_usingDDict);
-
-EXPORT_SYMBOL(ZSTD_DStreamWorkspaceBound);
-EXPORT_SYMBOL(ZSTD_initDStream);
-EXPORT_SYMBOL(ZSTD_initDStream_usingDDict);
-EXPORT_SYMBOL(ZSTD_resetDStream);
-EXPORT_SYMBOL(ZSTD_decompressStream);
-EXPORT_SYMBOL(ZSTD_DStreamInSize);
-EXPORT_SYMBOL(ZSTD_DStreamOutSize);
-
-EXPORT_SYMBOL(ZSTD_findFrameCompressedSize);
-EXPORT_SYMBOL(ZSTD_getFrameContentSize);
-EXPORT_SYMBOL(ZSTD_findDecompressedSize);
-
-EXPORT_SYMBOL(ZSTD_isFrame);
-EXPORT_SYMBOL(ZSTD_getDictID_fromDict);
-EXPORT_SYMBOL(ZSTD_getDictID_fromDDict);
-EXPORT_SYMBOL(ZSTD_getDictID_fromFrame);
-
-EXPORT_SYMBOL(ZSTD_getFrameParams);
-EXPORT_SYMBOL(ZSTD_decompressBegin);
-EXPORT_SYMBOL(ZSTD_decompressBegin_usingDict);
-EXPORT_SYMBOL(ZSTD_copyDCtx);
-EXPORT_SYMBOL(ZSTD_nextSrcSizeToDecompress);
-EXPORT_SYMBOL(ZSTD_decompressContinue);
-EXPORT_SYMBOL(ZSTD_nextInputType);
-
-EXPORT_SYMBOL(ZSTD_decompressBlock);
-EXPORT_SYMBOL(ZSTD_insertBlock);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
new file mode 100644 (file)
index 0000000..5105e59
--- /dev/null
@@ -0,0 +1,1206 @@
+/* ******************************************************************
+ * huff0 huffman decoder,
+ * part of Finite State Entropy library
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ *
+ *  You can contact the author at :
+ *  - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* **************************************************************
+*  Dependencies
+****************************************************************/
+#include "../common/zstd_deps.h"  /* ZSTD_memcpy, ZSTD_memset */
+#include "../common/compiler.h"
+#include "../common/bitstream.h"  /* BIT_* */
+#include "../common/fse.h"        /* to compress headers */
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/error_private.h"
+
+/* **************************************************************
+*  Macros
+****************************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * Huffman decompression implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(HUF_FORCE_DECOMPRESS_X1) && \
+    defined(HUF_FORCE_DECOMPRESS_X2)
+#error "Cannot force the use of the X1 and X2 decoders at the same time!"
+#endif
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_isError ERR_isError
+
+
+/* **************************************************************
+*  Byte alignment for workSpace management
+****************************************************************/
+#define HUF_ALIGN(x, a)         HUF_ALIGN_MASK((x), (a) - 1)
+#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+
+/* **************************************************************
+*  BMI2 Variant Wrappers
+****************************************************************/
+#if DYNAMIC_BMI2
+
+#define HUF_DGEN(fn)                                                        \
+                                                                            \
+    static size_t fn##_default(                                             \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
+                  void* dst,  size_t dstSize,                               \
+            const void* cSrc, size_t cSrcSize,                              \
+            const HUF_DTable* DTable)                                       \
+    {                                                                       \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }                                                                       \
+                                                                            \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        if (bmi2) {                                                         \
+            return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);         \
+        }                                                                   \
+        return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable);          \
+    }
+
+#else
+
+#define HUF_DGEN(fn)                                                        \
+    static size_t fn(void* dst, size_t dstSize, void const* cSrc,           \
+                     size_t cSrcSize, HUF_DTable const* DTable, int bmi2)   \
+    {                                                                       \
+        (void)bmi2;                                                         \
+        return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
+    }
+
+#endif
+
+
+/*-***************************/
+/*  generic DTableDesc       */
+/*-***************************/
+typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
+
+static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
+{
+    DTableDesc dtd;
+    ZSTD_memcpy(&dtd, table, sizeof(dtd));
+    return dtd;
+}
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+
+/*-***************************/
+/*  single-symbol decoding   */
+/*-***************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
+
+/*
+ * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
+ * a time.
+ */
+static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
+    U64 D4;
+    if (MEM_isLittleEndian()) {
+        D4 = symbol + (nbBits << 8);
+    } else {
+        D4 = (symbol << 8) + nbBits;
+    }
+    D4 *= 0x0001000100010001ULL;
+    return D4;
+}
+
+typedef struct {
+        U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
+        U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+        BYTE symbols[HUF_SYMBOLVALUE_MAX + 1];
+        BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
+} HUF_ReadDTableX1_Workspace;
+
+
+size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
+{
+    return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    size_t iSize;
+    void* const dtPtr = DTable + 1;
+    HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
+    HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace;
+
+    DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp));
+    if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge);
+
+    DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
+    /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* Table header */
+    {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+        if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
+        dtd.tableType = 0;
+        dtd.tableLog = (BYTE)tableLog;
+        ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    }
+
+    /* Compute symbols and rankStart given rankVal:
+     *
+     * rankVal already contains the number of values of each weight.
+     *
+     * symbols contains the symbols ordered by weight. First are the rankVal[0]
+     * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on.
+     * symbols[0] is filled (but unused) to avoid a branch.
+     *
+     * rankStart contains the offset where each rank belongs in the DTable.
+     * rankStart[0] is not filled because there are no entries in the table for
+     * weight 0.
+     */
+    {
+        int n;
+        int nextRankStart = 0;
+        int const unroll = 4;
+        int const nLimit = (int)nbSymbols - unroll + 1;
+        for (n=0; n<(int)tableLog+1; n++) {
+            U32 const curr = nextRankStart;
+            nextRankStart += wksp->rankVal[n];
+            wksp->rankStart[n] = curr;
+        }
+        for (n=0; n < nLimit; n += unroll) {
+            int u;
+            for (u=0; u < unroll; ++u) {
+                size_t const w = wksp->huffWeight[n+u];
+                wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u);
+            }
+        }
+        for (; n < (int)nbSymbols; ++n) {
+            size_t const w = wksp->huffWeight[n];
+            wksp->symbols[wksp->rankStart[w]++] = (BYTE)n;
+        }
+    }
+
+    /* fill DTable
+     * We fill all entries of each weight in order.
+     * That way length is a constant for each iteration of the outter loop.
+     * We can switch based on the length to a different inner loop which is
+     * optimized for that particular case.
+     */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + dstSize;
+    const void* dtPtr = DTable + 1;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+    BIT_DStream_t bitD;
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    /* Check */
+    if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - 3;
+        const void* const dtPtr = DTable + 1;
+        const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+        U32 endSignal = 1;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
+        for ( ; (endSignal) & (op4 < olimit) ; ) {
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+        }
+
+        /* check corruption */
+        /* note : should not be necessary : op# advance in lock step, and we control op4.
+         *        but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX1(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
+                                               const void *cSrc,
+                                               size_t cSrcSize,
+                                               const HUF_DTable *DTable);
+
+HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
+
+
+
+size_t HUF_decompress1X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X1_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 0) return ERROR(GENERIC);
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X2 */
+
+
+#ifndef HUF_FORCE_DECOMPRESS_X1
+
+/* *************************/
+/* double-symbols decoding */
+/* *************************/
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
+typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
+
+
+/* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+{
+    HUF_DEltX2 DElt;
+    U32* rankVal = wksp;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
+    /* get pre-calculated rankVal */
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+            const U32 symbol = sortedSymbols[s].symbol;
+            const U32 weight = sortedSymbols[s].weight;
+            const U32 nbBits = nbBitsBaseline - weight;
+            const U32 length = 1 << (sizeLog-nbBits);
+            const U32 start = rankVal[weight];
+            U32 i = start;
+            const U32 end = start + length;
+
+            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+            DElt.nbBits = (BYTE)(nbBits + consumed);
+            DElt.length = 2;
+            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+            rankVal[weight] += length;
+    }   }
+}
+
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+    U32* rankVal = wksp;
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
+
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol, wksp, wkspSize);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            {   U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
+
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                             void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = wksp->rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
+    }   }
+    return 1;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+
+HINT_INLINE size_t
+HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
+                const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 8 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to end : up to 2 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    while (p <= pEnd-2)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+
+    if (p < pEnd)
+        p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
+
+    return p-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BIT_DStream_t bitD;
+
+    /* Init */
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    /* decode */
+    {   BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
+    }
+
+    /* check */
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    /* decoded size */
+    return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X2_usingDTable_internal_body(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {   const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+        BYTE* const olimit = oend - (sizeof(size_t)-1);
+        const void* const dtPtr = DTable+1;
+        const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        const BYTE* const istart1 = istart + 6;  /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal = 1;
+        DTableDesc const dtd = HUF_getDTableDesc(DTable);
+        U32 const dtLog = dtd.tableLog;
+
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
+        CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
+        CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
+        CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        for ( ; (endSignal) & (op4 < olimit); ) {
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+#else
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = (U32)LIKELY((U32)
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
+HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
+
+size_t HUF_decompress1X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
+                                               workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
+}
+
+
+size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    if (dtd.tableType != 1) return ERROR(GENERIC);
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+}
+
+static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+                                         workSpace, wkspSize);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+
+size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                   const void* cSrc, size_t cSrcSize,
+                                   void* workSpace, size_t wkspSize)
+{
+    return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+
+#endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/
+
+size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
+                                    const void* cSrc, size_t cSrcSize,
+                                    const HUF_DTable* DTable)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#else
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
+#endif
+}
+
+
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+{
+    /* single, double, quad */
+    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
+    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
+    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
+    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
+    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
+    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
+    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
+    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
+    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
+    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
+    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
+    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
+    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
+    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
+    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+};
+#endif
+
+/* HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    assert(dstSize > 0);
+    assert(dstSize <= 128*1024);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 0;
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 1;
+#else
+    /* decoder timing evaluation */
+    {   U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize);   /* Q < 16 */
+        U32 const D256 = (U32)(dstSize >> 8);
+        U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
+        U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
+        DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
+        return DTime1 < DTime0;
+    }
+#endif
+}
+
+
+size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst,
+                                     size_t dstSize, const void* cSrc,
+                                     size_t cSrcSize, void* workSpace,
+                                     size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                            cSrcSize, workSpace, wkspSize):
+                        HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
+                                  const void* cSrc, size_t cSrcSize,
+                                  void* workSpace, size_t wkspSize)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize > dstSize) return ERROR(corruption_detected);   /* invalid */
+    if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; }   /* not compressed */
+    if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; }   /* RLE */
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#else
+        return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize):
+                        HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
+                                cSrcSize, workSpace, wkspSize);
+#endif
+    }
+}
+
+
+size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+#ifndef HUF_FORCE_DECOMPRESS_X2
+size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
+}
+#endif
+
+size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
+{
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dtd;
+    assert(dtd.tableType == 0);
+    return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dtd;
+    assert(dtd.tableType == 1);
+    return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#else
+    return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
+                           HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
+#endif
+}
+
+size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
+{
+    /* validation checks */
+    if (dstSize == 0) return ERROR(dstSize_tooSmall);
+    if (cSrcSize == 0) return ERROR(corruption_detected);
+
+    {   U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+        (void)algoNb;
+        assert(algoNb == 0);
+        return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+        (void)algoNb;
+        assert(algoNb == 1);
+        return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#else
+        return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
+                        HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
+#endif
+    }
+}
+
diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c
new file mode 100644 (file)
index 0000000..dbbc791
--- /dev/null
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_ddict.c :
+ * concentrates all logic that needs to know the internals of ZSTD_DDict object */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "zstd_decompress_internal.h"
+#include "zstd_ddict.h"
+
+
+
+
+/*-*******************************************************
+*  Types
+*********************************************************/
+struct ZSTD_DDict_s {
+    void* dictBuffer;
+    const void* dictContent;
+    size_t dictSize;
+    ZSTD_entropyDTables_t entropy;
+    U32 dictID;
+    U32 entropyPresent;
+    ZSTD_customMem cMem;
+};  /* typedef'd to ZSTD_DDict within "zstd.h" */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
+{
+    assert(ddict != NULL);
+    return ddict->dictContent;
+}
+
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
+{
+    assert(ddict != NULL);
+    return ddict->dictSize;
+}
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_copyDDictParameters");
+    assert(dctx != NULL);
+    assert(ddict != NULL);
+    dctx->dictID = ddict->dictID;
+    dctx->prefixStart = ddict->dictContent;
+    dctx->virtualStart = ddict->dictContent;
+    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
+    dctx->previousDstEnd = dctx->dictEnd;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    if (ddict->entropyPresent) {
+        dctx->litEntropy = 1;
+        dctx->fseEntropy = 1;
+        dctx->LLTptr = ddict->entropy.LLTable;
+        dctx->MLTptr = ddict->entropy.MLTable;
+        dctx->OFTptr = ddict->entropy.OFTable;
+        dctx->HUFptr = ddict->entropy.hufTable;
+        dctx->entropy.rep[0] = ddict->entropy.rep[0];
+        dctx->entropy.rep[1] = ddict->entropy.rep[1];
+        dctx->entropy.rep[2] = ddict->entropy.rep[2];
+    } else {
+        dctx->litEntropy = 0;
+        dctx->fseEntropy = 0;
+    }
+}
+
+
+static size_t
+ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
+                           ZSTD_dictContentType_e dictContentType)
+{
+    ddict->dictID = 0;
+    ddict->entropyPresent = 0;
+    if (dictContentType == ZSTD_dct_rawContent) return 0;
+
+    if (ddict->dictSize < 8) {
+        if (dictContentType == ZSTD_dct_fullDict)
+            return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
+        return 0;   /* pure content mode */
+    }
+    {   U32 const magic = MEM_readLE32(ddict->dictContent);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            if (dictContentType == ZSTD_dct_fullDict)
+                return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
+            return 0;   /* pure content mode */
+        }
+    }
+    ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
+            &ddict->entropy, ddict->dictContent, ddict->dictSize)),
+        dictionary_corrupted, "");
+    ddict->entropyPresent = 1;
+    return 0;
+}
+
+
+static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType)
+{
+    if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
+        ddict->dictBuffer = NULL;
+        ddict->dictContent = dict;
+        if (!dict) dictSize = 0;
+    } else {
+        void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem);
+        ddict->dictBuffer = internalBuffer;
+        ddict->dictContent = internalBuffer;
+        if (!internalBuffer) return ERROR(memory_allocation);
+        ZSTD_memcpy(internalBuffer, dict, dictSize);
+    }
+    ddict->dictSize = dictSize;
+    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+
+    /* parse dictionary content */
+    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");
+
+    return 0;
+}
+
+ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
+                                      ZSTD_dictLoadMethod_e dictLoadMethod,
+                                      ZSTD_dictContentType_e dictContentType,
+                                      ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
+        if (ddict == NULL) return NULL;
+        ddict->cMem = customMem;
+        {   size_t const initResult = ZSTD_initDDict_internal(ddict,
+                                            dict, dictSize,
+                                            dictLoadMethod, dictContentType);
+            if (ZSTD_isError(initResult)) {
+                ZSTD_freeDDict(ddict);
+                return NULL;
+        }   }
+        return ddict;
+    }
+}
+
+/*! ZSTD_createDDict() :
+*   Create a digested dictionary, to start decompression without startup delay.
+*   `dict` content is copied inside DDict.
+*   Consequently, `dict` can be released after `ZSTD_DDict` creation */
+ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
+}
+
+/*! ZSTD_createDDict_byReference() :
+ *  Create a digested dictionary, to start decompression without startup delay.
+ *  Dictionary content is simply referenced, it will be accessed during decompression.
+ *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
+ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
+{
+    ZSTD_customMem const allocator = { NULL, NULL, NULL };
+    return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
+}
+
+
+const ZSTD_DDict* ZSTD_initStaticDDict(
+                                void* sBuffer, size_t sBufferSize,
+                                const void* dict, size_t dictSize,
+                                ZSTD_dictLoadMethod_e dictLoadMethod,
+                                ZSTD_dictContentType_e dictContentType)
+{
+    size_t const neededSpace = sizeof(ZSTD_DDict)
+                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+    ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
+    assert(sBuffer != NULL);
+    assert(dict != NULL);
+    if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
+    if (sBufferSize < neededSpace) return NULL;
+    if (dictLoadMethod == ZSTD_dlm_byCopy) {
+        ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
+        dict = ddict+1;
+    }
+    if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
+                                              dict, dictSize,
+                                              ZSTD_dlm_byRef, dictContentType) ))
+        return NULL;
+    return ddict;
+}
+
+
+size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support free on NULL */
+    {   ZSTD_customMem const cMem = ddict->cMem;
+        ZSTD_customFree(ddict->dictBuffer, cMem);
+        ZSTD_customFree(ddict, cMem);
+        return 0;
+    }
+}
+
+/*! ZSTD_estimateDDictSize() :
+ *  Estimate amount of memory that will be needed to create a dictionary for decompression.
+ *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+}
+
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support sizeof on NULL */
+    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;
+    return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
+}
diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h
new file mode 100644 (file)
index 0000000..8c1a79d
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>     /* ZSTD_DDict, and several public functions */
+
+
+/*-*******************************************************
+ *  Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
new file mode 100644 (file)
index 0000000..b4d81d8
--- /dev/null
@@ -0,0 +1,2085 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected.
+ */
+#ifndef ZSTD_HEAPMODE
+#  define ZSTD_HEAPMODE 1
+#endif
+
+/*!
+*  LEGACY_SUPPORT :
+*  if set to 1+, ZSTD_decompress() can decode older formats (v0.1+)
+*/
+
+/*!
+ *  MAXWINDOWSIZE_DEFAULT :
+ *  maximum window size accepted by DStream __by default__.
+ *  Frames requiring more memory will be rejected.
+ *  It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize().
+ */
+#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT
+#  define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1)
+#endif
+
+/*!
+ *  NO_FORWARD_PROGRESS_MAX :
+ *  maximum allowed nb of calls to ZSTD_decompressStream()
+ *  without any forward progress
+ *  (defined as: no byte read from input, and no byte flushed to output)
+ *  before triggering an error.
+ */
+#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
+#  define ZSTD_NO_FORWARD_PROGRESS_MAX 16
+#endif
+
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include <linux/xxhash.h> /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */
+#include "../common/zstd_internal.h"  /* blockProperties_t */
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
+
+
+
+
+/* ***********************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
+
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4
+#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3   /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float.
+                                                     * Currently, that means a 0.75 load factor.
+                                                     * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded
+                                                     * the load factor of the ddict hash set.
+                                                     */
+
+#define DDICT_HASHSET_TABLE_BASE_SIZE 64
+#define DDICT_HASHSET_RESIZE_FACTOR 2
+
+/* Hash function to determine starting position of dict insertion within the table
+ * Returns an index between [0, hashSet->ddictPtrTableSize]
+ */
+static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    const U64 hash = xxh64(&dictID, sizeof(U32), 0);
+    /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
+    return hash & (hashSet->ddictPtrTableSize - 1);
+}
+
+/* Adds DDict to a hashset without resizing it.
+ * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set.
+ * Returns 0 if successful, or a zstd error code if something went wrong.
+ */
+static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) {
+    const U32 dictID = ZSTD_getDictID_fromDDict(ddict);
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    while (hashSet->ddictPtrTable[idx] != NULL) {
+        /* Replace existing ddict if inserting ddict with same dictID */
+        if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) {
+            DEBUGLOG(4, "DictID already exists, replacing rather than adding");
+            hashSet->ddictPtrTable[idx] = ddict;
+            return 0;
+        }
+        idx &= idxRangeMask;
+        idx++;
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    hashSet->ddictPtrTable[idx] = ddict;
+    hashSet->ddictPtrCount++;
+    return 0;
+}
+
+/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and
+ * rehashes all values, allocates new table, frees old table.
+ * Returns 0 on success, otherwise a zstd error code.
+ */
+static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR;
+    const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
+    const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
+    size_t oldTableSize = hashSet->ddictPtrTableSize;
+    size_t i;
+
+    DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
+    RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
+    hashSet->ddictPtrTable = newTable;
+    hashSet->ddictPtrTableSize = newTableSize;
+    hashSet->ddictPtrCount = 0;
+    for (i = 0; i < oldTableSize; ++i) {
+        if (oldTable[i] != NULL) {
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), "");
+        }
+    }
+    ZSTD_customFree((void*)oldTable, customMem);
+    DEBUGLOG(4, "Finished re-hash");
+    return 0;
+}
+
+/* Fetches a DDict with the given dictID
+ * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL.
+ */
+static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) {
+    size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+    const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1;
+    DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
+    for (;;) {
+        size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
+        if (currDictID == dictID || currDictID == 0) {
+            /* currDictID == 0 implies a NULL ddict entry */
+            break;
+        } else {
+            idx &= idxRangeMask;    /* Goes to start of table when we reach the end */
+            idx++;
+        }
+    }
+    DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx);
+    return hashSet->ddictPtrTable[idx];
+}
+
+/* Allocates space for and returns a ddict hash set
+ * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with.
+ * Returns NULL if allocation failed.
+ */
+static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
+    ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+    DEBUGLOG(4, "Allocating new hash set");
+    if (!ret)
+        return NULL;
+    ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
+    if (!ret->ddictPtrTable) {
+        ZSTD_customFree(ret, customMem);
+        return NULL;
+    }
+    ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
+    ret->ddictPtrCount = 0;
+    return ret;
+}
+
+/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
+ */
+static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Freeing ddict hash set");
+    if (hashSet && hashSet->ddictPtrTable) {
+        ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem);
+    }
+    if (hashSet) {
+        ZSTD_customFree(hashSet, customMem);
+    }
+}
+
+/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
+ * Returns 0 on success, or a ZSTD error.
+ */
+static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) {
+    DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize);
+    if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) {
+        FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), "");
+    }
+    FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), "");
+    return 0;
+}
+
+/*-*************************************************************
+*   Context management
+***************************************************************/
+size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support sizeof NULL */
+    return sizeof(*dctx)
+           + ZSTD_sizeof_DDict(dctx->ddictLocal)
+           + dctx->inBuffSize + dctx->outBuffSize;
+}
+
+size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); }
+
+
+static size_t ZSTD_startingInputLength(ZSTD_format_e format)
+{
+    size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format);
+    /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
+    assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) );
+    return startingInputLength;
+}
+
+static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
+{
+    assert(dctx->streamStage == zdss_init);
+    dctx->format = ZSTD_f_zstd1;
+    dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
+    dctx->outBufferMode = ZSTD_bm_buffered;
+    dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+    dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
+}
+
+static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
+{
+    dctx->staticSize  = 0;
+    dctx->ddict       = NULL;
+    dctx->ddictLocal  = NULL;
+    dctx->dictEnd     = NULL;
+    dctx->ddictIsCold = 0;
+    dctx->dictUses = ZSTD_dont_use;
+    dctx->inBuff      = NULL;
+    dctx->inBuffSize  = 0;
+    dctx->outBuffSize = 0;
+    dctx->streamStage = zdss_init;
+    dctx->legacyContext = NULL;
+    dctx->previousLegacyVersion = 0;
+    dctx->noForwardProgress = 0;
+    dctx->oversizedDuration = 0;
+    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    dctx->ddictSet = NULL;
+    ZSTD_DCtx_resetParameters(dctx);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentEndForFuzzing = NULL;
+#endif
+}
+
+ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
+{
+    ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace;
+
+    if ((size_t)workspace & 7) return NULL;  /* 8-aligned */
+    if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL;  /* minimum size */
+
+    ZSTD_initDCtx_internal(dctx);
+    dctx->staticSize = workspaceSize;
+    dctx->inBuff = (char*)(dctx+1);
+    return dctx;
+}
+
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
+
+    {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
+        if (!dctx) return NULL;
+        dctx->customMem = customMem;
+        ZSTD_initDCtx_internal(dctx);
+        return dctx;
+    }
+}
+
+ZSTD_DCtx* ZSTD_createDCtx(void)
+{
+    DEBUGLOG(3, "ZSTD_createDCtx");
+    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
+}
+
+static void ZSTD_clearDict(ZSTD_DCtx* dctx)
+{
+    ZSTD_freeDDict(dctx->ddictLocal);
+    dctx->ddictLocal = NULL;
+    dctx->ddict = NULL;
+    dctx->dictUses = ZSTD_dont_use;
+}
+
+size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx)
+{
+    if (dctx==NULL) return 0;   /* support free on NULL */
+    RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx");
+    {   ZSTD_customMem const cMem = dctx->customMem;
+        ZSTD_clearDict(dctx);
+        ZSTD_customFree(dctx->inBuff, cMem);
+        dctx->inBuff = NULL;
+        if (dctx->ddictSet) {
+            ZSTD_freeDDictHashSet(dctx->ddictSet, cMem);
+            dctx->ddictSet = NULL;
+        }
+        ZSTD_customFree(dctx, cMem);
+        return 0;
+    }
+}
+
+/* no longer useful */
+void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx)
+{
+    size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx);
+    ZSTD_memcpy(dstDCtx, srcDCtx, toCopy);  /* no need to copy workspace */
+}
+
+/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
+ * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
+ * accordingly sets the ddict to be used to decompress the frame.
+ *
+ * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
+ */
+static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
+    assert(dctx->refMultipleDDicts && dctx->ddictSet);
+    DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
+    if (dctx->ddict) {
+        const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+        if (frameDDict) {
+            DEBUGLOG(4, "DDict found!");
+            ZSTD_clearDict(dctx);
+            dctx->dictID = dctx->fParams.dictID;
+            dctx->ddict = frameDDict;
+            dctx->dictUses = ZSTD_use_indefinitely;
+        }
+    }
+}
+
+
+/*-*************************************************************
+ *   Frame header decoding
+ ***************************************************************/
+
+/*! ZSTD_isFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if (magic == ZSTD_MAGICNUMBER) return 1;
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+    return 0;
+}
+
+/* ZSTD_frameHeaderSize_internal() :
+ *  srcSize must be large enough to reach header size fields.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
+ * @return : size of the Frame Header
+ *           or an error code, which can be tested with ZSTD_isError() */
+static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
+
+    {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
+        U32 const dictID= fhd & 3;
+        U32 const singleSegment = (fhd >> 5) & 1;
+        U32 const fcsId = fhd >> 6;
+        return minInputSize + !singleSegment
+             + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+             + (singleSegment && !fcsId);
+    }
+}
+
+/* ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_frameHeaderSize_prefix.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+    return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameHeader_advanced() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+
+    ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));   /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */
+    if (srcSize < minInputSize) return minInputSize;
+    RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter");
+
+    if ( (format != ZSTD_f_zstd1_magicless)
+      && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
+        if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            /* skippable frame */
+            if (srcSize < ZSTD_SKIPPABLEHEADERSIZE)
+                return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */
+            ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr));
+            zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
+            zfhPtr->frameType = ZSTD_skippableFrame;
+            return 0;
+        }
+        RETURN_ERROR(prefix_unknown, "");
+    }
+
+    /* ensure there is enough `srcSize` to fully read/decode frame header */
+    {   size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format);
+        if (srcSize < fhsize) return fhsize;
+        zfhPtr->headerSize = (U32)fhsize;
+    }
+
+    {   BYTE const fhdByte = ip[minInputSize-1];
+        size_t pos = minInputSize;
+        U32 const dictIDSizeCode = fhdByte&3;
+        U32 const checksumFlag = (fhdByte>>2)&1;
+        U32 const singleSegment = (fhdByte>>5)&1;
+        U32 const fcsID = fhdByte>>6;
+        U64 windowSize = 0;
+        U32 dictID = 0;
+        U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN;
+        RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported,
+                        "reserved bits, must be zero");
+
+        if (!singleSegment) {
+            BYTE const wlByte = ip[pos++];
+            U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, "");
+            windowSize = (1ULL << windowLog);
+            windowSize += (windowSize >> 3) * (wlByte&7);
+        }
+        switch(dictIDSizeCode)
+        {
+            default:
+                assert(0);  /* impossible */
+                ZSTD_FALLTHROUGH;
+            case 0 : break;
+            case 1 : dictID = ip[pos]; pos++; break;
+            case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break;
+            case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break;
+        }
+        switch(fcsID)
+        {
+            default:
+                assert(0);  /* impossible */
+                ZSTD_FALLTHROUGH;
+            case 0 : if (singleSegment) frameContentSize = ip[pos]; break;
+            case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break;
+            case 2 : frameContentSize = MEM_readLE32(ip+pos); break;
+            case 3 : frameContentSize = MEM_readLE64(ip+pos); break;
+        }
+        if (singleSegment) windowSize = frameContentSize;
+
+        zfhPtr->frameType = ZSTD_frame;
+        zfhPtr->frameContentSize = frameContentSize;
+        zfhPtr->windowSize = windowSize;
+        zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+        zfhPtr->dictID = dictID;
+        zfhPtr->checksumFlag = checksumFlag;
+    }
+    return 0;
+}
+
+/* ZSTD_getFrameHeader() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : this function does not consume input, it only reads it.
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/* ZSTD_getFrameContentSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+    {   ZSTD_frameHeader zfh;
+        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+            return ZSTD_CONTENTSIZE_ERROR;
+        if (zfh.frameType == ZSTD_skippableFrame) {
+            return 0;
+        } else {
+            return zfh.frameContentSize;
+    }   }
+}
+
+static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+{
+    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
+    U32 sizeU32;
+
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported, "");
+    {
+        size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+        return skippableSize;
+    }
+}
+
+/* ZSTD_findDecompressedSize() :
+ *  compatible with legacy mode
+ *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
+ *      skippable frames
+ *  @return : decompressed size of the frames contained */
+unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long totalDstSize = 0;
+
+    while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) {
+        U32 const magicNumber = MEM_readLE32(src);
+
+        if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+            size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+            if (ZSTD_isError(skippableSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+            assert(skippableSize <= srcSize);
+
+            src = (const BYTE *)src + skippableSize;
+            srcSize -= skippableSize;
+            continue;
+        }
+
+        {   unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+            if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret;
+
+            /* check for overflow */
+            if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR;
+            totalDstSize += ret;
+        }
+        {   size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize);
+            if (ZSTD_isError(frameSrcSize)) {
+                return ZSTD_CONTENTSIZE_ERROR;
+            }
+
+            src = (const BYTE *)src + frameSrcSize;
+            srcSize -= frameSrcSize;
+        }
+    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+    if (srcSize) return ZSTD_CONTENTSIZE_ERROR;
+
+    return totalDstSize;
+}
+
+/* ZSTD_getDecompressedSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size if known, 0 otherwise
+             note : 0 can mean any of the following :
+                   - frame content is empty
+                   - decompressed size field is not present in frame header
+                   - frame header unknown / not supported
+                   - frame header not complete (`srcSize` too small) */
+unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize)
+{
+    unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize);
+    ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN);
+    return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret;
+}
+
+
+/* ZSTD_decodeFrameHeader() :
+ * `headerSize` must be the size provided by ZSTD_frameHeaderSize().
+ * If multiple DDict references are enabled, also will choose the correct DDict to use.
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
+static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
+{
+    size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
+    if (ZSTD_isError(result)) return result;    /* invalid header */
+    RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
+
+    /* Reference DDict requested by frame if dctx references multiple ddicts */
+    if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
+        ZSTD_DCtx_selectFrameDDict(dctx);
+    }
+
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    /* Skip the dictID check in fuzzing mode, because it makes the search
+     * harder.
+     */
+    RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID),
+                    dictionary_wrong, "");
+#endif
+    dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0;
+    if (dctx->validateChecksum) xxh64_reset(&dctx->xxhState, 0);
+    dctx->processedCSize += headerSize;
+    return 0;
+}
+
+static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    frameSizeInfo.compressedSize = ret;
+    frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR;
+    return frameSizeInfo;
+}
+
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo frameSizeInfo;
+    ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
+
+
+    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+        && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+        frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
+        assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
+               frameSizeInfo.compressedSize <= srcSize);
+        return frameSizeInfo;
+    } else {
+        const BYTE* ip = (const BYTE*)src;
+        const BYTE* const ipstart = ip;
+        size_t remainingSize = srcSize;
+        size_t nbBlocks = 0;
+        ZSTD_frameHeader zfh;
+
+        /* Extract Frame Header */
+        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+            if (ZSTD_isError(ret))
+                return ZSTD_errorFrameSizeInfo(ret);
+            if (ret > 0)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+        }
+
+        ip += zfh.headerSize;
+        remainingSize -= zfh.headerSize;
+
+        /* Iterate over each block */
+        while (1) {
+            blockProperties_t blockProperties;
+            size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties);
+            if (ZSTD_isError(cBlockSize))
+                return ZSTD_errorFrameSizeInfo(cBlockSize);
+
+            if (ZSTD_blockHeaderSize + cBlockSize > remainingSize)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+
+            ip += ZSTD_blockHeaderSize + cBlockSize;
+            remainingSize -= ZSTD_blockHeaderSize + cBlockSize;
+            nbBlocks++;
+
+            if (blockProperties.lastBlock) break;
+        }
+
+        /* Final frame content checksum */
+        if (zfh.checksumFlag) {
+            if (remainingSize < 4)
+                return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong));
+            ip += 4;
+        }
+
+        frameSizeInfo.compressedSize = (size_t)(ip - ipstart);
+        frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN)
+                                        ? zfh.frameContentSize
+                                        : nbBlocks * zfh.blockSizeMax;
+        return frameSizeInfo;
+    }
+}
+
+/* ZSTD_findFrameCompressedSize() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the compressed size of the frame starting at `src` */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+    return frameSizeInfo.compressedSize;
+}
+
+/* ZSTD_decompressBound() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame or a skippeable frame
+ *  `srcSize` must be at least as large as the frame contained
+ *  @return : the maximum decompressed size of the compressed source
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+    unsigned long long bound = 0;
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+        bound += decompressedBound;
+    }
+    return bound;
+}
+
+
+/*-*************************************************************
+ *   Frame decoding
+ ***************************************************************/
+
+/* ZSTD_insertBlock() :
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks. */
+size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize)
+{
+    DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize);
+    ZSTD_checkContinuity(dctx, blockStart, blockSize);
+    dctx->previousDstEnd = (const char*)blockStart + blockSize;
+    return blockSize;
+}
+
+
+static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
+                          const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_copyRawBlock");
+    RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
+    if (dst == NULL) {
+        if (srcSize == 0) return 0;
+        RETURN_ERROR(dstBuffer_null, "");
+    }
+    ZSTD_memcpy(dst, src, srcSize);
+    return srcSize;
+}
+
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity,
+                               BYTE b,
+                               size_t regenSize)
+{
+    RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, "");
+    if (dst == NULL) {
+        if (regenSize == 0) return 0;
+        RETURN_ERROR(dstBuffer_null, "");
+    }
+    ZSTD_memset(dst, b, regenSize);
+    return regenSize;
+}
+
+static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming)
+{
+    (void)dctx;
+    (void)uncompressedSize;
+    (void)compressedSize;
+    (void)streaming;
+}
+
+
+/*! ZSTD_decompressFrame() :
+ * @dctx must be properly initialized
+ *  will update *srcPtr and *srcSizePtr,
+ *  to make *srcPtr progress by one frame. */
+static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
+                                   void* dst, size_t dstCapacity,
+                             const void** srcPtr, size_t *srcSizePtr)
+{
+    const BYTE* const istart = (const BYTE*)(*srcPtr);
+    const BYTE* ip = istart;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart;
+    BYTE* op = ostart;
+    size_t remainingSrcSize = *srcSizePtr;
+
+    DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr);
+
+    /* check */
+    RETURN_ERROR_IF(
+        remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize,
+        srcSize_wrong, "");
+
+    /* Frame Header */
+    {   size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal(
+                ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format);
+        if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize;
+        RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize,
+                        srcSize_wrong, "");
+        FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , "");
+        ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
+    }
+
+    /* Loop on each block */
+    while (1) {
+        size_t decodedSize;
+        blockProperties_t blockProperties;
+        size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
+        if (ZSTD_isError(cBlockSize)) return cBlockSize;
+
+        ip += ZSTD_blockHeaderSize;
+        remainingSrcSize -= ZSTD_blockHeaderSize;
+        RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
+
+        switch(blockProperties.blockType)
+        {
+        case bt_compressed:
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
+            break;
+        case bt_raw :
+            decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
+            break;
+        case bt_rle :
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
+            break;
+        case bt_reserved :
+        default:
+            RETURN_ERROR(corruption_detected, "invalid block type");
+        }
+
+        if (ZSTD_isError(decodedSize)) return decodedSize;
+        if (dctx->validateChecksum)
+            xxh64_update(&dctx->xxhState, op, decodedSize);
+        if (decodedSize != 0)
+            op += decodedSize;
+        assert(ip != NULL);
+        ip += cBlockSize;
+        remainingSrcSize -= cBlockSize;
+        if (blockProperties.lastBlock) break;
+    }
+
+    if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+        RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize,
+                        corruption_detected, "");
+    }
+    if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+        RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, "");
+        if (!dctx->forceIgnoreChecksum) {
+            U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState);
+            U32 checkRead;
+            checkRead = MEM_readLE32(ip);
+            RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, "");
+        }
+        ip += 4;
+        remainingSrcSize -= 4;
+    }
+    ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0);
+    /* Allow caller to get size read */
+    *srcPtr = ip;
+    *srcSizePtr = remainingSrcSize;
+    return (size_t)(op-ostart);
+}
+
+static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+                                        void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  const void* dict, size_t dictSize,
+                                  const ZSTD_DDict* ddict)
+{
+    void* const dststart = dst;
+    int moreThan1Frame = 0;
+
+    DEBUGLOG(5, "ZSTD_decompressMultiFrame");
+    assert(dict==NULL || ddict==NULL);  /* either dict or ddict set, not both */
+
+    if (ddict) {
+        dict = ZSTD_DDict_dictContent(ddict);
+        dictSize = ZSTD_DDict_dictSize(ddict);
+    }
+
+    while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
+
+
+        {   U32 const magicNumber = MEM_readLE32(src);
+            DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
+                        (unsigned)magicNumber, ZSTD_MAGICNUMBER);
+            if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
+                size_t const skippableSize = readSkippableFrameSize(src, srcSize);
+                FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed");
+                assert(skippableSize <= srcSize);
+
+                src = (const BYTE *)src + skippableSize;
+                srcSize -= skippableSize;
+                continue;
+        }   }
+
+        if (ddict) {
+            /* we were called from ZSTD_decompress_usingDDict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), "");
+        } else {
+            /* this will initialize correctly with no dict if dict == NULL, so
+             * use this in all cases but ddict */
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), "");
+        }
+        ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+        {   const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
+                                                    &src, &srcSize);
+            RETURN_ERROR_IF(
+                (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
+             && (moreThan1Frame==1),
+                srcSize_wrong,
+                "At least one frame successfully completed, "
+                "but following bytes are garbage: "
+                "it's more likely to be a srcSize error, "
+                "specifying more input bytes than size of frame(s). "
+                "Note: one could be unlucky, it might be a corruption error instead, "
+                "happening right at the place where we expect zstd magic bytes. "
+                "But this is _much_ less likely than a srcSize field error.");
+            if (ZSTD_isError(res)) return res;
+            assert(res <= dstCapacity);
+            if (res != 0)
+                dst = (BYTE*)dst + res;
+            dstCapacity -= res;
+        }
+        moreThan1Frame = 1;
+    }  /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
+
+    RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed");
+
+    return (size_t)((BYTE*)dst - (BYTE*)dststart);
+}
+
+size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
+                                 void* dst, size_t dstCapacity,
+                           const void* src, size_t srcSize,
+                           const void* dict, size_t dictSize)
+{
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL);
+}
+
+
+static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx)
+{
+    switch (dctx->dictUses) {
+    default:
+        assert(0 /* Impossible */);
+        ZSTD_FALLTHROUGH;
+    case ZSTD_dont_use:
+        ZSTD_clearDict(dctx);
+        return NULL;
+    case ZSTD_use_indefinitely:
+        return dctx->ddict;
+    case ZSTD_use_once:
+        dctx->dictUses = ZSTD_dont_use;
+        return dctx->ddict;
+    }
+}
+
+size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx));
+}
+
+
+size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
+    size_t regenSize;
+    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
+    regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
+    ZSTD_freeDCtx(dctx);
+    return regenSize;
+#else   /* stack mode */
+    ZSTD_DCtx dctx;
+    ZSTD_initDCtx_internal(&dctx);
+    return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
+#endif
+}
+
+
+/*-**************************************
+*   Advanced Streaming Decompression API
+*   Bufferless and synchronous
+****************************************/
+size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
+
+/*
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
+ * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
+ * be streamed.
+ *
+ * For blocks that can be streamed, this allows us to reduce the latency until we produce
+ * output, and avoid copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+    if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
+        return dctx->expected;
+    if (dctx->bType != bt_raw)
+        return dctx->expected;
+    return MIN(MAX(inputSize, 1), dctx->expected);
+}
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
+    switch(dctx->stage)
+    {
+    default:   /* should not happen */
+        assert(0);
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_getFrameHeaderSize:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_decodeFrameHeader:
+        return ZSTDnit_frameHeader;
+    case ZSTDds_decodeBlockHeader:
+        return ZSTDnit_blockHeader;
+    case ZSTDds_decompressBlock:
+        return ZSTDnit_block;
+    case ZSTDds_decompressLastBlock:
+        return ZSTDnit_lastBlock;
+    case ZSTDds_checkChecksum:
+        return ZSTDnit_checksum;
+    case ZSTDds_decodeSkippableHeader:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_skipFrame:
+        return ZSTDnit_skippableFrame;
+    }
+}
+
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
+
+/* ZSTD_decompressContinue() :
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ *  @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity)
+ *            or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
+    /* Sanity check */
+    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+    dctx->processedCSize += srcSize;
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        assert(src != NULL);
+        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
+            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
+            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
+                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
+                dctx->stage = ZSTDds_decodeSkippableHeader;
+                return 0;
+        }   }
+        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
+        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
+        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+        dctx->expected = dctx->headerSize - srcSize;
+        dctx->stage = ZSTDds_decodeFrameHeader;
+        return 0;
+
+    case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
+        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
+        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
+        dctx->expected = ZSTD_blockHeaderSize;
+        dctx->stage = ZSTDds_decodeBlockHeader;
+        return 0;
+
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
+            dctx->expected = cBlockSize;
+            dctx->bType = bp.blockType;
+            dctx->rleSize = bp.origSize;
+            if (cBlockSize) {
+                dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+                return 0;
+            }
+            /* empty block */
+            if (bp.lastBlock) {
+                if (dctx->fParams.checksumFlag) {
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    dctx->expected = 0; /* end of frame */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->expected = ZSTD_blockHeaderSize;  /* jump to next header */
+                dctx->stage = ZSTDds_decodeBlockHeader;
+            }
+            return 0;
+        }
+
+    case ZSTDds_decompressLastBlock:
+    case ZSTDds_decompressBlock:
+        DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock");
+        {   size_t rSize;
+            switch(dctx->bType)
+            {
+            case bt_compressed:
+                DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_raw :
+                assert(srcSize <= dctx->expected);
+                rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
+                FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed");
+                assert(rSize == srcSize);
+                dctx->expected -= rSize;
+                break;
+            case bt_rle :
+                rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize);
+                dctx->expected = 0;  /* Streaming not supported */
+                break;
+            case bt_reserved :   /* should never happen */
+            default:
+                RETURN_ERROR(corruption_detected, "invalid block type");
+            }
+            FORWARD_IF_ERROR(rSize, "");
+            RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
+            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
+            dctx->decodedSize += rSize;
+            if (dctx->validateChecksum) xxh64_update(&dctx->xxhState, dst, rSize);
+            dctx->previousDstEnd = (char*)dst + rSize;
+
+            /* Stay on the same stage until we are finished streaming the block. */
+            if (dctx->expected > 0) {
+                return rSize;
+            }
+
+            if (dctx->stage == ZSTDds_decompressLastBlock) {   /* end of frame */
+                DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize);
+                RETURN_ERROR_IF(
+                    dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                 && dctx->decodedSize != dctx->fParams.frameContentSize,
+                    corruption_detected, "");
+                if (dctx->fParams.checksumFlag) {  /* another round for frame checksum */
+                    dctx->expected = 4;
+                    dctx->stage = ZSTDds_checkChecksum;
+                } else {
+                    ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+                    dctx->expected = 0;   /* ends here */
+                    dctx->stage = ZSTDds_getFrameHeaderSize;
+                }
+            } else {
+                dctx->stage = ZSTDds_decodeBlockHeader;
+                dctx->expected = ZSTD_blockHeaderSize;
+            }
+            return rSize;
+        }
+
+    case ZSTDds_checkChecksum:
+        assert(srcSize == 4);  /* guaranteed by dctx->expected */
+        {
+            if (dctx->validateChecksum) {
+                U32 const h32 = (U32)xxh64_digest(&dctx->xxhState);
+                U32 const check32 = MEM_readLE32(src);
+                DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32);
+                RETURN_ERROR_IF(check32 != h32, checksum_wrong, "");
+            }
+            ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1);
+            dctx->expected = 0;
+            dctx->stage = ZSTDds_getFrameHeaderSize;
+            return 0;
+        }
+
+    case ZSTDds_decodeSkippableHeader:
+        assert(src != NULL);
+        assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+        ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
+        dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
+        dctx->stage = ZSTDds_skipFrame;
+        return 0;
+
+    case ZSTDds_skipFrame:
+        dctx->expected = 0;
+        dctx->stage = ZSTDds_getFrameHeaderSize;
+        return 0;
+
+    default:
+        assert(0);   /* impossible */
+        RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
+    }
+}
+
+
+static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    dctx->dictEnd = dctx->previousDstEnd;
+    dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+    dctx->prefixStart = dict;
+    dctx->previousDstEnd = (const char*)dict + dictSize;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    dctx->dictContentBeginForFuzzing = dctx->prefixStart;
+    dctx->dictContentEndForFuzzing = dctx->previousDstEnd;
+#endif
+    return 0;
+}
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of entropy tables read */
+size_t
+ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                  const void* const dict, size_t const dictSize)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+
+    RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small");
+    assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY);   /* dict must be valid */
+    dictPtr += 8;   /* skip header = magic + dictID */
+
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
+    ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
+    ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
+    {   void* const workspace = &entropy->LLTable;   /* use fse tables as temporary workspace; implies fse tables are grouped together */
+        size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
+#ifdef HUF_FORCE_DECOMPRESS_X1
+        /* in minimal huffman, we always use X1 variants */
+        size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable,
+                                                dictPtr, dictEnd - dictPtr,
+                                                workspace, workspaceSize);
+#else
+        size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
+                                                dictPtr, (size_t)(dictEnd - dictPtr),
+                                                workspace, workspaceSize);
+#endif
+        RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, "");
+        dictPtr += hSize;
+    }
+
+    {   short offcodeNCount[MaxOff+1];
+        unsigned offcodeMaxValue = MaxOff, offcodeLog;
+        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, "");
+        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->OFTable,
+                            offcodeNCount, offcodeMaxValue,
+                            OF_base, OF_bits,
+                            offcodeLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */0);
+        dictPtr += offcodeHeaderSize;
+    }
+
+    {   short matchlengthNCount[MaxML+1];
+        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
+        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, "");
+        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->MLTable,
+                            matchlengthNCount, matchlengthMaxValue,
+                            ML_base, ML_bits,
+                            matchlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
+        dictPtr += matchlengthHeaderSize;
+    }
+
+    {   short litlengthNCount[MaxLL+1];
+        unsigned litlengthMaxValue = MaxLL, litlengthLog;
+        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr));
+        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, "");
+        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, "");
+        ZSTD_buildFSETable( entropy->LLTable,
+                            litlengthNCount, litlengthMaxValue,
+                            LL_base, LL_bits,
+                            litlengthLog,
+                            entropy->workspace, sizeof(entropy->workspace),
+                            /* bmi2 */ 0);
+        dictPtr += litlengthHeaderSize;
+    }
+
+    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, "");
+    {   int i;
+        size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12));
+        for (i=0; i<3; i++) {
+            U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4;
+            RETURN_ERROR_IF(rep==0 || rep > dictContentSize,
+                            dictionary_corrupted, "");
+            entropy->rep[i] = rep;
+    }   }
+
+    return (size_t)(dictPtr - (const BYTE*)dict);
+}
+
+static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize);
+    {   U32 const magic = MEM_readLE32(dict);
+        if (magic != ZSTD_MAGIC_DICTIONARY) {
+            return ZSTD_refDictContent(dctx, dict, dictSize);   /* pure content mode */
+    }   }
+    dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+
+    /* load entropy tables */
+    {   size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize);
+        RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, "");
+        dict = (const char*)dict + eSize;
+        dictSize -= eSize;
+    }
+    dctx->litEntropy = dctx->fseEntropy = 1;
+
+    /* reference dictionary content */
+    return ZSTD_refDictContent(dctx, dict, dictSize);
+}
+
+size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
+{
+    assert(dctx != NULL);
+    dctx->expected = ZSTD_startingInputLength(dctx->format);  /* dctx->format must be properly set */
+    dctx->stage = ZSTDds_getFrameHeaderSize;
+    dctx->processedCSize = 0;
+    dctx->decodedSize = 0;
+    dctx->previousDstEnd = NULL;
+    dctx->prefixStart = NULL;
+    dctx->virtualStart = NULL;
+    dctx->dictEnd = NULL;
+    dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */
+    dctx->litEntropy = dctx->fseEntropy = 0;
+    dctx->dictID = 0;
+    dctx->bType = bt_reserved;
+    ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
+    ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
+    dctx->LLTptr = dctx->entropy.LLTable;
+    dctx->MLTptr = dctx->entropy.MLTable;
+    dctx->OFTptr = dctx->entropy.OFTable;
+    dctx->HUFptr = dctx->entropy.hufTable;
+    return 0;
+}
+
+size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (dict && dictSize)
+        RETURN_ERROR_IF(
+            ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)),
+            dictionary_corrupted, "");
+    return 0;
+}
+
+
+/* ======   ZSTD_DDict   ====== */
+
+size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
+    assert(dctx != NULL);
+    if (ddict) {
+        const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict);
+        size_t const dictSize = ZSTD_DDict_dictSize(ddict);
+        const void* const dictEnd = dictStart + dictSize;
+        dctx->ddictIsCold = (dctx->dictEnd != dictEnd);
+        DEBUGLOG(4, "DDict is %s",
+                    dctx->ddictIsCold ? "~cold~" : "hot!");
+    }
+    FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , "");
+    if (ddict) {   /* NULL ddict is equivalent to no dictionary */
+        ZSTD_copyDDictParameters(dctx, ddict);
+    }
+    return 0;
+}
+
+/*! ZSTD_getDictID_fromDict() :
+ *  Provides the dictID stored within dictionary.
+ *  if @return == 0, the dictionary is not conformant with Zstandard specification.
+ *  It can still be loaded, but as a content-only dictionary. */
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    Needed dictionary is a hidden information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;
+    return zfp.dictID;
+}
+
+
+/*! ZSTD_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_DDict* ddict)
+{
+    /* pass content and size in case legacy frames are encountered */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                                     NULL, 0,
+                                     ddict);
+}
+
+
+/*=====================================
+*   Streaming decompression
+*====================================*/
+
+ZSTD_DStream* ZSTD_createDStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createDStream");
+    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);
+}
+
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_advanced(customMem);
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream* zds)
+{
+    return ZSTD_freeDCtx(zds);
+}
+
+
+/* ***  Initialization  *** */
+
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+                                   const void* dict, size_t dictSize,
+                                         ZSTD_dictLoadMethod_e dictLoadMethod,
+                                         ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (dict && dictSize != 0) {
+        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
+        dctx->ddict = dctx->ddictLocal;
+        dctx->dictUses = ZSTD_use_indefinitely;
+    }
+    return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), "");
+    dctx->dictUses = ZSTD_use_once;
+    return 0;
+}
+
+size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize)
+{
+    return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent);
+}
+
+
+/* ZSTD_initDStream_usingDict() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize)
+{
+    DEBUGLOG(4, "ZSTD_initDStream_usingDict");
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , "");
+    return ZSTD_startingInputLength(zds->format);
+}
+
+/* note : this variant can't fail */
+size_t ZSTD_initDStream(ZSTD_DStream* zds)
+{
+    DEBUGLOG(4, "ZSTD_initDStream");
+    return ZSTD_initDStream_usingDDict(zds, NULL);
+}
+
+/* ZSTD_initDStream_usingDDict() :
+ * ddict will just be referenced, and must outlive decompression session
+ * this function cannot fail */
+size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict)
+{
+    FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , "");
+    FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+/* ZSTD_resetDStream() :
+ * return : expected size, aka ZSTD_startingInputLength().
+ * this function cannot fail */
+size_t ZSTD_resetDStream(ZSTD_DStream* dctx)
+{
+    FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), "");
+    return ZSTD_startingInputLength(dctx->format);
+}
+
+
+size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (ddict) {
+        dctx->ddict = ddict;
+        dctx->dictUses = ZSTD_use_indefinitely;
+        if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+            if (dctx->ddictSet == NULL) {
+                dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+                if (!dctx->ddictSet) {
+                    RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+                }
+            }
+            assert(!dctx->staticSize);  /* Impossible: ddictSet cannot have been allocated if static dctx */
+            FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), "");
+        }
+    }
+    return 0;
+}
+
+/* ZSTD_DCtx_setMaxWindowSize() :
+ * note : no direct equivalence in ZSTD_DCtx_setParameter,
+ * since this version sets windowSize, and the other sets windowLog */
+size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax);
+    size_t const min = (size_t)1 << bounds.lowerBound;
+    size_t const max = (size_t)1 << bounds.upperBound;
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, "");
+    RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, "");
+    dctx->maxWindowSize = maxWindowSize;
+    return 0;
+}
+
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format)
+{
+    return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format);
+}
+
+ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN;
+            bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+            return bounds;
+        case ZSTD_d_format:
+            bounds.lowerBound = (int)ZSTD_f_zstd1;
+            bounds.upperBound = (int)ZSTD_f_zstd1_magicless;
+            ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
+            return bounds;
+        case ZSTD_d_stableOutBuffer:
+            bounds.lowerBound = (int)ZSTD_bm_buffered;
+            bounds.upperBound = (int)ZSTD_bm_stable;
+            return bounds;
+        case ZSTD_d_forceIgnoreChecksum:
+            bounds.lowerBound = (int)ZSTD_d_validateChecksum;
+            bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
+            return bounds;
+        case ZSTD_d_refMultipleDDicts:
+            bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+            bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
+            return bounds;
+        default:;
+    }
+    bounds.error = ERROR(parameter_unsupported);
+    return bounds;
+}
+
+/* ZSTD_dParam_withinBounds:
+ * @return 1 if value is within dParam bounds,
+ * 0 otherwise */
+static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value)
+{
+    ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam);
+    if (ZSTD_isError(bounds.error)) return 0;
+    if (value < bounds.lowerBound) return 0;
+    if (value > bounds.upperBound) return 0;
+    return 1;
+}
+
+#define CHECK_DBOUNDS(p,v) {                \
+    RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \
+}
+
+size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value)
+{
+    switch (param) {
+        case ZSTD_d_windowLogMax:
+            *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize);
+            return 0;
+        case ZSTD_d_format:
+            *value = (int)dctx->format;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            *value = (int)dctx->outBufferMode;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            *value = (int)dctx->forceIgnoreChecksum;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            *value = (int)dctx->refMultipleDDicts;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    switch(dParam) {
+        case ZSTD_d_windowLogMax:
+            if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
+            CHECK_DBOUNDS(ZSTD_d_windowLogMax, value);
+            dctx->maxWindowSize = ((size_t)1) << value;
+            return 0;
+        case ZSTD_d_format:
+            CHECK_DBOUNDS(ZSTD_d_format, value);
+            dctx->format = (ZSTD_format_e)value;
+            return 0;
+        case ZSTD_d_stableOutBuffer:
+            CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value);
+            dctx->outBufferMode = (ZSTD_bufferMode_e)value;
+            return 0;
+        case ZSTD_d_forceIgnoreChecksum:
+            CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value);
+            dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value;
+            return 0;
+        case ZSTD_d_refMultipleDDicts:
+            CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
+            if (dctx->staticSize != 0) {
+                RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
+            }
+            dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
+            return 0;
+        default:;
+    }
+    RETURN_ERROR(parameter_unsupported, "");
+}
+
+size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        dctx->streamStage = zdss_init;
+        dctx->noForwardProgress = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+        ZSTD_clearDict(dctx);
+        ZSTD_DCtx_resetParameters(dctx);
+    }
+    return 0;
+}
+
+
+size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
+{
+    return ZSTD_sizeof_DCtx(dctx);
+}
+
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
+    size_t const minRBSize = (size_t) neededSize;
+    RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
+                    frameParameter_windowTooLarge, "");
+    return minRBSize;
+}
+
+size_t ZSTD_estimateDStreamSize(size_t windowSize)
+{
+    size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
+    size_t const inBuffSize = blockSize;  /* no block can be larger */
+    size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize;
+}
+
+size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize)
+{
+    U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX;   /* note : should be user-selectable, but requires an additional parameter (or a dctx) */
+    ZSTD_frameHeader zfh;
+    size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize);
+    if (ZSTD_isError(err)) return err;
+    RETURN_ERROR_IF(err>0, srcSize_wrong, "");
+    RETURN_ERROR_IF(zfh.windowSize > windowSizeMax,
+                    frameParameter_windowTooLarge, "");
+    return ZSTD_estimateDStreamSize((size_t)zfh.windowSize);
+}
+
+
+/* *****   Decompression   ***** */
+
+static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR;
+}
+
+static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize)
+{
+    if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize))
+        zds->oversizedDuration++;
+    else
+        zds->oversizedDuration = 0;
+}
+
+static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds)
+{
+    return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION;
+}
+
+/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */
+static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output)
+{
+    ZSTD_outBuffer const expect = zds->expectedOutBuffer;
+    /* No requirement when ZSTD_obm_stable is not enabled. */
+    if (zds->outBufferMode != ZSTD_bm_stable)
+        return 0;
+    /* Any buffer is allowed in zdss_init, this must be the same for every other call until
+     * the context is reset.
+     */
+    if (zds->streamStage == zdss_init)
+        return 0;
+    /* The buffer must match our expectation exactly. */
+    if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size)
+        return 0;
+    RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!");
+}
+
+/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream()
+ * and updates the stage and the output buffer state. This call is extracted so it can be
+ * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode.
+ * NOTE: You must break after calling this function since the streamStage is modified.
+ */
+static size_t ZSTD_decompressContinueStream(
+            ZSTD_DStream* zds, char** op, char* oend,
+            void const* src, size_t srcSize) {
+    int const isSkipFrame = ZSTD_isSkipFrame(zds);
+    if (zds->outBufferMode == ZSTD_bm_buffered) {
+        size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart;
+        size_t const decodedSize = ZSTD_decompressContinue(zds,
+                zds->outBuff + zds->outStart, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        if (!decodedSize && !isSkipFrame) {
+            zds->streamStage = zdss_read;
+        } else {
+            zds->outEnd = zds->outStart + decodedSize;
+            zds->streamStage = zdss_flush;
+        }
+    } else {
+        /* Write directly into the output buffer */
+        size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op);
+        size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize);
+        FORWARD_IF_ERROR(decodedSize, "");
+        *op += decodedSize;
+        /* Flushing is not needed. */
+        zds->streamStage = zdss_read;
+        assert(*op <= oend);
+        assert(zds->outBufferMode == ZSTD_bm_stable);
+    }
+    return 0;
+}
+
+size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+{
+    const char* const src = (const char*)input->src;
+    const char* const istart = input->pos != 0 ? src + input->pos : src;
+    const char* const iend = input->size != 0 ? src + input->size : src;
+    const char* ip = istart;
+    char* const dst = (char*)output->dst;
+    char* const ostart = output->pos != 0 ? dst + output->pos : dst;
+    char* const oend = output->size != 0 ? dst + output->size : dst;
+    char* op = ostart;
+    U32 someMoreWork = 1;
+
+    DEBUGLOG(5, "ZSTD_decompressStream");
+    RETURN_ERROR_IF(
+        input->pos > input->size,
+        srcSize_wrong,
+        "forbidden. in: pos: %u   vs size: %u",
+        (U32)input->pos, (U32)input->size);
+    RETURN_ERROR_IF(
+        output->pos > output->size,
+        dstSize_tooSmall,
+        "forbidden. out: pos: %u   vs size: %u",
+        (U32)output->pos, (U32)output->size);
+    DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos));
+    FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), "");
+
+    while (someMoreWork) {
+        switch(zds->streamStage)
+        {
+        case zdss_init :
+            DEBUGLOG(5, "stage zdss_init => transparent reset ");
+            zds->streamStage = zdss_loadHeader;
+            zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
+            zds->legacyVersion = 0;
+            zds->hostageByte = 0;
+            zds->expectedOutBuffer = *output;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_loadHeader :
+            DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip));
+            {   size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
+                if (zds->refMultipleDDicts && zds->ddictSet) {
+                    ZSTD_DCtx_selectFrameDDict(zds);
+                }
+                DEBUGLOG(5, "header size : %u", (U32)hSize);
+                if (ZSTD_isError(hSize)) {
+                    return hSize;   /* error */
+                }
+                if (hSize != 0) {   /* need more input */
+                    size_t const toLoad = hSize - zds->lhSize;   /* if hSize!=0, hSize > zds->lhSize */
+                    size_t const remainingInput = (size_t)(iend-ip);
+                    assert(iend >= ip);
+                    if (toLoad > remainingInput) {   /* not enough input to load full header */
+                        if (remainingInput > 0) {
+                            ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput);
+                            zds->lhSize += remainingInput;
+                        }
+                        input->pos = input->size;
+                        return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize;   /* remaining header bytes + next block header */
+                    }
+                    assert(ip != NULL);
+                    ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad;
+                    break;
+            }   }
+
+            /* check for single-pass mode opportunity */
+            if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
+                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
+                if (cSize <= (size_t)(iend-istart)) {
+                    /* shortcut : using single-pass mode */
+                    size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
+                    if (ZSTD_isError(decompressedSize)) return decompressedSize;
+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+                    ip = istart + cSize;
+                    op += decompressedSize;
+                    zds->expected = 0;
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+            }   }
+
+            /* Check output buffer is large enough for ZSTD_odm_stable. */
+            if (zds->outBufferMode == ZSTD_bm_stable
+                && zds->fParams.frameType != ZSTD_skippableFrame
+                && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
+                && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) {
+                RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small");
+            }
+
+            /* Consume header (see ZSTDds_decodeFrameHeader) */
+            DEBUGLOG(4, "Consume header");
+            FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
+
+            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+                zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
+                zds->stage = ZSTDds_skipFrame;
+            } else {
+                FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), "");
+                zds->expected = ZSTD_blockHeaderSize;
+                zds->stage = ZSTDds_decodeBlockHeader;
+            }
+
+            /* control buffer memory usage */
+            DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)",
+                        (U32)(zds->fParams.windowSize >>10),
+                        (U32)(zds->maxWindowSize >> 10) );
+            zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
+            RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
+                            frameParameter_windowTooLarge, "");
+
+            /* Adapt buffer sizes to frame header instructions */
+            {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
+                size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
+                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+                        : 0;
+
+                ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
+
+                {   int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize);
+                    int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds);
+
+                    if (tooSmall || tooLarge) {
+                        size_t const bufferSize = neededInBuffSize + neededOutBuffSize;
+                        DEBUGLOG(4, "inBuff  : from %u to %u",
+                                    (U32)zds->inBuffSize, (U32)neededInBuffSize);
+                        DEBUGLOG(4, "outBuff : from %u to %u",
+                                    (U32)zds->outBuffSize, (U32)neededOutBuffSize);
+                        if (zds->staticSize) {  /* static DCtx */
+                            DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize);
+                            assert(zds->staticSize >= sizeof(ZSTD_DCtx));  /* controlled at init */
+                            RETURN_ERROR_IF(
+                                bufferSize > zds->staticSize - sizeof(ZSTD_DCtx),
+                                memory_allocation, "");
+                        } else {
+                            ZSTD_customFree(zds->inBuff, zds->customMem);
+                            zds->inBuffSize = 0;
+                            zds->outBuffSize = 0;
+                            zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem);
+                            RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, "");
+                        }
+                        zds->inBuffSize = neededInBuffSize;
+                        zds->outBuff = zds->inBuff + zds->inBuffSize;
+                        zds->outBuffSize = neededOutBuffSize;
+            }   }   }
+            zds->streamStage = zdss_read;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_read:
+            DEBUGLOG(5, "stage zdss_read");
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip));
+                DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize);
+                if (neededInSize==0) {  /* end of frame */
+                    zds->streamStage = zdss_init;
+                    someMoreWork = 0;
+                    break;
+                }
+                if ((size_t)(iend-ip) >= neededInSize) {  /* decode directly from src */
+                    FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), "");
+                    ip += neededInSize;
+                    /* Function modifies the stage so we must break */
+                    break;
+            }   }
+            if (ip==iend) { someMoreWork = 0; break; }   /* no more input */
+            zds->streamStage = zdss_load;
+            ZSTD_FALLTHROUGH;
+
+        case zdss_load:
+            {   size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds);
+                size_t const toLoad = neededInSize - zds->inPos;
+                int const isSkipFrame = ZSTD_isSkipFrame(zds);
+                size_t loadedSize;
+                /* At this point we shouldn't be decompressing a block that we can stream. */
+                assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip));
+                if (isSkipFrame) {
+                    loadedSize = MIN(toLoad, (size_t)(iend-ip));
+                } else {
+                    RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos,
+                                    corruption_detected,
+                                    "should never happen");
+                    loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip));
+                }
+                ip += loadedSize;
+                zds->inPos += loadedSize;
+                if (loadedSize < toLoad) { someMoreWork = 0; break; }   /* not enough input, wait for more */
+
+                /* decode loaded input */
+                zds->inPos = 0;   /* input is consumed */
+                FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), "");
+                /* Function modifies the stage so we must break */
+                break;
+            }
+        case zdss_flush:
+            {   size_t const toFlushSize = zds->outEnd - zds->outStart;
+                size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize);
+                op += flushedSize;
+                zds->outStart += flushedSize;
+                if (flushedSize == toFlushSize) {  /* flush completed */
+                    zds->streamStage = zdss_read;
+                    if ( (zds->outBuffSize < zds->fParams.frameContentSize)
+                      && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) {
+                        DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)",
+                                (int)(zds->outBuffSize - zds->outStart),
+                                (U32)zds->fParams.blockSizeMax);
+                        zds->outStart = zds->outEnd = 0;
+                    }
+                    break;
+            }   }
+            /* cannot complete flush */
+            someMoreWork = 0;
+            break;
+
+        default:
+            assert(0);    /* impossible */
+            RETURN_ERROR(GENERIC, "impossible to reach");   /* some compiler require default to do something */
+    }   }
+
+    /* result */
+    input->pos = (size_t)(ip - (const char*)(input->src));
+    output->pos = (size_t)(op - (char*)(output->dst));
+
+    /* Update the expected output buffer for ZSTD_obm_stable. */
+    zds->expectedOutBuffer = *output;
+
+    if ((ip==istart) && (op==ostart)) {  /* no forward progress */
+        zds->noForwardProgress ++;
+        if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
+            RETURN_ERROR_IF(op==oend, dstSize_tooSmall, "");
+            RETURN_ERROR_IF(ip==iend, srcSize_wrong, "");
+            assert(0);
+        }
+    } else {
+        zds->noForwardProgress = 0;
+    }
+    {   size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
+        if (!nextSrcSizeHint) {   /* frame fully decoded */
+            if (zds->outEnd == zds->outStart) {  /* output fully flushed */
+                if (zds->hostageByte) {
+                    if (input->pos >= input->size) {
+                        /* can't release hostage (not present) */
+                        zds->streamStage = zdss_read;
+                        return 1;
+                    }
+                    input->pos++;  /* release hostage */
+                }   /* zds->hostageByte */
+                return 0;
+            }  /* zds->outEnd == zds->outStart */
+            if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */
+                input->pos--;   /* note : pos > 0, otherwise, impossible to finish reading last block */
+                zds->hostageByte=1;
+            }
+            return 1;
+        }  /* nextSrcSizeHint==0 */
+        nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block);   /* preload header of next block */
+        assert(zds->inPos <= nextSrcSizeHint);
+        nextSrcSizeHint -= zds->inPos;   /* part already loaded*/
+        return nextSrcSizeHint;
+    }
+}
+
+size_t ZSTD_decompressStream_simpleArgs (
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity, size_t* dstPos,
+                      const void* src, size_t srcSize, size_t* srcPos)
+{
+    ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
+    ZSTD_inBuffer  input  = { src, srcSize, *srcPos };
+    /* ZSTD_compress_generic() will check validity of dstPos and srcPos */
+    size_t const cErr = ZSTD_decompressStream(dctx, &output, &input);
+    *dstPos = output.pos;
+    *srcPos = input.pos;
+    return cErr;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
new file mode 100644 (file)
index 0000000..2d101d9
--- /dev/null
@@ -0,0 +1,1540 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* zstd_decompress_block :
+ * this module takes care of decompressing _compressed_ block */
+
+/*-*******************************************************
+*  Dependencies
+*********************************************************/
+#include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/compiler.h"    /* prefetch */
+#include "../common/cpu.h"         /* bmi2 */
+#include "../common/mem.h"         /* low level memory routines */
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#define HUF_STATIC_LINKING_ONLY
+#include "../common/huf.h"
+#include "../common/zstd_internal.h"
+#include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
+#include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
+#include "zstd_decompress_block.h"
+
+/*_*******************************************************
+*  Macros
+**********************************************************/
+
+/* These two optional macros force the use one way or another of the two
+ * ZSTD_decompressSequences implementations. You can't force in both directions
+ * at the same time.
+ */
+#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
+#endif
+
+
+/*_*******************************************************
+*  Memory operations
+**********************************************************/
+static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
+
+
+/*-*************************************************************
+ *   Block decoding
+ ***************************************************************/
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr)
+{
+    RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
+
+    {   U32 const cBlockHeader = MEM_readLE24(src);
+        U32 const cSize = cBlockHeader >> 3;
+        bpPtr->lastBlock = cBlockHeader & 1;
+        bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
+        bpPtr->origSize = cSize;   /* only useful for RLE */
+        if (bpPtr->blockType == bt_rle) return 1;
+        RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
+        return cSize;
+    }
+}
+
+
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize);
+/*! ZSTD_decodeLiteralsBlock() :
+ * @return : nb of bytes read from src (< srcSize )
+ *  note : symbol not declared but exposed for fullbench */
+size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+{
+    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
+    RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
+
+    {   const BYTE* const istart = (const BYTE*) src;
+        symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+
+        switch(litEncType)
+        {
+        case set_repeat:
+            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
+            RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
+            ZSTD_FALLTHROUGH;
+
+        case set_compressed:
+            RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
+            {   size_t lhSize, litSize, litCSize;
+                U32 singleStream=0;
+                U32 const lhlCode = (istart[0] >> 2) & 3;
+                U32 const lhc = MEM_readLE32(istart);
+                size_t hufSuccess;
+                switch(lhlCode)
+                {
+                case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    /* 2 - 2 - 10 - 10 */
+                    singleStream = !lhlCode;
+                    lhSize = 3;
+                    litSize  = (lhc >> 4) & 0x3FF;
+                    litCSize = (lhc >> 14) & 0x3FF;
+                    break;
+                case 2:
+                    /* 2 - 2 - 14 - 14 */
+                    lhSize = 4;
+                    litSize  = (lhc >> 4) & 0x3FFF;
+                    litCSize = lhc >> 18;
+                    break;
+                case 3:
+                    /* 2 - 2 - 18 - 18 */
+                    lhSize = 5;
+                    litSize  = (lhc >> 4) & 0x3FFFF;
+                    litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+
+                /* prefetch huffman table if cold */
+                if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
+                    PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
+                }
+
+                if (litEncType==set_repeat) {
+                    if (singleStream) {
+                        hufSuccess = HUF_decompress1X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    } else {
+                        hufSuccess = HUF_decompress4X_usingDTable_bmi2(
+                            dctx->litBuffer, litSize, istart+lhSize, litCSize,
+                            dctx->HUFptr, dctx->bmi2);
+                    }
+                } else {
+                    if (singleStream) {
+#if defined(HUF_FORCE_DECOMPRESS_X2)
+                        hufSuccess = HUF_decompress1X_DCtx_wksp(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace));
+#else
+                        hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+#endif
+                    } else {
+                        hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
+                            dctx->entropy.hufTable, dctx->litBuffer, litSize,
+                            istart+lhSize, litCSize, dctx->workspace,
+                            sizeof(dctx->workspace), dctx->bmi2);
+                    }
+                }
+
+                RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
+
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                dctx->litEntropy = 1;
+                if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
+                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                return litCSize + lhSize;
+            }
+
+        case set_basic:
+            {   size_t litSize, lhSize;
+                U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    break;
+                }
+
+                if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
+                    RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
+                    ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    dctx->litPtr = dctx->litBuffer;
+                    dctx->litSize = litSize;
+                    ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
+                    return lhSize+litSize;
+                }
+                /* direct reference into compressed stream */
+                dctx->litPtr = istart+lhSize;
+                dctx->litSize = litSize;
+                return lhSize+litSize;
+            }
+
+        case set_rle:
+            {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                size_t litSize, lhSize;
+                switch(lhlCode)
+                {
+                case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
+                    lhSize = 1;
+                    litSize = istart[0] >> 3;
+                    break;
+                case 1:
+                    lhSize = 2;
+                    litSize = MEM_readLE16(istart) >> 4;
+                    break;
+                case 3:
+                    lhSize = 3;
+                    litSize = MEM_readLE24(istart) >> 4;
+                    RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
+                    break;
+                }
+                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                dctx->litPtr = dctx->litBuffer;
+                dctx->litSize = litSize;
+                return lhSize+1;
+            }
+        default:
+            RETURN_ERROR(corruption_detected, "impossible");
+        }
+    }
+}
+
+/* Default FSE distribution tables.
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
+
+/* Default FSE distribution table for Literal Lengths */
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+     /* nextState, nbAddBits, nbBits, baseVal */
+     {  0,  0,  4,    0},  { 16,  0,  4,    0},
+     { 32,  0,  5,    1},  {  0,  0,  5,    3},
+     {  0,  0,  5,    4},  {  0,  0,  5,    6},
+     {  0,  0,  5,    7},  {  0,  0,  5,    9},
+     {  0,  0,  5,   10},  {  0,  0,  5,   12},
+     {  0,  0,  6,   14},  {  0,  1,  5,   16},
+     {  0,  1,  5,   20},  {  0,  1,  5,   22},
+     {  0,  2,  5,   28},  {  0,  3,  5,   32},
+     {  0,  4,  5,   48},  { 32,  6,  5,   64},
+     {  0,  7,  5,  128},  {  0,  8,  6,  256},
+     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
+     { 32,  0,  4,    0},  {  0,  0,  4,    1},
+     {  0,  0,  5,    2},  { 32,  0,  5,    4},
+     {  0,  0,  5,    5},  { 32,  0,  5,    7},
+     {  0,  0,  5,    8},  { 32,  0,  5,   10},
+     {  0,  0,  5,   11},  {  0,  0,  6,   13},
+     { 32,  1,  5,   16},  {  0,  1,  5,   18},
+     { 32,  1,  5,   22},  {  0,  2,  5,   24},
+     { 32,  3,  5,   32},  {  0,  3,  5,   40},
+     {  0,  6,  4,   64},  { 16,  6,  4,   64},
+     { 32,  7,  5,  128},  {  0,  9,  6,  512},
+     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
+     { 16,  0,  4,    1},  { 32,  0,  5,    2},
+     { 32,  0,  5,    3},  { 32,  0,  5,    5},
+     { 32,  0,  5,    6},  { 32,  0,  5,    8},
+     { 32,  0,  5,    9},  { 32,  0,  5,   11},
+     { 32,  0,  5,   12},  {  0,  0,  6,   15},
+     { 32,  1,  5,   18},  { 32,  1,  5,   20},
+     { 32,  2,  5,   24},  { 32,  2,  5,   28},
+     { 32,  3,  5,   40},  { 32,  4,  5,   48},
+     {  0, 16,  6,65536},  {  0, 15,  6,32768},
+     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
+};   /* LL_defaultDTable */
+
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  5,    0},     {  0,  6,  4,   61},
+    {  0,  9,  5,  509},     {  0, 15,  5,32765},
+    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
+    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
+    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
+    {  0,  5,  5,   29},     {  0,  8,  4,  253},
+    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
+    {  0,  2,  5,    1},     { 16,  7,  4,  125},
+    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
+    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
+    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
+    {  0, 19,  5,524285},    {  0,  1,  5,    1},
+    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
+    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
+    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
+    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
+};   /* OF_defaultDTable */
+
+
+/* Default FSE distribution table for Match Lengths */
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
+    /* nextState, nbAddBits, nbBits, baseVal */
+    {  0,  0,  6,    3},  {  0,  0,  4,    4},
+    { 32,  0,  5,    5},  {  0,  0,  5,    6},
+    {  0,  0,  5,    8},  {  0,  0,  5,    9},
+    {  0,  0,  5,   11},  {  0,  0,  6,   13},
+    {  0,  0,  6,   16},  {  0,  0,  6,   19},
+    {  0,  0,  6,   22},  {  0,  0,  6,   25},
+    {  0,  0,  6,   28},  {  0,  0,  6,   31},
+    {  0,  0,  6,   34},  {  0,  1,  6,   37},
+    {  0,  1,  6,   41},  {  0,  2,  6,   47},
+    {  0,  3,  6,   59},  {  0,  4,  6,   83},
+    {  0,  7,  6,  131},  {  0,  9,  6,  515},
+    { 16,  0,  4,    4},  {  0,  0,  4,    5},
+    { 32,  0,  5,    6},  {  0,  0,  5,    7},
+    { 32,  0,  5,    9},  {  0,  0,  5,   10},
+    {  0,  0,  6,   12},  {  0,  0,  6,   15},
+    {  0,  0,  6,   18},  {  0,  0,  6,   21},
+    {  0,  0,  6,   24},  {  0,  0,  6,   27},
+    {  0,  0,  6,   30},  {  0,  0,  6,   33},
+    {  0,  1,  6,   35},  {  0,  1,  6,   39},
+    {  0,  2,  6,   43},  {  0,  3,  6,   51},
+    {  0,  4,  6,   67},  {  0,  5,  6,   99},
+    {  0,  8,  6,  259},  { 32,  0,  4,    4},
+    { 48,  0,  4,    4},  { 16,  0,  4,    5},
+    { 32,  0,  5,    7},  { 32,  0,  5,    8},
+    { 32,  0,  5,   10},  { 32,  0,  5,   11},
+    {  0,  0,  6,   14},  {  0,  0,  6,   17},
+    {  0,  0,  6,   20},  {  0,  0,  6,   23},
+    {  0,  0,  6,   26},  {  0,  0,  6,   29},
+    {  0,  0,  6,   32},  {  0, 16,  6,65539},
+    {  0, 15,  6,32771},  {  0, 14,  6,16387},
+    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
+    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
+};   /* ML_defaultDTable */
+
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+{
+    void* ptr = dt;
+    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+    ZSTD_seqSymbol* const cell = dt + 1;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->nbBits = 0;
+    cell->nextState = 0;
+    assert(nbAddBits < 255);
+    cell->nbAdditionalBits = (BYTE)nbAddBits;
+    cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * cannot fail if input is valid =>
+ * all inputs are presumed validated at this stage */
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_seqSymbol* const tableDecode = dt+1;
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
+
+    /* Sanity Checks */
+    assert(maxSymbolValue <= MaxSeq);
+    assert(tableLog <= MaxFSELog);
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
+    /* Init, lay down lowprob symbols */
+    {   ZSTD_seqSymbol_header DTableH;
+        DTableH.tableLog = tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].baseValue = s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols to hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                pos += n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what emperically worked best.
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
+                tableDecode[position].baseValue = s;
+                position = (position + step) & tableMask;
+                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {
+        U32 u;
+        for (u=0; u<tableSize; u++) {
+            U32 const symbol = tableDecode[u].baseValue;
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
+            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+            assert(nbAdditionalBits[symbol] < 255);
+            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+            tableDecode[u].baseValue = baseValue[symbol];
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U32* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ * @return : nb bytes read from src,
+ *           or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
+                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const U32* baseValue, const U32* nbAdditionalBits,
+                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
+                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                                 int bmi2)
+{
+    switch(type)
+    {
+    case set_rle :
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
+        {   U32 const symbol = *(const BYTE*)src;
+            U32 const baseline = baseValue[symbol];
+            U32 const nbBits = nbAdditionalBits[symbol];
+            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+        }
+        *DTablePtr = DTableSpace;
+        return 1;
+    case set_basic :
+        *DTablePtr = defaultTable;
+        return 0;
+    case set_repeat:
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
+        /* prefetch FSE table if used */
+        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
+            const void* const pStart = *DTablePtr;
+            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
+            PREFETCH_AREA(pStart, pSize);
+        }
+        return 0;
+    case set_compressed :
+        {   unsigned tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
+            *DTablePtr = DTableSpace;
+            return headerSize;
+        }
+    default :
+        assert(0);
+        RETURN_ERROR(GENERIC, "impossible");
+    }
+}
+
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+    int nbSeq;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
+
+    /* check */
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
+
+    /* SeqHead */
+    nbSeq = *ip++;
+    if (!nbSeq) {
+        *nbSeqPtr=0;
+        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
+        return 1;
+    }
+    if (nbSeq > 0x7F) {
+        if (nbSeq == 0xFF) {
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
+        } else {
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
+            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+        }
+    }
+    *nbSeqPtr = nbSeq;
+
+    /* FSE table descriptors */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+        ip++;
+
+        /* Build DTables */
+        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
+                                                      LLtype, MaxLL, LLFSELog,
+                                                      ip, iend-ip,
+                                                      LL_base, LL_bits,
+                                                      LL_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += llhSize;
+        }
+
+        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
+                                                      OFtype, MaxOff, OffFSELog,
+                                                      ip, iend-ip,
+                                                      OF_base, OF_bits,
+                                                      OF_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += ofhSize;
+        }
+
+        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
+                                                      MLtype, MaxML, MLFSELog,
+                                                      ip, iend-ip,
+                                                      ML_base, ML_bits,
+                                                      ML_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      dctx->bmi2);
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += mlhSize;
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+    const BYTE* match;
+} seq_t;
+
+typedef struct {
+    size_t state;
+    const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    ZSTD_fseState stateLL;
+    ZSTD_fseState stateOffb;
+    ZSTD_fseState stateML;
+    size_t prevOffset[ZSTD_REP_NUM];
+    const BYTE* prefixStart;
+    const BYTE* dictEnd;
+    size_t pos;
+} seqState_t;
+
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *op >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
+ */
+static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
+           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
+
+    if (length < 8) {
+        /* Handle short lengths. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+    if (ovtype == ZSTD_overlap_src_before_dst) {
+        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
+        assert(length >= 8);
+        ZSTD_overlapCopy8(&op, &ip, diff);
+        assert(op - ip >= 8);
+        assert(op <= oend);
+    }
+
+    if (oend <= oend_w) {
+        /* No risk of overwrite. */
+        ZSTD_wildcopy(op, ip, length, ovtype);
+        return;
+    }
+    if (op <= oend_w) {
+        /* Wildcopy until we get close to the end. */
+        assert(oend > oend_w);
+        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
+        ip += oend_w - op;
+        op = oend_w;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_execSequenceEnd():
+ * This version handles cases that are near the end of the output buffer. It requires
+ * more careful checks to make sure there is no overflow. By separating out these hard
+ * and unlikely cases, we can speed up the common cases.
+ *
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEnd(BYTE* op,
+                            BYTE* const oend, seq_t sequence,
+                            const BYTE** litPtr, const BYTE* const litLimit,
+                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
+
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
+
+    /* copy literals */
+    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix */
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart-match);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+    }   }
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+    return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequence(BYTE* op,
+                         BYTE* const oend, seq_t sequence,
+                         const BYTE** litPtr, const BYTE* const litLimit,
+                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLIMIT of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+            iLitEnd > litLimit ||
+            oMatchEnd > oend_w ||
+            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
+    }
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* Copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix -> go into extDict */
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+        match = dictEnd + (match - prefixStart);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+            ZSTD_memmove(oLitEnd, match, length1);
+            op = oLitEnd + length1;
+            sequence.matchLength -= length1;
+            match = prefixStart;
+    }   }
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
+
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
+    }
+    return sequenceLength;
+}
+
+static void
+ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
+{
+    const void* ptr = dt;
+    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
+    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
+    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
+                (U32)DStatePtr->state, DTableH->tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
+{
+    ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+FORCE_INLINE_TEMPLATE void
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+{
+    U32 const nbBits = DInfo.nbBits;
+    size_t const lowBits = BIT_readBits(bitD, nbBits);
+    DStatePtr->state = DInfo.nextState + lowBits;
+}
+
+/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
+ * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
+ * bits before reloading. This value is the maximum number of bytes we read
+ * after reloading when we are decoding long offsets.
+ */
+#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
+    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
+        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
+        : 0)
+
+typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
+typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
+
+FORCE_INLINE_TEMPLATE seq_t
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+{
+    seq_t seq;
+    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
+    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
+    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
+    U32 const llBase = llDInfo.baseValue;
+    U32 const mlBase = mlDInfo.baseValue;
+    U32 const ofBase = ofDInfo.baseValue;
+    BYTE const llBits = llDInfo.nbAdditionalBits;
+    BYTE const mlBits = mlDInfo.nbAdditionalBits;
+    BYTE const ofBits = ofDInfo.nbAdditionalBits;
+    BYTE const totalBits = llBits+mlBits+ofBits;
+
+    /* sequence */
+    {   size_t offset;
+        if (ofBits > 1) {
+            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+            assert(ofBits <= MaxOff);
+            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                BIT_reloadDStream(&seqState->DStream);
+                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
+            } else {
+                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
+            }
+            seqState->prevOffset[2] = seqState->prevOffset[1];
+            seqState->prevOffset[1] = seqState->prevOffset[0];
+            seqState->prevOffset[0] = offset;
+        } else {
+            U32 const ll0 = (llBase == 0);
+            if (LIKELY((ofBits == 0))) {
+                if (LIKELY(!ll0))
+                    offset = seqState->prevOffset[0];
+                else {
+                    offset = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset;
+                }
+            } else {
+                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                    seqState->prevOffset[1] = seqState->prevOffset[0];
+                    seqState->prevOffset[0] = offset = temp;
+        }   }   }
+        seq.offset = offset;
+    }
+
+    seq.matchLength = mlBase;
+    if (mlBits > 0)
+        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+        BIT_reloadDStream(&seqState->DStream);
+    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+        BIT_reloadDStream(&seqState->DStream);
+    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+    seq.litLength = llBase;
+    if (llBits > 0)
+        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+    if (MEM_32bits())
+        BIT_reloadDStream(&seqState->DStream);
+
+    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+    if (prefetch == ZSTD_p_prefetch) {
+        size_t const pos = seqState->pos + seq.litLength;
+        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
+        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
+        seqState->pos = pos + seq.matchLength;
+    }
+
+    /* ANS state update
+     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
+     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
+     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
+     * better option, so it is the default for other compilers. But, if you
+     * measure that it is worse, please put up a pull request.
+     */
+    {
+#if !defined(__clang__)
+        const int kUseUpdateFseState = 1;
+#else
+        const int kUseUpdateFseState = 0;
+#endif
+        if (kUseUpdateFseState) {
+            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
+            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
+        } else {
+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
+        }
+    }
+
+    return seq;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+{
+    size_t const windowSize = dctx->fParams.windowSize;
+    /* No dictionary used. */
+    if (dctx->dictContentEndForFuzzing == NULL) return 0;
+    /* Dictionary is our prefix. */
+    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
+    /* Dictionary is not our ext-dict. */
+    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
+    /* Dictionary is not within our window size. */
+    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
+    /* Dictionary is active. */
+    return 1;
+}
+
+MEM_STATIC void ZSTD_assertValidSequence(
+        ZSTD_DCtx const* dctx,
+        BYTE const* op, BYTE const* oend,
+        seq_t const seq,
+        BYTE const* prefixStart, BYTE const* virtualStart)
+{
+#if DEBUGLEVEL >= 1
+    size_t const windowSize = dctx->fParams.windowSize;
+    size_t const sequenceSize = seq.litLength + seq.matchLength;
+    BYTE const* const oLitEnd = op + seq.litLength;
+    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+    assert(op <= oend);
+    assert((size_t)(oend - op) >= sequenceSize);
+    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
+    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+        /* Offset must be within the dictionary. */
+        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+        assert(seq.offset <= windowSize + dictSize);
+    } else {
+        /* Offset must be within our window. */
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        size_t error = 0;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
+         * If it is pretty even then you may be in an okay case.
+         *
+         * I've been able to reproduce this issue on the following CPUs:
+         *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *               Use Instruments->Counters to get DSB/MITE cycles.
+         *               I never got performance swings, but I was able to
+         *               go from the good case of mostly DSB to half of the
+         *               cycles served from MITE.
+         *   - Coffeelake: Intel i9-9900k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUS:
+         *   - Haswell
+         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+         *   - Skylake
+         *
+         * If you are seeing performance stability this script can help test.
+         * It tests on 4 commits in zstd where I saw performance change.
+         *
+         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 5");
+        __asm__("nop");
+        __asm__(".p2align 4");
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            BIT_reloadDStream(&(seqState.DStream));
+            op += oneSeqSize;
+            /* gcc and clang both don't like early returns in this loop.
+             * Instead break and check for an error at the end of the loop.
+             */
+            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
+                error = oneSeqSize;
+                break;
+            }
+            if (UNLIKELY(!--nbSeq)) break;
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        if (ZSTD_isError(error)) return error;
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 4
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS 4
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        dctx->fseEntropy = 1;
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        seqState.prefixStart = prefixStart;
+        seqState.pos = (size_t)(op-prefixStart);
+        seqState.dictEnd = dictEnd;
+        assert(dst != NULL);
+        assert(iend >= ip);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+        }
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ *           compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    const void* ptr = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+    const ZSTD_seqSymbol* table = offTable + 1;
+    U32 const max = 1 << tableLog;
+    U32 u, total = 0;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(max <= (1 << OffFSELog));  /* max not too large */
+    for (u=0; u<max; u++) {
+        if (table[u].nbAdditionalBits > 22) total += 1;
+    }
+
+    assert(tableLog <= OffFSELog);
+    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+
+    return total;
+}
+#endif
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                              void* dst, size_t dstCapacity,
+                        const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
+         */
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        int usePrefetchDecoder = dctx->ddictIsCold;
+#endif
+        int nbSeq;
+        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
+        if (ZSTD_isError(seqHSize)) return seqHSize;
+        ip += seqHSize;
+        srcSize -= seqHSize;
+
+        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if ( !usePrefetchDecoder
+          && (!frame || (dctx->fParams.windowSize > (1<<24)))
+          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
+            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
+            U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
+            usePrefetchDecoder = (shareLongOffsets >= minShare);
+        }
+#endif
+
+        dctx->ddictIsCold = 0;
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+        if (usePrefetchDecoder)
+#endif
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+        /* else */
+        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+#endif
+    }
+}
+
+
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
+{
+    if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
+        dctx->dictEnd = dctx->previousDstEnd;
+        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
+        dctx->prefixStart = dst;
+        dctx->previousDstEnd = dst;
+    }
+}
+
+
+size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
+                            void* dst, size_t dstCapacity,
+                      const void* src, size_t srcSize)
+{
+    size_t dSize;
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    dctx->previousDstEnd = (char*)dst + dSize;
+    return dSize;
+}
diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
new file mode 100644 (file)
index 0000000..e7f5f66
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DEC_BLOCK_H
+#define ZSTD_DEC_BLOCK_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include <linux/zstd.h>    /* DCtx, and some public functions */
+#include "../common/zstd_internal.h"  /* blockProperties_t, and some public functions */
+#include "zstd_decompress_internal.h"  /* ZSTD_seqSymbol */
+
+
+/* ===   Prototypes   === */
+
+/* note: prototypes already published within `zstd.h` :
+ * ZSTD_decompressBlock()
+ */
+
+/* note: prototypes already published within `zstd_internal.h` :
+ * ZSTD_getcBlockSize()
+ * ZSTD_decodeSeqHeaders()
+ */
+
+
+/* ZSTD_decompressBlock_internal() :
+ * decompress block, starting at `src`,
+ * into destination buffer `dst`.
+ * @return : decompressed block size,
+ *           or an error code (which can be tested using ZSTD_isError())
+ */
+size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                               void* dst, size_t dstCapacity,
+                         const void* src, size_t srcSize, const int frame);
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * this function must be called with valid parameters only
+ * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.)
+ * in which case it cannot fail.
+ * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is
+ * defined in zstd_decompress_internal.h.
+ * Internal use only.
+ */
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+             const short* normalizedCounter, unsigned maxSymbolValue,
+             const U32* baseValue, const U32* nbAdditionalBits,
+                   unsigned tableLog, void* wksp, size_t wkspSize,
+                   int bmi2);
+
+
+#endif /* ZSTD_DEC_BLOCK_H */
diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
new file mode 100644 (file)
index 0000000..4b9052f
--- /dev/null
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* zstd_decompress_internal:
+ * objects and definitions shared within lib/decompress modules */
+
+ #ifndef ZSTD_DECOMPRESS_INTERNAL_H
+ #define ZSTD_DECOMPRESS_INTERNAL_H
+
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/mem.h"             /* BYTE, U16, U32 */
+#include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
+
+
+
+/*-*******************************************************
+ *  Constants
+ *********************************************************/
+static UNUSED_ATTR const U32 LL_base[MaxLL+1] = {
+                 0,    1,    2,     3,     4,     5,     6,      7,
+                 8,    9,   10,    11,    12,    13,    14,     15,
+                16,   18,   20,    22,    24,    28,    32,     40,
+                48,   64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000,
+                0x2000, 0x4000, 0x8000, 0x10000 };
+
+static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
+                 0,        1,       1,       5,     0xD,     0x1D,     0x3D,     0x7D,
+                 0xFD,   0x1FD,   0x3FD,   0x7FD,   0xFFD,   0x1FFD,   0x3FFD,   0x7FFD,
+                 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
+                 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
+
+static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
+                     0,  1,  2,  3,  4,  5,  6,  7,
+                     8,  9, 10, 11, 12, 13, 14, 15,
+                    16, 17, 18, 19, 20, 21, 22, 23,
+                    24, 25, 26, 27, 28, 29, 30, 31 };
+
+static UNUSED_ATTR const U32 ML_base[MaxML+1] = {
+                     3,  4,  5,    6,     7,     8,     9,    10,
+                    11, 12, 13,   14,    15,    16,    17,    18,
+                    19, 20, 21,   22,    23,    24,    25,    26,
+                    27, 28, 29,   30,    31,    32,    33,    34,
+                    35, 37, 39,   41,    43,    47,    51,    59,
+                    67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
+                    0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
+
+
+/*-*******************************************************
+ *  Decompression types
+ *********************************************************/
+ typedef struct {
+     U32 fastMode;
+     U32 tableLog;
+ } ZSTD_seqSymbol_header;
+
+ typedef struct {
+     U16  nextState;
+     BYTE nbAdditionalBits;
+     BYTE nbBits;
+     U32  baseValue;
+ } ZSTD_seqSymbol;
+
+ #define SEQSYMBOL_TABLE_SIZE(log)   (1 + (1 << (log)))
+
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64))
+#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32))
+
+typedef struct {
+    ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];    /* Note : Space reserved for FSE Tables */
+    ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];   /* is also used as temporary workspace while building hufTable during DDict creation */
+    ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];    /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
+    HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)];  /* can accommodate HUF_decompress4X */
+    U32 rep[ZSTD_REP_NUM];
+    U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32];
+} ZSTD_entropyDTables_t;
+
+typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
+               ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+               ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
+               ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
+
+typedef enum { zdss_init=0, zdss_loadHeader,
+               zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage;
+
+typedef enum {
+    ZSTD_use_indefinitely = -1,  /* Use the dictionary indefinitely */
+    ZSTD_dont_use = 0,           /* Do not use the dictionary (if one exists free it) */
+    ZSTD_use_once = 1            /* Use the dictionary once and set to ZSTD_dont_use */
+} ZSTD_dictUses_e;
+
+/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */
+typedef struct {
+    const ZSTD_DDict** ddictPtrTable;
+    size_t ddictPtrTableSize;
+    size_t ddictPtrCount;
+} ZSTD_DDictHashSet;
+
+struct ZSTD_DCtx_s
+{
+    const ZSTD_seqSymbol* LLTptr;
+    const ZSTD_seqSymbol* MLTptr;
+    const ZSTD_seqSymbol* OFTptr;
+    const HUF_DTable* HUFptr;
+    ZSTD_entropyDTables_t entropy;
+    U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];   /* space needed when building huffman tables */
+    const void* previousDstEnd;   /* detect continuity */
+    const void* prefixStart;      /* start of current segment */
+    const void* virtualStart;     /* virtual start of previous segment if it was just before current one */
+    const void* dictEnd;          /* end of previous segment */
+    size_t expected;
+    ZSTD_frameHeader fParams;
+    U64 processedCSize;
+    U64 decodedSize;
+    blockType_e bType;            /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */
+    ZSTD_dStage stage;
+    U32 litEntropy;
+    U32 fseEntropy;
+    struct xxh64_state xxhState;
+    size_t headerSize;
+    ZSTD_format_e format;
+    ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum;   /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */
+    U32 validateChecksum;         /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */
+    const BYTE* litPtr;
+    ZSTD_customMem customMem;
+    size_t litSize;
+    size_t rleSize;
+    size_t staticSize;
+    int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+
+    /* dictionary */
+    ZSTD_DDict* ddictLocal;
+    const ZSTD_DDict* ddict;     /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
+    U32 dictID;
+    int ddictIsCold;             /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
+    ZSTD_dictUses_e dictUses;
+    ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
+    ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
+
+    /* streaming */
+    ZSTD_dStreamStage streamStage;
+    char*  inBuff;
+    size_t inBuffSize;
+    size_t inPos;
+    size_t maxWindowSize;
+    char*  outBuff;
+    size_t outBuffSize;
+    size_t outStart;
+    size_t outEnd;
+    size_t lhSize;
+    void* legacyContext;
+    U32 previousLegacyVersion;
+    U32 legacyVersion;
+    U32 hostageByte;
+    int noForwardProgress;
+    ZSTD_bufferMode_e outBufferMode;
+    ZSTD_outBuffer expectedOutBuffer;
+
+    /* workspace */
+    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
+
+    size_t oversizedDuration;
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+    void const* dictContentBeginForFuzzing;
+    void const* dictContentEndForFuzzing;
+#endif
+
+    /* Tracing */
+};  /* typedef'd to ZSTD_DCtx within "zstd.h" */
+
+
+/*-*******************************************************
+ *  Shared internal functions
+ *********************************************************/
+
+/*! ZSTD_loadDEntropy() :
+ *  dict : must point at beginning of a valid zstd dictionary.
+ * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */
+size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy,
+                   const void* const dict, size_t const dictSize);
+
+/*! ZSTD_checkContinuity() :
+ *  check if next `dst` follows previous position, where decompression ended.
+ *  If yes, do nothing (continue on current segment).
+ *  If not, classify previous segment as "external dictionary", and start a new segment.
+ *  This function cannot fail. */
+void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize);
+
+
+#endif /* ZSTD_DECOMPRESS_INTERNAL_H */
diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
new file mode 100644 (file)
index 0000000..0fbec50
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause */
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*
+ * This file includes every .c file needed for decompression.
+ * It is used by lib/decompress_unzstd.c to include the decompression
+ * source into the translation-unit, so it can be used for kernel
+ * decompression.
+ */
+
+#include "common/debug.c"
+#include "common/entropy_common.c"
+#include "common/error_private.c"
+#include "common/fse_decompress.c"
+#include "common/zstd_common.c"
+#include "decompress/huf_decompress.c"
+#include "decompress/zstd_ddict.c"
+#include "decompress/zstd_decompress.c"
+#include "decompress/zstd_decompress_block.c"
+#include "zstd_decompress_module.c"
diff --git a/lib/zstd/entropy_common.c b/lib/zstd/entropy_common.c
deleted file mode 100644 (file)
index 2b0a643..0000000
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Common functions of New Generation Entropy library
- * Copyright (C) 2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* *************************************
-*  Dependencies
-***************************************/
-#include "error_private.h" /* ERR_*, ERROR */
-#include "fse.h"
-#include "huf.h"
-#include "mem.h"
-
-/*===   Version   ===*/
-unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
-
-/*===   Error Management   ===*/
-unsigned FSE_isError(size_t code) { return ERR_isError(code); }
-
-unsigned HUF_isError(size_t code) { return ERR_isError(code); }
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize)
-{
-       const BYTE *const istart = (const BYTE *)headerBuffer;
-       const BYTE *const iend = istart + hbSize;
-       const BYTE *ip = istart;
-       int nbBits;
-       int remaining;
-       int threshold;
-       U32 bitStream;
-       int bitCount;
-       unsigned charnum = 0;
-       int previous0 = 0;
-
-       if (hbSize < 4)
-               return ERROR(srcSize_wrong);
-       bitStream = ZSTD_readLE32(ip);
-       nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
-       if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX)
-               return ERROR(tableLog_tooLarge);
-       bitStream >>= 4;
-       bitCount = 4;
-       *tableLogPtr = nbBits;
-       remaining = (1 << nbBits) + 1;
-       threshold = 1 << nbBits;
-       nbBits++;
-
-       while ((remaining > 1) & (charnum <= *maxSVPtr)) {
-               if (previous0) {
-                       unsigned n0 = charnum;
-                       while ((bitStream & 0xFFFF) == 0xFFFF) {
-                               n0 += 24;
-                               if (ip < iend - 5) {
-                                       ip += 2;
-                                       bitStream = ZSTD_readLE32(ip) >> bitCount;
-                               } else {
-                                       bitStream >>= 16;
-                                       bitCount += 16;
-                               }
-                       }
-                       while ((bitStream & 3) == 3) {
-                               n0 += 3;
-                               bitStream >>= 2;
-                               bitCount += 2;
-                       }
-                       n0 += bitStream & 3;
-                       bitCount += 2;
-                       if (n0 > *maxSVPtr)
-                               return ERROR(maxSymbolValue_tooSmall);
-                       while (charnum < n0)
-                               normalizedCounter[charnum++] = 0;
-                       if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
-                               ip += bitCount >> 3;
-                               bitCount &= 7;
-                               bitStream = ZSTD_readLE32(ip) >> bitCount;
-                       } else {
-                               bitStream >>= 2;
-                       }
-               }
-               {
-                       int const max = (2 * threshold - 1) - remaining;
-                       int count;
-
-                       if ((bitStream & (threshold - 1)) < (U32)max) {
-                               count = bitStream & (threshold - 1);
-                               bitCount += nbBits - 1;
-                       } else {
-                               count = bitStream & (2 * threshold - 1);
-                               if (count >= threshold)
-                                       count -= max;
-                               bitCount += nbBits;
-                       }
-
-                       count--;                                 /* extra accuracy */
-                       remaining -= count < 0 ? -count : count; /* -1 means +1 */
-                       normalizedCounter[charnum++] = (short)count;
-                       previous0 = !count;
-                       while (remaining < threshold) {
-                               nbBits--;
-                               threshold >>= 1;
-                       }
-
-                       if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) {
-                               ip += bitCount >> 3;
-                               bitCount &= 7;
-                       } else {
-                               bitCount -= (int)(8 * (iend - 4 - ip));
-                               ip = iend - 4;
-                       }
-                       bitStream = ZSTD_readLE32(ip) >> (bitCount & 31);
-               }
-       } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
-       if (remaining != 1)
-               return ERROR(corruption_detected);
-       if (bitCount > 32)
-               return ERROR(corruption_detected);
-       *maxSVPtr = charnum - 1;
-
-       ip += (bitCount + 7) >> 3;
-       return ip - istart;
-}
-
-/*! HUF_readStats() :
-       Read compact Huffman tree, saved by HUF_writeCTable().
-       `huffWeight` is destination buffer.
-       `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
-       @return : size read from `src` , or an error Code .
-       Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
-*/
-size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-       U32 weightTotal;
-       const BYTE *ip = (const BYTE *)src;
-       size_t iSize;
-       size_t oSize;
-
-       if (!srcSize)
-               return ERROR(srcSize_wrong);
-       iSize = ip[0];
-       /* memset(huffWeight, 0, hwSize);   */ /* is not necessary, even though some analyzer complain ... */
-
-       if (iSize >= 128) { /* special header */
-               oSize = iSize - 127;
-               iSize = ((oSize + 1) / 2);
-               if (iSize + 1 > srcSize)
-                       return ERROR(srcSize_wrong);
-               if (oSize >= hwSize)
-                       return ERROR(corruption_detected);
-               ip += 1;
-               {
-                       U32 n;
-                       for (n = 0; n < oSize; n += 2) {
-                               huffWeight[n] = ip[n / 2] >> 4;
-                               huffWeight[n + 1] = ip[n / 2] & 15;
-                       }
-               }
-       } else {                                                 /* header compressed with FSE (normal case) */
-               if (iSize + 1 > srcSize)
-                       return ERROR(srcSize_wrong);
-               oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */
-               if (FSE_isError(oSize))
-                       return oSize;
-       }
-
-       /* collect weight stats */
-       memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
-       weightTotal = 0;
-       {
-               U32 n;
-               for (n = 0; n < oSize; n++) {
-                       if (huffWeight[n] >= HUF_TABLELOG_MAX)
-                               return ERROR(corruption_detected);
-                       rankStats[huffWeight[n]]++;
-                       weightTotal += (1 << huffWeight[n]) >> 1;
-               }
-       }
-       if (weightTotal == 0)
-               return ERROR(corruption_detected);
-
-       /* get last non-null symbol weight (implied, total must be 2^n) */
-       {
-               U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-               if (tableLog > HUF_TABLELOG_MAX)
-                       return ERROR(corruption_detected);
-               *tableLogPtr = tableLog;
-               /* determine last weight */
-               {
-                       U32 const total = 1 << tableLog;
-                       U32 const rest = total - weightTotal;
-                       U32 const verif = 1 << BIT_highbit32(rest);
-                       U32 const lastWeight = BIT_highbit32(rest) + 1;
-                       if (verif != rest)
-                               return ERROR(corruption_detected); /* last value must be a clean power of 2 */
-                       huffWeight[oSize] = (BYTE)lastWeight;
-                       rankStats[lastWeight]++;
-               }
-       }
-
-       /* check tree construction validity */
-       if ((rankStats[1] < 2) || (rankStats[1] & 1))
-               return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
-
-       /* results */
-       *nbSymbolsPtr = (U32)(oSize + 1);
-       return iSize + 1;
-}
diff --git a/lib/zstd/error_private.h b/lib/zstd/error_private.h
deleted file mode 100644 (file)
index 1a60b31..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this module is expected to remain private, do not expose it */
-
-#ifndef ERROR_H_MODULE
-#define ERROR_H_MODULE
-
-/* ****************************************
-*  Dependencies
-******************************************/
-#include <linux/types.h> /* size_t */
-#include <linux/zstd.h>  /* enum list */
-
-/* ****************************************
-*  Compiler-specific
-******************************************/
-#define ERR_STATIC static __attribute__((unused))
-
-/*-****************************************
-*  Customization (error_public.h)
-******************************************/
-typedef ZSTD_ErrorCode ERR_enum;
-#define PREFIX(name) ZSTD_error_##name
-
-/*-****************************************
-*  Error codes handling
-******************************************/
-#define ERROR(name) ((size_t)-PREFIX(name))
-
-ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
-
-ERR_STATIC ERR_enum ERR_getErrorCode(size_t code)
-{
-       if (!ERR_isError(code))
-               return (ERR_enum)0;
-       return (ERR_enum)(0 - code);
-}
-
-#endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/fse.h b/lib/zstd/fse.h
deleted file mode 100644 (file)
index 7460ab0..0000000
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * FSE : Finite State Entropy codec
- * Public Prototypes declaration
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef FSE_H
-#define FSE_H
-
-/*-*****************************************
-*  Dependencies
-******************************************/
-#include <linux/types.h> /* size_t, ptrdiff_t */
-
-/*-*****************************************
-*  FSE_PUBLIC_API : control library symbols visibility
-******************************************/
-#define FSE_PUBLIC_API
-
-/*------   Version   ------*/
-#define FSE_VERSION_MAJOR 0
-#define FSE_VERSION_MINOR 9
-#define FSE_VERSION_RELEASE 0
-
-#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
-#define FSE_QUOTE(str) #str
-#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
-#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
-
-#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE)
-FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
-
-/*-*****************************************
-*  Tool functions
-******************************************/
-FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
-
-/* Error Management */
-FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
-
-/*-*****************************************
-*  FSE detailed API
-******************************************/
-/*!
-FSE_compress() does the following:
-1. count symbol occurrence from source[] into table count[]
-2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
-3. save normalized counters to memory buffer using writeNCount()
-4. build encoding table 'CTable' from normalized counters
-5. encode the data stream using encoding table 'CTable'
-
-FSE_decompress() does the following:
-1. read normalized counters with readNCount()
-2. build decoding table 'DTable' from normalized counters
-3. decode the data stream using decoding table 'DTable'
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and provide normalized distribution using external method.
-*/
-
-/* *** COMPRESSION *** */
-/*! FSE_optimalTableLog():
-       dynamically downsize 'tableLog' when conditions are met.
-       It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
-       @return : recommended tableLog (necessarily <= 'maxTableLog') */
-FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_normalizeCount():
-       normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
-       'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
-       @return : tableLog,
-                         or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue);
-
-/*! FSE_NCountWriteBound():
-       Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
-       Typically useful for allocation purpose. */
-FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
-
-/*! FSE_writeNCount():
-       Compactly save 'normalizedCounter' into 'buffer'.
-       @return : size of the compressed table,
-                         or an errorCode, which can be tested using FSE_isError(). */
-FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
-
-/*! Constructor and Destructor of FSE_CTable.
-       Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
-typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
-
-/*! FSE_compress_usingCTable():
-       Compress `src` using `ct` into `dst` which must be already allocated.
-       @return : size of compressed data (<= `dstCapacity`),
-                         or 0 if compressed data could not fit into `dst`,
-                         or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct);
-
-/*!
-Tutorial :
-----------
-The first step is to count all symbols. FSE_count() does this job very fast.
-Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
-'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
-maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
-FSE_count() will return the number of occurrence of the most frequent symbol.
-This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-The next step is to normalize the frequencies.
-FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
-It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
-You can use 'tableLog'==0 to mean "use default tableLog value".
-If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
-which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
-
-The result of FSE_normalizeCount() will be saved into a table,
-called 'normalizedCounter', which is a table of signed short.
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
-The return value is tableLog if everything proceeded as expected.
-It is 0 if there is a single symbol within distribution.
-If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
-
-'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
-'buffer' must be already allocated.
-For guaranteed success, buffer size must be at least FSE_headerBound().
-The result of the function is the number of bytes written into 'buffer'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
-
-'normalizedCounter' can then be used to create the compression table 'CTable'.
-The space required by 'CTable' must be already allocated, using FSE_createCTable().
-You can then use FSE_buildCTable() to fill 'CTable'.
-If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
-
-'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
-Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
-The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
-If it returns '0', compressed data could not fit into 'dst'.
-If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
-*/
-
-/* *** DECOMPRESSION *** */
-
-/*! FSE_readNCount():
-       Read compactly saved 'normalizedCounter' from 'rBuffer'.
-       @return : size read from 'rBuffer',
-                         or an errorCode, which can be tested using FSE_isError().
-                         maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
-FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize);
-
-/*! Constructor and Destructor of FSE_DTable.
-       Note that its size depends on 'tableLog' */
-typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
-
-/*! FSE_buildDTable():
-       Builds 'dt', which must be already allocated, using FSE_createDTable().
-       return : 0, or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize);
-
-/*! FSE_decompress_usingDTable():
-       Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
-       into `dst` which must be already allocated.
-       @return : size of regenerated data (necessarily <= `dstCapacity`),
-                         or an errorCode, which can be tested using FSE_isError() */
-FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt);
-
-/*!
-Tutorial :
-----------
-(Note : these functions only decompress FSE-compressed blocks.
- If block is uncompressed, use memcpy() instead
- If block is a single repeated byte, use memset() instead )
-
-The first step is to obtain the normalized frequencies of symbols.
-This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
-'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
-In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
-or size the table to handle worst case situations (typically 256).
-FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
-The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
-Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
-This is performed by the function FSE_buildDTable().
-The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
-If there is an error, the function will return an error code, which can be tested using FSE_isError().
-
-`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
-`cSrcSize` must be strictly correct, otherwise decompression will fail.
-FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
-If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
-*/
-
-/* *** Dependency *** */
-#include "bitstream.h"
-
-/* *****************************************
-*  Static allocation
-*******************************************/
-/* FSE buffer bounds */
-#define FSE_NCOUNTBOUND 512
-#define FSE_BLOCKBOUND(size) (size + (size >> 7))
-#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
-#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2))
-#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog))
-
-/* *****************************************
-*  FSE advanced API
-*******************************************/
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned
- */
-size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace);
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` must be a table of minimum `1024` unsigned
- */
-size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace);
-
-/*! FSE_count_simple
- * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
- * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
-*/
-size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize);
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
-/**< same as FSE_optimalTableLog(), which used `minus==2` */
-
-size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
-
-size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue);
-/**< build a fake FSE_CTable, designed to compress always the same symbolValue */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * `wkspSize` must be >= `(1<<tableLog)`.
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, size_t wkspSize);
-
-size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
-
-size_t FSE_buildDTable_rle(FSE_DTable *dt, unsigned char symbolValue);
-/**< build a fake FSE_DTable, designed to always generate the same symbolValue */
-
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize);
-/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
-
-/* *****************************************
-*  FSE symbol compression API
-*******************************************/
-/*!
-   This API consists of small unitary functions, which highly benefit from being inlined.
-   Hence their body are included in next section.
-*/
-typedef struct {
-       ptrdiff_t value;
-       const void *stateTable;
-       const void *symbolTT;
-       unsigned stateLog;
-} FSE_CState_t;
-
-static void FSE_initCState(FSE_CState_t *CStatePtr, const FSE_CTable *ct);
-
-static void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *CStatePtr, unsigned symbol);
-
-static void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *CStatePtr);
-
-/**<
-These functions are inner components of FSE_compress_usingCTable().
-They allow the creation of custom streams, mixing multiple tables and bit sources.
-
-A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
-So the first symbol you will encode is the last you will decode, like a LIFO stack.
-
-You will need a few variables to track your CStream. They are :
-
-FSE_CTable    ct;         // Provided by FSE_buildCTable()
-BIT_CStream_t bitStream;  // bitStream tracking structure
-FSE_CState_t  state;      // State tracking structure (can have several)
-
-
-The first thing to do is to init bitStream and state.
-       size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
-       FSE_initCState(&state, ct);
-
-Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
-You can then encode your input data, byte after byte.
-FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
-Remember decoding will be done in reverse direction.
-       FSE_encodeByte(&bitStream, &state, symbol);
-
-At any time, you can also add any bit sequence.
-Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
-       BIT_addBits(&bitStream, bitField, nbBits);
-
-The above methods don't commit data to memory, they just store it into local register, for speed.
-Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
-Writing data to memory is a manual operation, performed by the flushBits function.
-       BIT_flushBits(&bitStream);
-
-Your last FSE encoding operation shall be to flush your last state value(s).
-       FSE_flushState(&bitStream, &state);
-
-Finally, you must close the bitStream.
-The function returns the size of CStream in bytes.
-If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
-If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
-       size_t size = BIT_closeCStream(&bitStream);
-*/
-
-/* *****************************************
-*  FSE symbol decompression API
-*******************************************/
-typedef struct {
-       size_t state;
-       const void *table; /* precise table may vary, depending on U16 */
-} FSE_DState_t;
-
-static void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt);
-
-static unsigned char FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
-
-static unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr);
-
-/**<
-Let's now decompose FSE_decompress_usingDTable() into its unitary components.
-You will decode FSE-encoded symbols from the bitStream,
-and also any other bitFields you put in, **in reverse order**.
-
-You will need a few variables to track your bitStream. They are :
-
-BIT_DStream_t DStream;    // Stream context
-FSE_DState_t  DState;     // State context. Multiple ones are possible
-FSE_DTable*   DTablePtr;  // Decoding table, provided by FSE_buildDTable()
-
-The first thing to do is to init the bitStream.
-       errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
-
-You should then retrieve your initial state(s)
-(in reverse flushing order if you have several ones) :
-       errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
-
-You can then decode your data, symbol after symbol.
-For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
-Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
-       unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
-
-You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
-Note : maximum allowed nbBits is 25, for 32-bits compatibility
-       size_t bitField = BIT_readBits(&DStream, nbBits);
-
-All above operations only read from local register (which size depends on size_t).
-Refueling the register from memory is manually performed by the reload method.
-       endSignal = FSE_reloadDStream(&DStream);
-
-BIT_reloadDStream() result tells if there is still some more data to read from DStream.
-BIT_DStream_unfinished : there is still some data left into the DStream.
-BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
-BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
-BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
-
-When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
-to properly detect the exact end of stream.
-After each decoded symbol, check if DStream is fully consumed using this simple test :
-       BIT_reloadDStream(&DStream) >= BIT_DStream_completed
-
-When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
-Checking if DStream has reached its end is performed by :
-       BIT_endOfDStream(&DStream);
-Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
-       FSE_endOfDState(&DState);
-*/
-
-/* *****************************************
-*  FSE unsafe API
-*******************************************/
-static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD);
-/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
-
-/* *****************************************
-*  Implementation of inlined functions
-*******************************************/
-typedef struct {
-       int deltaFindState;
-       U32 deltaNbBits;
-} FSE_symbolCompressionTransform; /* total 8 bytes */
-
-ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct)
-{
-       const void *ptr = ct;
-       const U16 *u16ptr = (const U16 *)ptr;
-       const U32 tableLog = ZSTD_read16(ptr);
-       statePtr->value = (ptrdiff_t)1 << tableLog;
-       statePtr->stateTable = u16ptr + 2;
-       statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1));
-       statePtr->stateLog = tableLog;
-}
-
-/*! FSE_initCState2() :
-*   Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
-*   uses the smallest state value possible, saving the cost of this symbol */
-ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol)
-{
-       FSE_initCState(statePtr, ct);
-       {
-               const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
-               const U16 *stateTable = (const U16 *)(statePtr->stateTable);
-               U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16);
-               statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
-               statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-       }
-}
-
-ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol)
-{
-       const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol];
-       const U16 *const stateTable = (const U16 *)(statePtr->stateTable);
-       U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
-       BIT_addBits(bitC, statePtr->value, nbBitsOut);
-       statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
-}
-
-ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr)
-{
-       BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
-       BIT_flushBits(bitC);
-}
-
-/* ======    Decompression    ====== */
-
-typedef struct {
-       U16 tableLog;
-       U16 fastMode;
-} FSE_DTableHeader; /* sizeof U32 */
-
-typedef struct {
-       unsigned short newState;
-       unsigned char symbol;
-       unsigned char nbBits;
-} FSE_decode_t; /* size == U32 */
-
-ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt)
-{
-       const void *ptr = dt;
-       const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr;
-       DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
-       BIT_reloadDStream(bitD);
-       DStatePtr->table = dt + 1;
-}
-
-ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr)
-{
-       FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-       return DInfo.symbol;
-}
-
-ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-       FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-       U32 const nbBits = DInfo.nbBits;
-       size_t const lowBits = BIT_readBits(bitD, nbBits);
-       DStatePtr->state = DInfo.newState + lowBits;
-}
-
-ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-       FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-       U32 const nbBits = DInfo.nbBits;
-       BYTE const symbol = DInfo.symbol;
-       size_t const lowBits = BIT_readBits(bitD, nbBits);
-
-       DStatePtr->state = DInfo.newState + lowBits;
-       return symbol;
-}
-
-/*! FSE_decodeSymbolFast() :
-       unsafe, only works if no symbol has a probability > 50% */
-ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD)
-{
-       FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state];
-       U32 const nbBits = DInfo.nbBits;
-       BYTE const symbol = DInfo.symbol;
-       size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
-
-       DStatePtr->state = DInfo.newState + lowBits;
-       return symbol;
-}
-
-ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; }
-
-/* **************************************************************
-*  Tuning parameters
-****************************************************************/
-/*!MEMORY_USAGE :
-*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
-*  Increasing memory usage improves compression ratio
-*  Reduced memory usage can improve speed, due to cache effect
-*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
-#ifndef FSE_MAX_MEMORY_USAGE
-#define FSE_MAX_MEMORY_USAGE 14
-#endif
-#ifndef FSE_DEFAULT_MEMORY_USAGE
-#define FSE_DEFAULT_MEMORY_USAGE 13
-#endif
-
-/*!FSE_MAX_SYMBOL_VALUE :
-*  Maximum symbol value authorized.
-*  Required for proper stack allocation */
-#ifndef FSE_MAX_SYMBOL_VALUE
-#define FSE_MAX_SYMBOL_VALUE 255
-#endif
-
-/* **************************************************************
-*  template functions type & suffix
-****************************************************************/
-#define FSE_FUNCTION_TYPE BYTE
-#define FSE_FUNCTION_EXTENSION
-#define FSE_DECODE_TYPE FSE_decode_t
-
-/* ***************************************************************
-*  Constants
-*****************************************************************/
-#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2)
-#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG)
-#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1)
-#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2)
-#define FSE_MIN_TABLELOG 5
-
-#define FSE_TABLELOG_ABSOLUTE_MAX 15
-#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
-#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
-#endif
-
-#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3)
-
-#endif /* FSE_H */
diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c
deleted file mode 100644 (file)
index ef3d174..0000000
+++ /dev/null
@@ -1,795 +0,0 @@
-/*
- * FSE : Finite State Entropy encoder
- * Copyright (C) 2013-2015, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/math64.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_STATIC_ASSERT(c)                                   \
-       {                                                      \
-               enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-       } /* use only *after* variable declarations */
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-/* FSE_buildCTable_wksp() :
- * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
- * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
- * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
- */
-size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-       U32 const tableSize = 1 << tableLog;
-       U32 const tableMask = tableSize - 1;
-       void *const ptr = ct;
-       U16 *const tableU16 = ((U16 *)ptr) + 2;
-       void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
-       FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-       U32 const step = FSE_TABLESTEP(tableSize);
-       U32 highThreshold = tableSize - 1;
-
-       U32 *cumul;
-       FSE_FUNCTION_TYPE *tableSymbol;
-       size_t spaceUsed32 = 0;
-
-       cumul = (U32 *)workspace + spaceUsed32;
-       spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
-       tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       /* CTable header */
-       tableU16[-2] = (U16)tableLog;
-       tableU16[-1] = (U16)maxSymbolValue;
-
-       /* For explanations on how to distribute symbol values over the table :
-       *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
-
-       /* symbol start positions */
-       {
-               U32 u;
-               cumul[0] = 0;
-               for (u = 1; u <= maxSymbolValue + 1; u++) {
-                       if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
-                               cumul[u] = cumul[u - 1] + 1;
-                               tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
-                       } else {
-                               cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
-                       }
-               }
-               cumul[maxSymbolValue + 1] = tableSize + 1;
-       }
-
-       /* Spread symbols */
-       {
-               U32 position = 0;
-               U32 symbol;
-               for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
-                       int nbOccurences;
-                       for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
-                               tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
-                               position = (position + step) & tableMask;
-                               while (position > highThreshold)
-                                       position = (position + step) & tableMask; /* Low proba area */
-                       }
-               }
-
-               if (position != 0)
-                       return ERROR(GENERIC); /* Must have gone through all positions */
-       }
-
-       /* Build table */
-       {
-               U32 u;
-               for (u = 0; u < tableSize; u++) {
-                       FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
-                       tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
-               }
-       }
-
-       /* Build Symbol Transformation Table */
-       {
-               unsigned total = 0;
-               unsigned s;
-               for (s = 0; s <= maxSymbolValue; s++) {
-                       switch (normalizedCounter[s]) {
-                       case 0: break;
-
-                       case -1:
-                       case 1:
-                               symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
-                               symbolTT[s].deltaFindState = total - 1;
-                               total++;
-                               break;
-                       default: {
-                               U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
-                               U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
-                               symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                               symbolTT[s].deltaFindState = total - normalizedCounter[s];
-                               total += normalizedCounter[s];
-                       }
-                       }
-               }
-       }
-
-       return 0;
-}
-
-/*-**************************************************************
-*  FSE NCount encoding-decoding
-****************************************************************/
-size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
-{
-       size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
-       return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
-}
-
-static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
-                                     unsigned writeIsSafe)
-{
-       BYTE *const ostart = (BYTE *)header;
-       BYTE *out = ostart;
-       BYTE *const oend = ostart + headerBufferSize;
-       int nbBits;
-       const int tableSize = 1 << tableLog;
-       int remaining;
-       int threshold;
-       U32 bitStream;
-       int bitCount;
-       unsigned charnum = 0;
-       int previous0 = 0;
-
-       bitStream = 0;
-       bitCount = 0;
-       /* Table Size */
-       bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
-       bitCount += 4;
-
-       /* Init */
-       remaining = tableSize + 1; /* +1 for extra accuracy */
-       threshold = tableSize;
-       nbBits = tableLog + 1;
-
-       while (remaining > 1) { /* stops at 1 */
-               if (previous0) {
-                       unsigned start = charnum;
-                       while (!normalizedCounter[charnum])
-                               charnum++;
-                       while (charnum >= start + 24) {
-                               start += 24;
-                               bitStream += 0xFFFFU << bitCount;
-                               if ((!writeIsSafe) && (out > oend - 2))
-                                       return ERROR(dstSize_tooSmall); /* Buffer overflow */
-                               out[0] = (BYTE)bitStream;
-                               out[1] = (BYTE)(bitStream >> 8);
-                               out += 2;
-                               bitStream >>= 16;
-                       }
-                       while (charnum >= start + 3) {
-                               start += 3;
-                               bitStream += 3 << bitCount;
-                               bitCount += 2;
-                       }
-                       bitStream += (charnum - start) << bitCount;
-                       bitCount += 2;
-                       if (bitCount > 16) {
-                               if ((!writeIsSafe) && (out > oend - 2))
-                                       return ERROR(dstSize_tooSmall); /* Buffer overflow */
-                               out[0] = (BYTE)bitStream;
-                               out[1] = (BYTE)(bitStream >> 8);
-                               out += 2;
-                               bitStream >>= 16;
-                               bitCount -= 16;
-                       }
-               }
-               {
-                       int count = normalizedCounter[charnum++];
-                       int const max = (2 * threshold - 1) - remaining;
-                       remaining -= count < 0 ? -count : count;
-                       count++; /* +1 for extra accuracy */
-                       if (count >= threshold)
-                               count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
-                       bitStream += count << bitCount;
-                       bitCount += nbBits;
-                       bitCount -= (count < max);
-                       previous0 = (count == 1);
-                       if (remaining < 1)
-                               return ERROR(GENERIC);
-                       while (remaining < threshold)
-                               nbBits--, threshold >>= 1;
-               }
-               if (bitCount > 16) {
-                       if ((!writeIsSafe) && (out > oend - 2))
-                               return ERROR(dstSize_tooSmall); /* Buffer overflow */
-                       out[0] = (BYTE)bitStream;
-                       out[1] = (BYTE)(bitStream >> 8);
-                       out += 2;
-                       bitStream >>= 16;
-                       bitCount -= 16;
-               }
-       }
-
-       /* flush remaining bitStream */
-       if ((!writeIsSafe) && (out > oend - 2))
-               return ERROR(dstSize_tooSmall); /* Buffer overflow */
-       out[0] = (BYTE)bitStream;
-       out[1] = (BYTE)(bitStream >> 8);
-       out += (bitCount + 7) / 8;
-
-       if (charnum > maxSymbolValue + 1)
-               return ERROR(GENERIC);
-
-       return (out - ostart);
-}
-
-size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
-{
-       if (tableLog > FSE_MAX_TABLELOG)
-               return ERROR(tableLog_tooLarge); /* Unsupported */
-       if (tableLog < FSE_MIN_TABLELOG)
-               return ERROR(GENERIC); /* Unsupported */
-
-       if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
-               return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
-
-       return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
-}
-
-/*-**************************************************************
-*  Counting histogram
-****************************************************************/
-/*! FSE_count_simple
-       This function counts byte values within `src`, and store the histogram into table `count`.
-       It doesn't use any additional memory.
-       But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
-       For this reason, prefer using a table `count` with 256 elements.
-       @return : count of most numerous element
-*/
-size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
-{
-       const BYTE *ip = (const BYTE *)src;
-       const BYTE *const end = ip + srcSize;
-       unsigned maxSymbolValue = *maxSymbolValuePtr;
-       unsigned max = 0;
-
-       memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
-       if (srcSize == 0) {
-               *maxSymbolValuePtr = 0;
-               return 0;
-       }
-
-       while (ip < end)
-               count[*ip++]++;
-
-       while (!count[maxSymbolValue])
-               maxSymbolValue--;
-       *maxSymbolValuePtr = maxSymbolValue;
-
-       {
-               U32 s;
-               for (s = 0; s <= maxSymbolValue; s++)
-                       if (count[s] > max)
-                               max = count[s];
-       }
-
-       return (size_t)max;
-}
-
-/* FSE_count_parallel_wksp() :
- * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
- * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
-static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
-                                     unsigned *const workSpace)
-{
-       const BYTE *ip = (const BYTE *)source;
-       const BYTE *const iend = ip + sourceSize;
-       unsigned maxSymbolValue = *maxSymbolValuePtr;
-       unsigned max = 0;
-       U32 *const Counting1 = workSpace;
-       U32 *const Counting2 = Counting1 + 256;
-       U32 *const Counting3 = Counting2 + 256;
-       U32 *const Counting4 = Counting3 + 256;
-
-       memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
-
-       /* safety checks */
-       if (!sourceSize) {
-               memset(count, 0, maxSymbolValue + 1);
-               *maxSymbolValuePtr = 0;
-               return 0;
-       }
-       if (!maxSymbolValue)
-               maxSymbolValue = 255; /* 0 == default */
-
-       /* by stripes of 16 bytes */
-       {
-               U32 cached = ZSTD_read32(ip);
-               ip += 4;
-               while (ip < iend - 15) {
-                       U32 c = cached;
-                       cached = ZSTD_read32(ip);
-                       ip += 4;
-                       Counting1[(BYTE)c]++;
-                       Counting2[(BYTE)(c >> 8)]++;
-                       Counting3[(BYTE)(c >> 16)]++;
-                       Counting4[c >> 24]++;
-                       c = cached;
-                       cached = ZSTD_read32(ip);
-                       ip += 4;
-                       Counting1[(BYTE)c]++;
-                       Counting2[(BYTE)(c >> 8)]++;
-                       Counting3[(BYTE)(c >> 16)]++;
-                       Counting4[c >> 24]++;
-                       c = cached;
-                       cached = ZSTD_read32(ip);
-                       ip += 4;
-                       Counting1[(BYTE)c]++;
-                       Counting2[(BYTE)(c >> 8)]++;
-                       Counting3[(BYTE)(c >> 16)]++;
-                       Counting4[c >> 24]++;
-                       c = cached;
-                       cached = ZSTD_read32(ip);
-                       ip += 4;
-                       Counting1[(BYTE)c]++;
-                       Counting2[(BYTE)(c >> 8)]++;
-                       Counting3[(BYTE)(c >> 16)]++;
-                       Counting4[c >> 24]++;
-               }
-               ip -= 4;
-       }
-
-       /* finish last symbols */
-       while (ip < iend)
-               Counting1[*ip++]++;
-
-       if (checkMax) { /* verify stats will fit into destination table */
-               U32 s;
-               for (s = 255; s > maxSymbolValue; s--) {
-                       Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
-                       if (Counting1[s])
-                               return ERROR(maxSymbolValue_tooSmall);
-               }
-       }
-
-       {
-               U32 s;
-               for (s = 0; s <= maxSymbolValue; s++) {
-                       count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
-                       if (count[s] > max)
-                               max = count[s];
-               }
-       }
-
-       while (!count[maxSymbolValue])
-               maxSymbolValue--;
-       *maxSymbolValuePtr = maxSymbolValue;
-       return (size_t)max;
-}
-
-/* FSE_countFast_wksp() :
- * Same as FSE_countFast(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned */
-size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
-{
-       if (sourceSize < 1500)
-               return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
-       return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
-}
-
-/* FSE_count_wksp() :
- * Same as FSE_count(), but using an externally provided scratch buffer.
- * `workSpace` size must be table of >= `1024` unsigned */
-size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
-{
-       if (*maxSymbolValuePtr < 255)
-               return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
-       *maxSymbolValuePtr = 255;
-       return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
-}
-
-/*-**************************************************************
-*  FSE Compression Code
-****************************************************************/
-/*! FSE_sizeof_CTable() :
-       FSE_CTable is a variable size structure which contains :
-       `U16 tableLog;`
-       `U16 maxSymbolValue;`
-       `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
-       `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
-Allocation is manual (C standard does not support variable-size structures).
-*/
-size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
-{
-       if (tableLog > FSE_MAX_TABLELOG)
-               return ERROR(tableLog_tooLarge);
-       return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
-}
-
-/* provides the minimum logSize to safely represent a distribution */
-static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
-{
-       U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
-       U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
-       U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
-       return minBits;
-}
-
-unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
-{
-       U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
-       U32 tableLog = maxTableLog;
-       U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
-       if (tableLog == 0)
-               tableLog = FSE_DEFAULT_TABLELOG;
-       if (maxBitsSrc < tableLog)
-               tableLog = maxBitsSrc; /* Accuracy can be reduced */
-       if (minBits > tableLog)
-               tableLog = minBits; /* Need a minimum to safely represent all symbol values */
-       if (tableLog < FSE_MIN_TABLELOG)
-               tableLog = FSE_MIN_TABLELOG;
-       if (tableLog > FSE_MAX_TABLELOG)
-               tableLog = FSE_MAX_TABLELOG;
-       return tableLog;
-}
-
-unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-       return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
-}
-
-/* Secondary normalization method.
-   To be used when primary method fails. */
-
-static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
-{
-       short const NOT_YET_ASSIGNED = -2;
-       U32 s;
-       U32 distributed = 0;
-       U32 ToDistribute;
-
-       /* Init */
-       U32 const lowThreshold = (U32)(total >> tableLog);
-       U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
-
-       for (s = 0; s <= maxSymbolValue; s++) {
-               if (count[s] == 0) {
-                       norm[s] = 0;
-                       continue;
-               }
-               if (count[s] <= lowThreshold) {
-                       norm[s] = -1;
-                       distributed++;
-                       total -= count[s];
-                       continue;
-               }
-               if (count[s] <= lowOne) {
-                       norm[s] = 1;
-                       distributed++;
-                       total -= count[s];
-                       continue;
-               }
-
-               norm[s] = NOT_YET_ASSIGNED;
-       }
-       ToDistribute = (1 << tableLog) - distributed;
-
-       if ((total / ToDistribute) > lowOne) {
-               /* risk of rounding to zero */
-               lowOne = (U32)((total * 3) / (ToDistribute * 2));
-               for (s = 0; s <= maxSymbolValue; s++) {
-                       if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
-                               norm[s] = 1;
-                               distributed++;
-                               total -= count[s];
-                               continue;
-                       }
-               }
-               ToDistribute = (1 << tableLog) - distributed;
-       }
-
-       if (distributed == maxSymbolValue + 1) {
-               /* all values are pretty poor;
-                  probably incompressible data (should have already been detected);
-                  find max, then give all remaining points to max */
-               U32 maxV = 0, maxC = 0;
-               for (s = 0; s <= maxSymbolValue; s++)
-                       if (count[s] > maxC)
-                               maxV = s, maxC = count[s];
-               norm[maxV] += (short)ToDistribute;
-               return 0;
-       }
-
-       if (total == 0) {
-               /* all of the symbols were low enough for the lowOne or lowThreshold */
-               for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
-                       if (norm[s] > 0)
-                               ToDistribute--, norm[s]++;
-               return 0;
-       }
-
-       {
-               U64 const vStepLog = 62 - tableLog;
-               U64 const mid = (1ULL << (vStepLog - 1)) - 1;
-               U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
-               U64 tmpTotal = mid;
-               for (s = 0; s <= maxSymbolValue; s++) {
-                       if (norm[s] == NOT_YET_ASSIGNED) {
-                               U64 const end = tmpTotal + (count[s] * rStep);
-                               U32 const sStart = (U32)(tmpTotal >> vStepLog);
-                               U32 const sEnd = (U32)(end >> vStepLog);
-                               U32 const weight = sEnd - sStart;
-                               if (weight < 1)
-                                       return ERROR(GENERIC);
-                               norm[s] = (short)weight;
-                               tmpTotal = end;
-                       }
-               }
-       }
-
-       return 0;
-}
-
-size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
-{
-       /* Sanity checks */
-       if (tableLog == 0)
-               tableLog = FSE_DEFAULT_TABLELOG;
-       if (tableLog < FSE_MIN_TABLELOG)
-               return ERROR(GENERIC); /* Unsupported size */
-       if (tableLog > FSE_MAX_TABLELOG)
-               return ERROR(tableLog_tooLarge); /* Unsupported size */
-       if (tableLog < FSE_minTableLog(total, maxSymbolValue))
-               return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
-
-       {
-               U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
-               U64 const scale = 62 - tableLog;
-               U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
-               U64 const vStep = 1ULL << (scale - 20);
-               int stillToDistribute = 1 << tableLog;
-               unsigned s;
-               unsigned largest = 0;
-               short largestP = 0;
-               U32 lowThreshold = (U32)(total >> tableLog);
-
-               for (s = 0; s <= maxSymbolValue; s++) {
-                       if (count[s] == total)
-                               return 0; /* rle special case */
-                       if (count[s] == 0) {
-                               normalizedCounter[s] = 0;
-                               continue;
-                       }
-                       if (count[s] <= lowThreshold) {
-                               normalizedCounter[s] = -1;
-                               stillToDistribute--;
-                       } else {
-                               short proba = (short)((count[s] * step) >> scale);
-                               if (proba < 8) {
-                                       U64 restToBeat = vStep * rtbTable[proba];
-                                       proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
-                               }
-                               if (proba > largestP)
-                                       largestP = proba, largest = s;
-                               normalizedCounter[s] = proba;
-                               stillToDistribute -= proba;
-                       }
-               }
-               if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
-                       /* corner case, need another normalization method */
-                       size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
-                       if (FSE_isError(errorCode))
-                               return errorCode;
-               } else
-                       normalizedCounter[largest] += (short)stillToDistribute;
-       }
-
-       return tableLog;
-}
-
-/* fake FSE_CTable, for raw (uncompressed) input */
-size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
-{
-       const unsigned tableSize = 1 << nbBits;
-       const unsigned tableMask = tableSize - 1;
-       const unsigned maxSymbolValue = tableMask;
-       void *const ptr = ct;
-       U16 *const tableU16 = ((U16 *)ptr) + 2;
-       void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
-       FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
-       unsigned s;
-
-       /* Sanity checks */
-       if (nbBits < 1)
-               return ERROR(GENERIC); /* min size */
-
-       /* header */
-       tableU16[-2] = (U16)nbBits;
-       tableU16[-1] = (U16)maxSymbolValue;
-
-       /* Build table */
-       for (s = 0; s < tableSize; s++)
-               tableU16[s] = (U16)(tableSize + s);
-
-       /* Build Symbol Transformation Table */
-       {
-               const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
-               for (s = 0; s <= maxSymbolValue; s++) {
-                       symbolTT[s].deltaNbBits = deltaNbBits;
-                       symbolTT[s].deltaFindState = s - 1;
-               }
-       }
-
-       return 0;
-}
-
-/* fake FSE_CTable, for rle input (always same symbol) */
-size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
-{
-       void *ptr = ct;
-       U16 *tableU16 = ((U16 *)ptr) + 2;
-       void *FSCTptr = (U32 *)ptr + 2;
-       FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
-
-       /* header */
-       tableU16[-2] = (U16)0;
-       tableU16[-1] = (U16)symbolValue;
-
-       /* Build table */
-       tableU16[0] = 0;
-       tableU16[1] = 0; /* just in case */
-
-       /* Build Symbol Transformation Table */
-       symbolTT[symbolValue].deltaNbBits = 0;
-       symbolTT[symbolValue].deltaFindState = 0;
-
-       return 0;
-}
-
-static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
-{
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *ip = iend;
-
-       BIT_CStream_t bitC;
-       FSE_CState_t CState1, CState2;
-
-       /* init */
-       if (srcSize <= 2)
-               return 0;
-       {
-               size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
-               if (FSE_isError(initError))
-                       return 0; /* not enough space available to write a bitstream */
-       }
-
-#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
-
-       if (srcSize & 1) {
-               FSE_initCState2(&CState1, ct, *--ip);
-               FSE_initCState2(&CState2, ct, *--ip);
-               FSE_encodeSymbol(&bitC, &CState1, *--ip);
-               FSE_FLUSHBITS(&bitC);
-       } else {
-               FSE_initCState2(&CState2, ct, *--ip);
-               FSE_initCState2(&CState1, ct, *--ip);
-       }
-
-       /* join to mod 4 */
-       srcSize -= 2;
-       if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
-               FSE_encodeSymbol(&bitC, &CState2, *--ip);
-               FSE_encodeSymbol(&bitC, &CState1, *--ip);
-               FSE_FLUSHBITS(&bitC);
-       }
-
-       /* 2 or 4 encoding per loop */
-       while (ip > istart) {
-
-               FSE_encodeSymbol(&bitC, &CState2, *--ip);
-
-               if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
-                       FSE_FLUSHBITS(&bitC);
-
-               FSE_encodeSymbol(&bitC, &CState1, *--ip);
-
-               if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
-                       FSE_encodeSymbol(&bitC, &CState2, *--ip);
-                       FSE_encodeSymbol(&bitC, &CState1, *--ip);
-               }
-
-               FSE_FLUSHBITS(&bitC);
-       }
-
-       FSE_flushCState(&bitC, &CState2);
-       FSE_flushCState(&bitC, &CState1);
-       return BIT_closeCStream(&bitC);
-}
-
-size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
-{
-       unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
-
-       if (fast)
-               return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
-       else
-               return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
-}
-
-size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c
deleted file mode 100644 (file)
index 0b35353..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-/*
- * FSE : Finite State Entropy decoder
- * Copyright (C) 2013-2015, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h"
-#include "zstd_internal.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define FSE_isError ERR_isError
-#define FSE_STATIC_ASSERT(c)                                   \
-       {                                                      \
-               enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
-       } /* use only *after* variable declarations */
-
-/* **************************************************************
-*  Templates
-****************************************************************/
-/*
-  designed to be included
-  for type-specific functions (template emulation in C)
-  Objective is to write these functions only once, for improved maintenance
-*/
-
-/* safety checks */
-#ifndef FSE_FUNCTION_EXTENSION
-#error "FSE_FUNCTION_EXTENSION must be defined"
-#endif
-#ifndef FSE_FUNCTION_TYPE
-#error "FSE_FUNCTION_TYPE must be defined"
-#endif
-
-/* Function names */
-#define FSE_CAT(X, Y) X##Y
-#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
-#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
-
-/* Function templates */
-
-size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
-{
-       void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
-       FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr);
-       U16 *symbolNext = (U16 *)workspace;
-
-       U32 const maxSV1 = maxSymbolValue + 1;
-       U32 const tableSize = 1 << tableLog;
-       U32 highThreshold = tableSize - 1;
-
-       /* Sanity Checks */
-       if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1))
-               return ERROR(tableLog_tooLarge);
-       if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE)
-               return ERROR(maxSymbolValue_tooLarge);
-       if (tableLog > FSE_MAX_TABLELOG)
-               return ERROR(tableLog_tooLarge);
-
-       /* Init, lay down lowprob symbols */
-       {
-               FSE_DTableHeader DTableH;
-               DTableH.tableLog = (U16)tableLog;
-               DTableH.fastMode = 1;
-               {
-                       S16 const largeLimit = (S16)(1 << (tableLog - 1));
-                       U32 s;
-                       for (s = 0; s < maxSV1; s++) {
-                               if (normalizedCounter[s] == -1) {
-                                       tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
-                                       symbolNext[s] = 1;
-                               } else {
-                                       if (normalizedCounter[s] >= largeLimit)
-                                               DTableH.fastMode = 0;
-                                       symbolNext[s] = normalizedCounter[s];
-                               }
-                       }
-               }
-               memcpy(dt, &DTableH, sizeof(DTableH));
-       }
-
-       /* Spread symbols */
-       {
-               U32 const tableMask = tableSize - 1;
-               U32 const step = FSE_TABLESTEP(tableSize);
-               U32 s, position = 0;
-               for (s = 0; s < maxSV1; s++) {
-                       int i;
-                       for (i = 0; i < normalizedCounter[s]; i++) {
-                               tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
-                               position = (position + step) & tableMask;
-                               while (position > highThreshold)
-                                       position = (position + step) & tableMask; /* lowprob area */
-                       }
-               }
-               if (position != 0)
-                       return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
-       }
-
-       /* Build Decoding table */
-       {
-               U32 u;
-               for (u = 0; u < tableSize; u++) {
-                       FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
-                       U16 nextState = symbolNext[symbol]++;
-                       tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState));
-                       tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize);
-               }
-       }
-
-       return 0;
-}
-
-/*-*******************************************************
-*  Decompression (Byte symbols)
-*********************************************************/
-size_t FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue)
-{
-       void *ptr = dt;
-       FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
-       void *dPtr = dt + 1;
-       FSE_decode_t *const cell = (FSE_decode_t *)dPtr;
-
-       DTableH->tableLog = 0;
-       DTableH->fastMode = 0;
-
-       cell->newState = 0;
-       cell->symbol = symbolValue;
-       cell->nbBits = 0;
-
-       return 0;
-}
-
-size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits)
-{
-       void *ptr = dt;
-       FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr;
-       void *dPtr = dt + 1;
-       FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr;
-       const unsigned tableSize = 1 << nbBits;
-       const unsigned tableMask = tableSize - 1;
-       const unsigned maxSV1 = tableMask + 1;
-       unsigned s;
-
-       /* Sanity checks */
-       if (nbBits < 1)
-               return ERROR(GENERIC); /* min size */
-
-       /* Build Decoding Table */
-       DTableH->tableLog = (U16)nbBits;
-       DTableH->fastMode = 1;
-       for (s = 0; s < maxSV1; s++) {
-               dinfo[s].newState = 0;
-               dinfo[s].symbol = (BYTE)s;
-               dinfo[s].nbBits = (BYTE)nbBits;
-       }
-
-       return 0;
-}
-
-FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt,
-                                                      const unsigned fast)
-{
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *op = ostart;
-       BYTE *const omax = op + maxDstSize;
-       BYTE *const olimit = omax - 3;
-
-       BIT_DStream_t bitD;
-       FSE_DState_t state1;
-       FSE_DState_t state2;
-
-       /* Init */
-       CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
-
-       FSE_initDState(&state1, &bitD, dt);
-       FSE_initDState(&state2, &bitD, dt);
-
-#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
-
-       /* 4 symbols per loop */
-       for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) {
-               op[0] = FSE_GETSYMBOL(&state1);
-
-               if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-                       BIT_reloadDStream(&bitD);
-
-               op[1] = FSE_GETSYMBOL(&state2);
-
-               if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-               {
-                       if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) {
-                               op += 2;
-                               break;
-                       }
-               }
-
-               op[2] = FSE_GETSYMBOL(&state1);
-
-               if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */
-                       BIT_reloadDStream(&bitD);
-
-               op[3] = FSE_GETSYMBOL(&state2);
-       }
-
-       /* tail */
-       /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
-       while (1) {
-               if (op > (omax - 2))
-                       return ERROR(dstSize_tooSmall);
-               *op++ = FSE_GETSYMBOL(&state1);
-               if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
-                       *op++ = FSE_GETSYMBOL(&state2);
-                       break;
-               }
-
-               if (op > (omax - 2))
-                       return ERROR(dstSize_tooSmall);
-               *op++ = FSE_GETSYMBOL(&state2);
-               if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) {
-                       *op++ = FSE_GETSYMBOL(&state1);
-                       break;
-               }
-       }
-
-       return op - ostart;
-}
-
-size_t FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt)
-{
-       const void *ptr = dt;
-       const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr;
-       const U32 fastMode = DTableH->fastMode;
-
-       /* select fast mode (static) */
-       if (fastMode)
-               return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
-       return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
-}
-
-size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize)
-{
-       const BYTE *const istart = (const BYTE *)cSrc;
-       const BYTE *ip = istart;
-       unsigned tableLog;
-       unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
-       size_t NCountLength;
-
-       FSE_DTable *dt;
-       short *counting;
-       size_t spaceUsed32 = 0;
-
-       FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32));
-
-       dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog);
-       counting = (short *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       /* normal FSE decoding mode */
-       NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
-       if (FSE_isError(NCountLength))
-               return NCountLength;
-       // if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong);   /* too small input size; supposed to be already checked in NCountLength, only remaining
-       // case : NCountLength==cSrcSize */
-       if (tableLog > maxLog)
-               return ERROR(tableLog_tooLarge);
-       ip += NCountLength;
-       cSrcSize -= NCountLength;
-
-       CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize));
-
-       return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */
-}
diff --git a/lib/zstd/huf.h b/lib/zstd/huf.h
deleted file mode 100644 (file)
index 923218d..0000000
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Huffman coder, part of New Generation Entropy library
- * header file
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-#ifndef HUF_H_298734234
-#define HUF_H_298734234
-
-/* *** Dependencies *** */
-#include <linux/types.h> /* size_t */
-
-/* ***   Tool functions *** */
-#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */
-size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
-
-/* Error Management */
-unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
-
-/* ***   Advanced function   *** */
-
-/** HUF_compress4X_wksp() :
-*   Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */
-size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-                          size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-/* *** Dependencies *** */
-#include "mem.h" /* U32 */
-
-/* *** Constants *** */
-#define HUF_TABLELOG_MAX 12     /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
-#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
-#define HUF_SYMBOLVALUE_MAX 255
-
-#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
-#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
-#error "HUF_TABLELOG_MAX is too large !"
-#endif
-
-/* ****************************************
-*  Static allocation
-******************************************/
-/* HUF buffer bounds */
-#define HUF_CTABLEBOUND 129
-#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8)                   /* only true if incompressible pre-filtered with fast heuristic */
-#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
-
-/* static allocation of HUF's Compression Table */
-#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-       U32 name##hb[maxSymbolValue + 1];              \
-       void *name##hv = &(name##hb);                  \
-       HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */
-
-/* static allocation of HUF's DTable */
-typedef U32 HUF_DTable;
-#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog)))
-#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)}
-#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)}
-
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10)
-#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-/* The workspace must have alignment at least 4 and be at least this large */
-#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10)
-#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
-
-/* ****************************************
-*  Advanced decompression functions
-******************************************/
-size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-                               size_t workspaceSize);                                                         /**< considers RLE and uncompressed as errors */
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-                                  size_t workspaceSize); /**< single-symbol decoder */
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-                                  size_t workspaceSize); /**< double-symbols decoder */
-
-/* ****************************************
-*  HUF detailed API
-******************************************/
-/*!
-HUF_compress() does the following:
-1. count symbol occurrence from source[] into table count[] using FSE_count()
-2. (optional) refine tableLog using HUF_optimalTableLog()
-3. build Huffman table from count using HUF_buildCTable()
-4. save Huffman table to memory buffer using HUF_writeCTable_wksp()
-5. encode the data stream using HUF_compress4X_usingCTable()
-
-The following API allows targeting specific sub-functions for advanced tasks.
-For example, it's possible to compress several blocks using the same 'CTable',
-or to save and regenerate 'CTable' using external methods.
-*/
-/* FSE_count() : find it within "fse.h" */
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
-typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
-size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize);
-size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
-
-typedef enum {
-       HUF_repeat_none,  /**< Cannot use the previous table */
-       HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1,
-                            4}X_repeat */
-       HUF_repeat_valid  /**< Can use the previous table and it is assumed to be valid */
-} HUF_repeat;
-/** HUF_compress4X_repeat() :
-*   Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-*   If it uses hufTable it does not modify hufTable or repeat.
-*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-*   If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-                            size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
-                            int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
- */
-size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize);
-
-/*! HUF_readStats() :
-       Read compact Huffman tree, saved by HUF_writeCTable().
-       `huffWeight` is destination buffer.
-       @return : size read from `src` , or an error Code .
-       Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */
-size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize,
-                         void *workspace, size_t workspaceSize);
-
-/** HUF_readCTable() :
-*   Loading a CTable saved with HUF_writeCTable() */
-size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-
-/*
-HUF_decompress() does the following:
-1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
-2. build Huffman table from save, using HUF_readDTableXn()
-3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
-*/
-
-/** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-determined metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
-U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize);
-
-size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize);
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-
-/* single stream variants */
-
-size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-                          size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable);
-/** HUF_compress1X_repeat() :
-*   Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
-*   If it uses hufTable it does not modify hufTable or repeat.
-*   If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
-*   If preferRepeat then the old table will always be used if valid. */
-size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace,
-                            size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat,
-                            int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize);
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-                                  size_t workspaceSize); /**< single-symbol decoder */
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace,
-                                  size_t workspaceSize); /**< double-symbols decoder */
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize,
-                                   const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */
-size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable);
-
-#endif /* HUF_H_298734234 */
diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c
deleted file mode 100644 (file)
index fd32838..0000000
+++ /dev/null
@@ -1,773 +0,0 @@
-/*
- * Huffman encoder, part of New Generation Entropy library
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Includes
-****************************************************************/
-#include "bitstream.h"
-#include "fse.h" /* header compression */
-#include "huf.h"
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c)                                   \
-       {                                                      \
-               enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
-       } /* use only *after* variable declarations */
-#define CHECK_V_F(e, f)     \
-       size_t const e = f; \
-       if (ERR_isError(e)) \
-       return f
-#define CHECK_F(f)                        \
-       {                                 \
-               CHECK_V_F(_var_err__, f); \
-       }
-
-/* **************************************************************
-*  Utils
-****************************************************************/
-unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
-{
-       return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
-}
-
-/* *******************************************************
-*  HUF : Huffman block compression
-*********************************************************/
-/* HUF_compressWeights() :
- * Same as FSE_compress(), but dedicated to huff0's weights compression.
- * The use case needs much less stack memory.
- * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
- */
-#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
-size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize)
-{
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *op = ostart;
-       BYTE *const oend = ostart + dstSize;
-
-       U32 maxSymbolValue = HUF_TABLELOG_MAX;
-       U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-
-       FSE_CTable *CTable;
-       U32 *count;
-       S16 *norm;
-       size_t spaceUsed32 = 0;
-
-       HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32));
-
-       CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX);
-       count = (U32 *)workspace + spaceUsed32;
-       spaceUsed32 += HUF_TABLELOG_MAX + 1;
-       norm = (S16 *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       /* init conditions */
-       if (wtSize <= 1)
-               return 0; /* Not compressible */
-
-       /* Scan input and build symbol stats */
-       {
-               CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize));
-               if (maxCount == wtSize)
-                       return 1; /* only a single symbol in src : rle */
-               if (maxCount == 1)
-                       return 0; /* each symbol present maximum once => not compressible */
-       }
-
-       tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
-       CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue));
-
-       /* Write table description header */
-       {
-               CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog));
-               op += hSize;
-       }
-
-       /* Compress */
-       CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize));
-       {
-               CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable));
-               if (cSize == 0)
-                       return 0; /* not enough space for compressed data */
-               op += cSize;
-       }
-
-       return op - ostart;
-}
-
-struct HUF_CElt_s {
-       U16 val;
-       BYTE nbBits;
-}; /* typedef'd to HUF_CElt within "huf.h" */
-
-/*! HUF_writeCTable_wksp() :
-       `CTable` : Huffman tree to save, using huf representation.
-       @return : size of saved CTable */
-size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize)
-{
-       BYTE *op = (BYTE *)dst;
-       U32 n;
-
-       BYTE *bitsToWeight;
-       BYTE *huffWeight;
-       size_t spaceUsed32 = 0;
-
-       bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2;
-       huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       /* check conditions */
-       if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-               return ERROR(maxSymbolValue_tooLarge);
-
-       /* convert to weight */
-       bitsToWeight[0] = 0;
-       for (n = 1; n < huffLog + 1; n++)
-               bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
-       for (n = 0; n < maxSymbolValue; n++)
-               huffWeight[n] = bitsToWeight[CTable[n].nbBits];
-
-       /* attempt weights compression by FSE */
-       {
-               CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize));
-               if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */
-                       op[0] = (BYTE)hSize;
-                       return hSize + 1;
-               }
-       }
-
-       /* write raw values as 4-bits (max : 15) */
-       if (maxSymbolValue > (256 - 128))
-               return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
-       if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize)
-               return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
-       op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1));
-       huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
-       for (n = 0; n < maxSymbolValue; n += 2)
-               op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]);
-       return ((maxSymbolValue + 1) / 2) + 1;
-}
-
-size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-       U32 *rankVal;
-       BYTE *huffWeight;
-       U32 tableLog = 0;
-       U32 nbSymbols = 0;
-       size_t readSize;
-       size_t spaceUsed32 = 0;
-
-       rankVal = (U32 *)workspace + spaceUsed32;
-       spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-       huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       /* get symbol weights */
-       readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-       if (ERR_isError(readSize))
-               return readSize;
-
-       /* check result */
-       if (tableLog > HUF_TABLELOG_MAX)
-               return ERROR(tableLog_tooLarge);
-       if (nbSymbols > maxSymbolValue + 1)
-               return ERROR(maxSymbolValue_tooSmall);
-
-       /* Prepare base value per rank */
-       {
-               U32 n, nextRankStart = 0;
-               for (n = 1; n <= tableLog; n++) {
-                       U32 curr = nextRankStart;
-                       nextRankStart += (rankVal[n] << (n - 1));
-                       rankVal[n] = curr;
-               }
-       }
-
-       /* fill nbBits */
-       {
-               U32 n;
-               for (n = 0; n < nbSymbols; n++) {
-                       const U32 w = huffWeight[n];
-                       CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
-               }
-       }
-
-       /* fill val */
-       {
-               U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */
-               U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0};
-               {
-                       U32 n;
-                       for (n = 0; n < nbSymbols; n++)
-                               nbPerRank[CTable[n].nbBits]++;
-               }
-               /* determine stating value per rank */
-               valPerRank[tableLog + 1] = 0; /* for w==0 */
-               {
-                       U16 min = 0;
-                       U32 n;
-                       for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */
-                               valPerRank[n] = min;     /* get starting value within each rank */
-                               min += nbPerRank[n];
-                               min >>= 1;
-                       }
-               }
-               /* assign value within rank, symbol order */
-               {
-                       U32 n;
-                       for (n = 0; n <= maxSymbolValue; n++)
-                               CTable[n].val = valPerRank[CTable[n].nbBits]++;
-               }
-       }
-
-       return readSize;
-}
-
-typedef struct nodeElt_s {
-       U32 count;
-       U16 parent;
-       BYTE byte;
-       BYTE nbBits;
-} nodeElt;
-
-static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits)
-{
-       const U32 largestBits = huffNode[lastNonNull].nbBits;
-       if (largestBits <= maxNbBits)
-               return largestBits; /* early exit : no elt > maxNbBits */
-
-       /* there are several too large elements (at least >= 2) */
-       {
-               int totalCost = 0;
-               const U32 baseCost = 1 << (largestBits - maxNbBits);
-               U32 n = lastNonNull;
-
-               while (huffNode[n].nbBits > maxNbBits) {
-                       totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
-                       huffNode[n].nbBits = (BYTE)maxNbBits;
-                       n--;
-               } /* n stops at huffNode[n].nbBits <= maxNbBits */
-               while (huffNode[n].nbBits == maxNbBits)
-                       n--; /* n end at index of smallest symbol using < maxNbBits */
-
-               /* renorm totalCost */
-               totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
-
-               /* repay normalized cost */
-               {
-                       U32 const noSymbol = 0xF0F0F0F0;
-                       U32 rankLast[HUF_TABLELOG_MAX + 2];
-                       int pos;
-
-                       /* Get pos of last (smallest) symbol per rank */
-                       memset(rankLast, 0xF0, sizeof(rankLast));
-                       {
-                               U32 currNbBits = maxNbBits;
-                               for (pos = n; pos >= 0; pos--) {
-                                       if (huffNode[pos].nbBits >= currNbBits)
-                                               continue;
-                                       currNbBits = huffNode[pos].nbBits; /* < maxNbBits */
-                                       rankLast[maxNbBits - currNbBits] = pos;
-                               }
-                       }
-
-                       while (totalCost > 0) {
-                               U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
-                               for (; nBitsToDecrease > 1; nBitsToDecrease--) {
-                                       U32 highPos = rankLast[nBitsToDecrease];
-                                       U32 lowPos = rankLast[nBitsToDecrease - 1];
-                                       if (highPos == noSymbol)
-                                               continue;
-                                       if (lowPos == noSymbol)
-                                               break;
-                                       {
-                                               U32 const highTotal = huffNode[highPos].count;
-                                               U32 const lowTotal = 2 * huffNode[lowPos].count;
-                                               if (highTotal <= lowTotal)
-                                                       break;
-                                       }
-                               }
-                               /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
-                               /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
-                               while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
-                                       nBitsToDecrease++;
-                               totalCost -= 1 << (nBitsToDecrease - 1);
-                               if (rankLast[nBitsToDecrease - 1] == noSymbol)
-                                       rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
-                               huffNode[rankLast[nBitsToDecrease]].nbBits++;
-                               if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
-                                       rankLast[nBitsToDecrease] = noSymbol;
-                               else {
-                                       rankLast[nBitsToDecrease]--;
-                                       if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease)
-                                               rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
-                               }
-                       } /* while (totalCost > 0) */
-
-                       while (totalCost < 0) {                /* Sometimes, cost correction overshoot */
-                               if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0
-                                                                 (using maxNbBits) */
-                                       while (huffNode[n].nbBits == maxNbBits)
-                                               n--;
-                                       huffNode[n + 1].nbBits--;
-                                       rankLast[1] = n + 1;
-                                       totalCost++;
-                                       continue;
-                               }
-                               huffNode[rankLast[1] + 1].nbBits--;
-                               rankLast[1]++;
-                               totalCost++;
-                       }
-               }
-       } /* there are several too large elements (at least >= 2) */
-
-       return maxNbBits;
-}
-
-typedef struct {
-       U32 base;
-       U32 curr;
-} rankPos;
-
-static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue)
-{
-       rankPos rank[32];
-       U32 n;
-
-       memset(rank, 0, sizeof(rank));
-       for (n = 0; n <= maxSymbolValue; n++) {
-               U32 r = BIT_highbit32(count[n] + 1);
-               rank[r].base++;
-       }
-       for (n = 30; n > 0; n--)
-               rank[n - 1].base += rank[n].base;
-       for (n = 0; n < 32; n++)
-               rank[n].curr = rank[n].base;
-       for (n = 0; n <= maxSymbolValue; n++) {
-               U32 const c = count[n];
-               U32 const r = BIT_highbit32(c + 1) + 1;
-               U32 pos = rank[r].curr++;
-               while ((pos > rank[r].base) && (c > huffNode[pos - 1].count))
-                       huffNode[pos] = huffNode[pos - 1], pos--;
-               huffNode[pos].count = c;
-               huffNode[pos].byte = (BYTE)n;
-       }
-}
-
-/** HUF_buildCTable_wksp() :
- *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
- *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned.
- */
-#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1)
-typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1];
-size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize)
-{
-       nodeElt *const huffNode0 = (nodeElt *)workSpace;
-       nodeElt *const huffNode = huffNode0 + 1;
-       U32 n, nonNullRank;
-       int lowS, lowN;
-       U16 nodeNb = STARTNODE;
-       U32 nodeRoot;
-
-       /* safety checks */
-       if (wkspSize < sizeof(huffNodeTable))
-               return ERROR(GENERIC); /* workSpace is not large enough */
-       if (maxNbBits == 0)
-               maxNbBits = HUF_TABLELOG_DEFAULT;
-       if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
-               return ERROR(GENERIC);
-       memset(huffNode0, 0, sizeof(huffNodeTable));
-
-       /* sort, decreasing order */
-       HUF_sort(huffNode, count, maxSymbolValue);
-
-       /* init for parents */
-       nonNullRank = maxSymbolValue;
-       while (huffNode[nonNullRank].count == 0)
-               nonNullRank--;
-       lowS = nonNullRank;
-       nodeRoot = nodeNb + lowS - 1;
-       lowN = nodeNb;
-       huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count;
-       huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb;
-       nodeNb++;
-       lowS -= 2;
-       for (n = nodeNb; n <= nodeRoot; n++)
-               huffNode[n].count = (U32)(1U << 30);
-       huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */
-
-       /* create parents */
-       while (nodeNb <= nodeRoot) {
-               U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-               U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
-               huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
-               huffNode[n1].parent = huffNode[n2].parent = nodeNb;
-               nodeNb++;
-       }
-
-       /* distribute weights (unlimited tree height) */
-       huffNode[nodeRoot].nbBits = 0;
-       for (n = nodeRoot - 1; n >= STARTNODE; n--)
-               huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
-       for (n = 0; n <= nonNullRank; n++)
-               huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1;
-
-       /* enforce maxTableLog */
-       maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
-
-       /* fill result into tree (val, nbBits) */
-       {
-               U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0};
-               U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0};
-               if (maxNbBits > HUF_TABLELOG_MAX)
-                       return ERROR(GENERIC); /* check fit into table */
-               for (n = 0; n <= nonNullRank; n++)
-                       nbPerRank[huffNode[n].nbBits]++;
-               /* determine stating value per rank */
-               {
-                       U16 min = 0;
-                       for (n = maxNbBits; n > 0; n--) {
-                               valPerRank[n] = min; /* get starting value within each rank */
-                               min += nbPerRank[n];
-                               min >>= 1;
-                       }
-               }
-               for (n = 0; n <= maxSymbolValue; n++)
-                       tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
-               for (n = 0; n <= maxSymbolValue; n++)
-                       tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
-       }
-
-       return maxNbBits;
-}
-
-static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
-{
-       size_t nbBits = 0;
-       int s;
-       for (s = 0; s <= (int)maxSymbolValue; ++s) {
-               nbBits += CTable[s].nbBits * count[s];
-       }
-       return nbBits >> 3;
-}
-
-static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue)
-{
-       int bad = 0;
-       int s;
-       for (s = 0; s <= (int)maxSymbolValue; ++s) {
-               bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
-       }
-       return !bad;
-}
-
-static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable)
-{
-       BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
-}
-
-size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
-
-#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
-
-#define HUF_FLUSHBITS_1(stream)                                            \
-       if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \
-       HUF_FLUSHBITS(stream)
-
-#define HUF_FLUSHBITS_2(stream)                                            \
-       if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \
-       HUF_FLUSHBITS(stream)
-
-size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
-{
-       const BYTE *ip = (const BYTE *)src;
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *const oend = ostart + dstSize;
-       BYTE *op = ostart;
-       size_t n;
-       BIT_CStream_t bitC;
-
-       /* init */
-       if (dstSize < 8)
-               return 0; /* not enough space to compress */
-       {
-               size_t const initErr = BIT_initCStream(&bitC, op, oend - op);
-               if (HUF_isError(initErr))
-                       return 0;
-       }
-
-       n = srcSize & ~3; /* join to mod 4 */
-       switch (srcSize & 3) {
-       case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC);
-               fallthrough;
-       case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC);
-               fallthrough;
-       case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC);
-               fallthrough;
-       case 0:
-       default:;
-       }
-
-       for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */
-               HUF_encodeSymbol(&bitC, ip[n - 1], CTable);
-               HUF_FLUSHBITS_1(&bitC);
-               HUF_encodeSymbol(&bitC, ip[n - 2], CTable);
-               HUF_FLUSHBITS_2(&bitC);
-               HUF_encodeSymbol(&bitC, ip[n - 3], CTable);
-               HUF_FLUSHBITS_1(&bitC);
-               HUF_encodeSymbol(&bitC, ip[n - 4], CTable);
-               HUF_FLUSHBITS(&bitC);
-       }
-
-       return BIT_closeCStream(&bitC);
-}
-
-size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable)
-{
-       size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */
-       const BYTE *ip = (const BYTE *)src;
-       const BYTE *const iend = ip + srcSize;
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *const oend = ostart + dstSize;
-       BYTE *op = ostart;
-
-       if (dstSize < 6 + 1 + 1 + 1 + 8)
-               return 0; /* minimum space to compress successfully */
-       if (srcSize < 12)
-               return 0; /* no saving possible : too small input */
-       op += 6;          /* jumpTable */
-
-       {
-               CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-               if (cSize == 0)
-                       return 0;
-               ZSTD_writeLE16(ostart, (U16)cSize);
-               op += cSize;
-       }
-
-       ip += segmentSize;
-       {
-               CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-               if (cSize == 0)
-                       return 0;
-               ZSTD_writeLE16(ostart + 2, (U16)cSize);
-               op += cSize;
-       }
-
-       ip += segmentSize;
-       {
-               CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable));
-               if (cSize == 0)
-                       return 0;
-               ZSTD_writeLE16(ostart + 4, (U16)cSize);
-               op += cSize;
-       }
-
-       ip += segmentSize;
-       {
-               CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable));
-               if (cSize == 0)
-                       return 0;
-               op += cSize;
-       }
-
-       return op - ostart;
-}
-
-static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream,
-                                         const HUF_CElt *CTable)
-{
-       size_t const cSize =
-           singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
-       if (HUF_isError(cSize)) {
-               return cSize;
-       }
-       if (cSize == 0) {
-               return 0;
-       } /* uncompressible */
-       op += cSize;
-       /* check compressibility */
-       if ((size_t)(op - ostart) >= srcSize - 1) {
-               return 0;
-       }
-       return op - ostart;
-}
-
-/* `workSpace` must a table of at least 1024 unsigned */
-static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog,
-                                   unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat)
-{
-       BYTE *const ostart = (BYTE *)dst;
-       BYTE *const oend = ostart + dstSize;
-       BYTE *op = ostart;
-
-       U32 *count;
-       size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1);
-       HUF_CElt *CTable;
-       size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1);
-
-       /* checks & inits */
-       if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize)
-               return ERROR(GENERIC);
-       if (!srcSize)
-               return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
-       if (!dstSize)
-               return 0; /* cannot fit within dst budget */
-       if (srcSize > HUF_BLOCKSIZE_MAX)
-               return ERROR(srcSize_wrong); /* curr block size limit */
-       if (huffLog > HUF_TABLELOG_MAX)
-               return ERROR(tableLog_tooLarge);
-       if (!maxSymbolValue)
-               maxSymbolValue = HUF_SYMBOLVALUE_MAX;
-       if (!huffLog)
-               huffLog = HUF_TABLELOG_DEFAULT;
-
-       count = (U32 *)workSpace;
-       workSpace = (BYTE *)workSpace + countSize;
-       wkspSize -= countSize;
-       CTable = (HUF_CElt *)workSpace;
-       workSpace = (BYTE *)workSpace + CTableSize;
-       wkspSize -= CTableSize;
-
-       /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */
-       if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
-               return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-       }
-
-       /* Scan input and build symbol stats */
-       {
-               CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace));
-               if (largest == srcSize) {
-                       *ostart = ((const BYTE *)src)[0];
-                       return 1;
-               } /* single symbol, rle */
-               if (largest <= (srcSize >> 7) + 1)
-                       return 0; /* Fast heuristic : not compressible enough */
-       }
-
-       /* Check validity of previous table */
-       if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) {
-               *repeat = HUF_repeat_none;
-       }
-       /* Heuristic : use existing table for small inputs */
-       if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
-               return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-       }
-
-       /* Build Huffman Tree */
-       huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-       {
-               CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize));
-               huffLog = (U32)maxBits;
-               /* Zero the unused symbols so we can check it for validity */
-               memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt));
-       }
-
-       /* Write table description header */
-       {
-               CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize));
-               /* Check if using the previous table will be beneficial */
-               if (repeat && *repeat != HUF_repeat_none) {
-                       size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue);
-                       size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue);
-                       if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
-                               return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable);
-                       }
-               }
-               /* Use the new table */
-               if (hSize + 12ul >= srcSize) {
-                       return 0;
-               }
-               op += hSize;
-               if (repeat) {
-                       *repeat = HUF_repeat_none;
-               }
-               if (oldHufTable) {
-                       memcpy(oldHufTable, CTable, CTableSize);
-               } /* Save the new table */
-       }
-       return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable);
-}
-
-size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-                          size_t wkspSize)
-{
-       return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0);
-}
-
-size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-                            size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
-{
-       return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat,
-                                    preferRepeat);
-}
-
-size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-                          size_t wkspSize)
-{
-       return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0);
-}
-
-size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace,
-                            size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat)
-{
-       return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat,
-                                    preferRepeat);
-}
diff --git a/lib/zstd/huf_decompress.c b/lib/zstd/huf_decompress.c
deleted file mode 100644 (file)
index 6526482..0000000
+++ /dev/null
@@ -1,960 +0,0 @@
-/*
- * Huffman decoder, part of New Generation Entropy library
- * Copyright (C) 2013-2016, Yann Collet.
- *
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *   * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following disclaimer
- * in the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- *
- * You can contact the author at :
- * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- */
-
-/* **************************************************************
-*  Compiler specifics
-****************************************************************/
-#define FORCE_INLINE static __always_inline
-
-/* **************************************************************
-*  Dependencies
-****************************************************************/
-#include "bitstream.h" /* BIT_* */
-#include "fse.h"       /* header compression */
-#include "huf.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/string.h> /* memcpy, memset */
-
-/* **************************************************************
-*  Error Management
-****************************************************************/
-#define HUF_STATIC_ASSERT(c)                                   \
-       {                                                      \
-               enum { HUF_static_assert = 1 / (int)(!!(c)) }; \
-       } /* use only *after* variable declarations */
-
-/*-***************************/
-/*  generic DTableDesc       */
-/*-***************************/
-
-typedef struct {
-       BYTE maxTableLog;
-       BYTE tableType;
-       BYTE tableLog;
-       BYTE reserved;
-} DTableDesc;
-
-static DTableDesc HUF_getDTableDesc(const HUF_DTable *table)
-{
-       DTableDesc dtd;
-       memcpy(&dtd, table, sizeof(dtd));
-       return dtd;
-}
-
-/*-***************************/
-/*  single-symbol decoding   */
-/*-***************************/
-
-typedef struct {
-       BYTE byte;
-       BYTE nbBits;
-} HUF_DEltX2; /* single-symbol decoding */
-
-size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-       U32 tableLog = 0;
-       U32 nbSymbols = 0;
-       size_t iSize;
-       void *const dtPtr = DTable + 1;
-       HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr;
-
-       U32 *rankVal;
-       BYTE *huffWeight;
-       size_t spaceUsed32 = 0;
-
-       rankVal = (U32 *)workspace + spaceUsed32;
-       spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1;
-       huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
-       /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */
-
-       iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-       if (HUF_isError(iSize))
-               return iSize;
-
-       /* Table header */
-       {
-               DTableDesc dtd = HUF_getDTableDesc(DTable);
-               if (tableLog > (U32)(dtd.maxTableLog + 1))
-                       return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */
-               dtd.tableType = 0;
-               dtd.tableLog = (BYTE)tableLog;
-               memcpy(DTable, &dtd, sizeof(dtd));
-       }
-
-       /* Calculate starting value for each rank */
-       {
-               U32 n, nextRankStart = 0;
-               for (n = 1; n < tableLog + 1; n++) {
-                       U32 const curr = nextRankStart;
-                       nextRankStart += (rankVal[n] << (n - 1));
-                       rankVal[n] = curr;
-               }
-       }
-
-       /* fill DTable */
-       {
-               U32 n;
-               for (n = 0; n < nbSymbols; n++) {
-                       U32 const w = huffWeight[n];
-                       U32 const length = (1 << w) >> 1;
-                       U32 u;
-                       HUF_DEltX2 D;
-                       D.byte = (BYTE)n;
-                       D.nbBits = (BYTE)(tableLog + 1 - w);
-                       for (u = rankVal[w]; u < rankVal[w] + length; u++)
-                               dt[u] = D;
-                       rankVal[w] += length;
-               }
-       }
-
-       return iSize;
-}
-
-static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog)
-{
-       size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
-       BYTE const c = dt[val].byte;
-       BIT_skipBits(Dstream, dt[val].nbBits);
-       return c;
-}
-
-#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)         \
-       if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
-       HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
-       if (ZSTD_64bits())                     \
-       HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
-
-FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog)
-{
-       BYTE *const pStart = p;
-
-       /* up to 4 symbols at a time */
-       while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) {
-               HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-               HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-               HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-               HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-       }
-
-       /* closer to the end */
-       while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
-               HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-       /* no more data to retrieve from bitstream, hence no need to reload */
-       while (p < pEnd)
-               HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
-
-       return pEnd - pStart;
-}
-
-static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       BYTE *op = (BYTE *)dst;
-       BYTE *const oend = op + dstSize;
-       const void *dtPtr = DTable + 1;
-       const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
-       BIT_DStream_t bitD;
-       DTableDesc const dtd = HUF_getDTableDesc(DTable);
-       U32 const dtLog = dtd.tableLog;
-
-       {
-               size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-               if (HUF_isError(errorCode))
-                       return errorCode;
-       }
-
-       HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
-
-       /* check */
-       if (!BIT_endOfDStream(&bitD))
-               return ERROR(corruption_detected);
-
-       return dstSize;
-}
-
-size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       DTableDesc dtd = HUF_getDTableDesc(DTable);
-       if (dtd.tableType != 0)
-               return ERROR(GENERIC);
-       return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       const BYTE *ip = (const BYTE *)cSrc;
-
-       size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
-       if (HUF_isError(hSize))
-               return hSize;
-       if (hSize >= cSrcSize)
-               return ERROR(srcSize_wrong);
-       ip += hSize;
-       cSrcSize -= hSize;
-
-       return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       /* Check */
-       if (cSrcSize < 10)
-               return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
-       {
-               const BYTE *const istart = (const BYTE *)cSrc;
-               BYTE *const ostart = (BYTE *)dst;
-               BYTE *const oend = ostart + dstSize;
-               const void *const dtPtr = DTable + 1;
-               const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr;
-
-               /* Init */
-               BIT_DStream_t bitD1;
-               BIT_DStream_t bitD2;
-               BIT_DStream_t bitD3;
-               BIT_DStream_t bitD4;
-               size_t const length1 = ZSTD_readLE16(istart);
-               size_t const length2 = ZSTD_readLE16(istart + 2);
-               size_t const length3 = ZSTD_readLE16(istart + 4);
-               size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-               const BYTE *const istart1 = istart + 6; /* jumpTable */
-               const BYTE *const istart2 = istart1 + length1;
-               const BYTE *const istart3 = istart2 + length2;
-               const BYTE *const istart4 = istart3 + length3;
-               const size_t segmentSize = (dstSize + 3) / 4;
-               BYTE *const opStart2 = ostart + segmentSize;
-               BYTE *const opStart3 = opStart2 + segmentSize;
-               BYTE *const opStart4 = opStart3 + segmentSize;
-               BYTE *op1 = ostart;
-               BYTE *op2 = opStart2;
-               BYTE *op3 = opStart3;
-               BYTE *op4 = opStart4;
-               U32 endSignal;
-               DTableDesc const dtd = HUF_getDTableDesc(DTable);
-               U32 const dtLog = dtd.tableLog;
-
-               if (length4 > cSrcSize)
-                       return ERROR(corruption_detected); /* overflow */
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-
-               /* 16-32 symbols per loop (4-8 symbols per stream) */
-               endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-               for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) {
-                       HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-                       HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-                       HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-                       HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-                       endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-               }
-
-               /* check corruption */
-               if (op1 > opStart2)
-                       return ERROR(corruption_detected);
-               if (op2 > opStart3)
-                       return ERROR(corruption_detected);
-               if (op3 > opStart4)
-                       return ERROR(corruption_detected);
-               /* note : op4 supposed already verified within main loop */
-
-               /* finish bitStreams one by one */
-               HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
-               HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
-               HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
-               HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
-
-               /* check */
-               endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-               if (!endSignal)
-                       return ERROR(corruption_detected);
-
-               /* decoded size */
-               return dstSize;
-       }
-}
-
-size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       DTableDesc dtd = HUF_getDTableDesc(DTable);
-       if (dtd.tableType != 0)
-               return ERROR(GENERIC);
-       return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       const BYTE *ip = (const BYTE *)cSrc;
-
-       size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-       if (HUF_isError(hSize))
-               return hSize;
-       if (hSize >= cSrcSize)
-               return ERROR(srcSize_wrong);
-       ip += hSize;
-       cSrcSize -= hSize;
-
-       return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* *************************/
-/* double-symbols decoding */
-/* *************************/
-typedef struct {
-       U16 sequence;
-       BYTE nbBits;
-       BYTE length;
-} HUF_DEltX4; /* double-symbols decoding */
-
-typedef struct {
-       BYTE symbol;
-       BYTE weight;
-} sortedSymbol_t;
-
-/* HUF_fillDTableX4Level2() :
- * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
-static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight,
-                                  const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq)
-{
-       HUF_DEltX4 DElt;
-       U32 rankVal[HUF_TABLELOG_MAX + 1];
-
-       /* get pre-calculated rankVal */
-       memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
-       /* fill skipped values */
-       if (minWeight > 1) {
-               U32 i, skipSize = rankVal[minWeight];
-               ZSTD_writeLE16(&(DElt.sequence), baseSeq);
-               DElt.nbBits = (BYTE)(consumed);
-               DElt.length = 1;
-               for (i = 0; i < skipSize; i++)
-                       DTable[i] = DElt;
-       }
-
-       /* fill DTable */
-       {
-               U32 s;
-               for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */
-                       const U32 symbol = sortedSymbols[s].symbol;
-                       const U32 weight = sortedSymbols[s].weight;
-                       const U32 nbBits = nbBitsBaseline - weight;
-                       const U32 length = 1 << (sizeLog - nbBits);
-                       const U32 start = rankVal[weight];
-                       U32 i = start;
-                       const U32 end = start + length;
-
-                       ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
-                       DElt.nbBits = (BYTE)(nbBits + consumed);
-                       DElt.length = 2;
-                       do {
-                               DTable[i++] = DElt;
-                       } while (i < end); /* since length >= 1 */
-
-                       rankVal[weight] += length;
-               }
-       }
-}
-
-typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1];
-typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
-
-static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart,
-                            rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline)
-{
-       U32 rankVal[HUF_TABLELOG_MAX + 1];
-       const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
-       const U32 minBits = nbBitsBaseline - maxWeight;
-       U32 s;
-
-       memcpy(rankVal, rankValOrigin, sizeof(rankVal));
-
-       /* fill DTable */
-       for (s = 0; s < sortedListSize; s++) {
-               const U16 symbol = sortedList[s].symbol;
-               const U32 weight = sortedList[s].weight;
-               const U32 nbBits = nbBitsBaseline - weight;
-               const U32 start = rankVal[weight];
-               const U32 length = 1 << (targetLog - nbBits);
-
-               if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */
-                       U32 sortedRank;
-                       int minWeight = nbBits + scaleLog;
-                       if (minWeight < 1)
-                               minWeight = 1;
-                       sortedRank = rankStart[minWeight];
-                       HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank,
-                                              sortedListSize - sortedRank, nbBitsBaseline, symbol);
-               } else {
-                       HUF_DEltX4 DElt;
-                       ZSTD_writeLE16(&(DElt.sequence), symbol);
-                       DElt.nbBits = (BYTE)(nbBits);
-                       DElt.length = 1;
-                       {
-                               U32 const end = start + length;
-                               U32 u;
-                               for (u = start; u < end; u++)
-                                       DTable[u] = DElt;
-                       }
-               }
-               rankVal[weight] += length;
-       }
-}
-
-size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize)
-{
-       U32 tableLog, maxW, sizeOfSort, nbSymbols;
-       DTableDesc dtd = HUF_getDTableDesc(DTable);
-       U32 const maxTableLog = dtd.maxTableLog;
-       size_t iSize;
-       void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */
-       HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr;
-       U32 *rankStart;
-
-       rankValCol_t *rankVal;
-       U32 *rankStats;
-       U32 *rankStart0;
-       sortedSymbol_t *sortedSymbol;
-       BYTE *weightList;
-       size_t spaceUsed32 = 0;
-
-       HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0);
-
-       rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2;
-       rankStats = (U32 *)workspace + spaceUsed32;
-       spaceUsed32 += HUF_TABLELOG_MAX + 1;
-       rankStart0 = (U32 *)workspace + spaceUsed32;
-       spaceUsed32 += HUF_TABLELOG_MAX + 2;
-       sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2;
-       weightList = (BYTE *)((U32 *)workspace + spaceUsed32);
-       spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2;
-
-       if ((spaceUsed32 << 2) > workspaceSize)
-               return ERROR(tableLog_tooLarge);
-       workspace = (U32 *)workspace + spaceUsed32;
-       workspaceSize -= (spaceUsed32 << 2);
-
-       rankStart = rankStart0 + 1;
-       memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
-
-       HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
-       if (maxTableLog > HUF_TABLELOG_MAX)
-               return ERROR(tableLog_tooLarge);
-       /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
-
-       iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize);
-       if (HUF_isError(iSize))
-               return iSize;
-
-       /* check result */
-       if (tableLog > maxTableLog)
-               return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
-
-       /* find maxWeight */
-       for (maxW = tableLog; rankStats[maxW] == 0; maxW--) {
-       } /* necessarily finds a solution before 0 */
-
-       /* Get start index of each weight */
-       {
-               U32 w, nextRankStart = 0;
-               for (w = 1; w < maxW + 1; w++) {
-                       U32 curr = nextRankStart;
-                       nextRankStart += rankStats[w];
-                       rankStart[w] = curr;
-               }
-               rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
-               sizeOfSort = nextRankStart;
-       }
-
-       /* sort symbols by weight */
-       {
-               U32 s;
-               for (s = 0; s < nbSymbols; s++) {
-                       U32 const w = weightList[s];
-                       U32 const r = rankStart[w]++;
-                       sortedSymbol[r].symbol = (BYTE)s;
-                       sortedSymbol[r].weight = (BYTE)w;
-               }
-               rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
-       }
-
-       /* Build rankVal */
-       {
-               U32 *const rankVal0 = rankVal[0];
-               {
-                       int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */
-                       U32 nextRankVal = 0;
-                       U32 w;
-                       for (w = 1; w < maxW + 1; w++) {
-                               U32 curr = nextRankVal;
-                               nextRankVal += rankStats[w] << (w + rescale);
-                               rankVal0[w] = curr;
-                       }
-               }
-               {
-                       U32 const minBits = tableLog + 1 - maxW;
-                       U32 consumed;
-                       for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
-                               U32 *const rankValPtr = rankVal[consumed];
-                               U32 w;
-                               for (w = 1; w < maxW + 1; w++) {
-                                       rankValPtr[w] = rankVal0[w] >> consumed;
-                               }
-                       }
-               }
-       }
-
-       HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1);
-
-       dtd.tableLog = (BYTE)maxTableLog;
-       dtd.tableType = 1;
-       memcpy(DTable, &dtd, sizeof(dtd));
-       return iSize;
-}
-
-static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
-{
-       size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
-       memcpy(op, dt + val, 2);
-       BIT_skipBits(DStream, dt[val].nbBits);
-       return dt[val].length;
-}
-
-static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog)
-{
-       size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
-       memcpy(op, dt + val, 1);
-       if (dt[val].length == 1)
-               BIT_skipBits(DStream, dt[val].nbBits);
-       else {
-               if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) {
-                       BIT_skipBits(DStream, dt[val].nbBits);
-                       if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8))
-                               /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
-                               DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8);
-               }
-       }
-       return 1;
-}
-
-#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr)         \
-       if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \
-       ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
-       if (ZSTD_64bits())                     \
-       ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
-
-FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog)
-{
-       BYTE *const pStart = p;
-
-       /* up to 8 symbols at a time */
-       while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) {
-               HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-               HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
-               HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
-               HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-       }
-
-       /* closer to end : up to 2 symbols at a time */
-       while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2))
-               HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
-
-       while (p <= pEnd - 2)
-               HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
-
-       if (p < pEnd)
-               p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
-
-       return p - pStart;
-}
-
-static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       BIT_DStream_t bitD;
-
-       /* Init */
-       {
-               size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
-               if (HUF_isError(errorCode))
-                       return errorCode;
-       }
-
-       /* decode */
-       {
-               BYTE *const ostart = (BYTE *)dst;
-               BYTE *const oend = ostart + dstSize;
-               const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */
-               const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
-               DTableDesc const dtd = HUF_getDTableDesc(DTable);
-               HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
-       }
-
-       /* check */
-       if (!BIT_endOfDStream(&bitD))
-               return ERROR(corruption_detected);
-
-       /* decoded size */
-       return dstSize;
-}
-
-size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       DTableDesc dtd = HUF_getDTableDesc(DTable);
-       if (dtd.tableType != 1)
-               return ERROR(GENERIC);
-       return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       const BYTE *ip = (const BYTE *)cSrc;
-
-       size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize);
-       if (HUF_isError(hSize))
-               return hSize;
-       if (hSize >= cSrcSize)
-               return ERROR(srcSize_wrong);
-       ip += hSize;
-       cSrcSize -= hSize;
-
-       return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx);
-}
-
-static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       if (cSrcSize < 10)
-               return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
-
-       {
-               const BYTE *const istart = (const BYTE *)cSrc;
-               BYTE *const ostart = (BYTE *)dst;
-               BYTE *const oend = ostart + dstSize;
-               const void *const dtPtr = DTable + 1;
-               const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr;
-
-               /* Init */
-               BIT_DStream_t bitD1;
-               BIT_DStream_t bitD2;
-               BIT_DStream_t bitD3;
-               BIT_DStream_t bitD4;
-               size_t const length1 = ZSTD_readLE16(istart);
-               size_t const length2 = ZSTD_readLE16(istart + 2);
-               size_t const length3 = ZSTD_readLE16(istart + 4);
-               size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
-               const BYTE *const istart1 = istart + 6; /* jumpTable */
-               const BYTE *const istart2 = istart1 + length1;
-               const BYTE *const istart3 = istart2 + length2;
-               const BYTE *const istart4 = istart3 + length3;
-               size_t const segmentSize = (dstSize + 3) / 4;
-               BYTE *const opStart2 = ostart + segmentSize;
-               BYTE *const opStart3 = opStart2 + segmentSize;
-               BYTE *const opStart4 = opStart3 + segmentSize;
-               BYTE *op1 = ostart;
-               BYTE *op2 = opStart2;
-               BYTE *op3 = opStart3;
-               BYTE *op4 = opStart4;
-               U32 endSignal;
-               DTableDesc const dtd = HUF_getDTableDesc(DTable);
-               U32 const dtLog = dtd.tableLog;
-
-               if (length4 > cSrcSize)
-                       return ERROR(corruption_detected); /* overflow */
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-               {
-                       size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
-                       if (HUF_isError(errorCode))
-                               return errorCode;
-               }
-
-               /* 16-32 symbols per loop (4-8 symbols per stream) */
-               endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-               for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) {
-                       HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-                       HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
-                       HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
-                       HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
-                       HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
-                       HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
-                       HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
-
-                       endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
-               }
-
-               /* check corruption */
-               if (op1 > opStart2)
-                       return ERROR(corruption_detected);
-               if (op2 > opStart3)
-                       return ERROR(corruption_detected);
-               if (op3 > opStart4)
-                       return ERROR(corruption_detected);
-               /* note : op4 already verified within main loop */
-
-               /* finish bitStreams one by one */
-               HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
-               HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
-               HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
-               HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
-
-               /* check */
-               {
-                       U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
-                       if (!endCheck)
-                               return ERROR(corruption_detected);
-               }
-
-               /* decoded size */
-               return dstSize;
-       }
-}
-
-size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       DTableDesc dtd = HUF_getDTableDesc(DTable);
-       if (dtd.tableType != 1)
-               return ERROR(GENERIC);
-       return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       const BYTE *ip = (const BYTE *)cSrc;
-
-       size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize);
-       if (HUF_isError(hSize))
-               return hSize;
-       if (hSize >= cSrcSize)
-               return ERROR(srcSize_wrong);
-       ip += hSize;
-       cSrcSize -= hSize;
-
-       return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
-}
-
-/* ********************************/
-/* Generic decompression selector */
-/* ********************************/
-
-size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       DTableDesc const dtd = HUF_getDTableDesc(DTable);
-       return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-                            : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable)
-{
-       DTableDesc const dtd = HUF_getDTableDesc(DTable);
-       return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable)
-                            : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
-}
-
-typedef struct {
-       U32 tableTime;
-       U32 decode256Time;
-} algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = {
-    /* single, double, quad */
-    {{0, 0}, {1, 1}, {2, 2}},               /* Q==0 : impossible */
-    {{0, 0}, {1, 1}, {2, 2}},               /* Q==1 : impossible */
-    {{38, 130}, {1313, 74}, {2151, 38}},     /* Q == 2 : 12-18% */
-    {{448, 128}, {1353, 74}, {2238, 41}},    /* Q == 3 : 18-25% */
-    {{556, 128}, {1353, 74}, {2238, 47}},    /* Q == 4 : 25-32% */
-    {{714, 128}, {1418, 74}, {2436, 53}},    /* Q == 5 : 32-38% */
-    {{883, 128}, {1437, 74}, {2464, 61}},    /* Q == 6 : 38-44% */
-    {{897, 128}, {1515, 75}, {2622, 68}},    /* Q == 7 : 44-50% */
-    {{926, 128}, {1613, 75}, {2730, 75}},    /* Q == 8 : 50-56% */
-    {{947, 128}, {1729, 77}, {3359, 77}},    /* Q == 9 : 56-62% */
-    {{1107, 128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
-    {{1177, 128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
-    {{1242, 128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
-    {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */
-    {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */
-    {{722, 128}, {1891, 145}, {1936, 146}},  /* Q ==15 : 93-99% */
-};
-
-/** HUF_selectDecoder() :
-*   Tells which decoder is likely to decode faster,
-*   based on a set of pre-determined metrics.
-*   @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
-*   Assumption : 0 < cSrcSize < dstSize <= 128 KB */
-U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize)
-{
-       /* decoder timing evaluation */
-       U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
-       U32 const D256 = (U32)(dstSize >> 8);
-       U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
-       U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
-       DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
-
-       return DTime1 < DTime0;
-}
-
-typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize);
-
-size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       /* validation checks */
-       if (dstSize == 0)
-               return ERROR(dstSize_tooSmall);
-       if (cSrcSize > dstSize)
-               return ERROR(corruption_detected); /* invalid */
-       if (cSrcSize == dstSize) {
-               memcpy(dst, cSrc, dstSize);
-               return dstSize;
-       } /* not compressed */
-       if (cSrcSize == 1) {
-               memset(dst, *(const BYTE *)cSrc, dstSize);
-               return dstSize;
-       } /* RLE */
-
-       {
-               U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-               return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-                             : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-       }
-}
-
-size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       /* validation checks */
-       if (dstSize == 0)
-               return ERROR(dstSize_tooSmall);
-       if ((cSrcSize >= dstSize) || (cSrcSize <= 1))
-               return ERROR(corruption_detected); /* invalid */
-
-       {
-               U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-               return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-                             : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-       }
-}
-
-size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize)
-{
-       /* validation checks */
-       if (dstSize == 0)
-               return ERROR(dstSize_tooSmall);
-       if (cSrcSize > dstSize)
-               return ERROR(corruption_detected); /* invalid */
-       if (cSrcSize == dstSize) {
-               memcpy(dst, cSrc, dstSize);
-               return dstSize;
-       } /* not compressed */
-       if (cSrcSize == 1) {
-               memset(dst, *(const BYTE *)cSrc, dstSize);
-               return dstSize;
-       } /* RLE */
-
-       {
-               U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
-               return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize)
-                             : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize);
-       }
-}
diff --git a/lib/zstd/mem.h b/lib/zstd/mem.h
deleted file mode 100644 (file)
index 93d7a2c..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-#ifndef MEM_H_MODULE
-#define MEM_H_MODULE
-
-/*-****************************************
-*  Dependencies
-******************************************/
-#include <asm/unaligned.h>
-#include <linux/string.h> /* memcpy */
-#include <linux/types.h>  /* size_t, ptrdiff_t */
-
-/*-****************************************
-*  Compiler specifics
-******************************************/
-#define ZSTD_STATIC static inline
-
-/*-**************************************************************
-*  Basic Types
-*****************************************************************/
-typedef uint8_t BYTE;
-typedef uint16_t U16;
-typedef int16_t S16;
-typedef uint32_t U32;
-typedef int32_t S32;
-typedef uint64_t U64;
-typedef int64_t S64;
-typedef ptrdiff_t iPtrDiff;
-typedef uintptr_t uPtrDiff;
-
-/*-**************************************************************
-*  Memory I/O
-*****************************************************************/
-ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; }
-ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; }
-
-#if defined(__LITTLE_ENDIAN)
-#define ZSTD_LITTLE_ENDIAN 1
-#else
-#define ZSTD_LITTLE_ENDIAN 0
-#endif
-
-ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; }
-
-ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); }
-
-ZSTD_STATIC U32 ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); }
-
-ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); }
-
-ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); }
-
-/*=== Little endian r/w ===*/
-
-ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); }
-
-ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); }
-
-ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val)
-{
-       ZSTD_writeLE16(memPtr, (U16)val);
-       ((BYTE *)memPtr)[2] = (BYTE)(val >> 16);
-}
-
-ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); }
-
-ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr)
-{
-       if (ZSTD_32bits())
-               return (size_t)ZSTD_readLE32(memPtr);
-       else
-               return (size_t)ZSTD_readLE64(memPtr);
-}
-
-ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val)
-{
-       if (ZSTD_32bits())
-               ZSTD_writeLE32(memPtr, (U32)val);
-       else
-               ZSTD_writeLE64(memPtr, (U64)val);
-}
-
-/*=== Big endian r/w ===*/
-
-ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeBE32(void *memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); }
-
-ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); }
-
-ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); }
-
-ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr)
-{
-       if (ZSTD_32bits())
-               return (size_t)ZSTD_readBE32(memPtr);
-       else
-               return (size_t)ZSTD_readBE64(memPtr);
-}
-
-ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val)
-{
-       if (ZSTD_32bits())
-               ZSTD_writeBE32(memPtr, (U32)val);
-       else
-               ZSTD_writeBE64(memPtr, (U64)val);
-}
-
-/* function safe only for comparisons */
-ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length)
-{
-       switch (length) {
-       default:
-       case 4: return ZSTD_read32(memPtr);
-       case 3:
-               if (ZSTD_isLittleEndian())
-                       return ZSTD_read32(memPtr) << 8;
-               else
-                       return ZSTD_read32(memPtr) >> 8;
-       }
-}
-
-#endif /* MEM_H_MODULE */
diff --git a/lib/zstd/zstd_common.c b/lib/zstd/zstd_common.c
deleted file mode 100644 (file)
index a282624..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "error_private.h"
-#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
-#include <linux/kernel.h>
-
-/*=**************************************************************
-*  Custom allocator
-****************************************************************/
-
-#define stack_push(stack, size)                                 \
-       ({                                                      \
-               void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \
-               (stack)->ptr = (char *)ptr + (size);            \
-               (stack)->ptr <= (stack)->end ? ptr : NULL;      \
-       })
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize)
-{
-       ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace};
-       ZSTD_stack *stack = (ZSTD_stack *)workspace;
-       /* Verify preconditions */
-       if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) {
-               ZSTD_customMem error = {NULL, NULL, NULL};
-               return error;
-       }
-       /* Initialize the stack */
-       stack->ptr = workspace;
-       stack->end = (char *)workspace + workspaceSize;
-       stack_push(stack, sizeof(ZSTD_stack));
-       return stackMem;
-}
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size)
-{
-       ZSTD_stack *stack = (ZSTD_stack *)opaque;
-       *size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr);
-       return stack_push(stack, *size);
-}
-
-void *ZSTD_stackAlloc(void *opaque, size_t size)
-{
-       ZSTD_stack *stack = (ZSTD_stack *)opaque;
-       return stack_push(stack, size);
-}
-void ZSTD_stackFree(void *opaque, void *address)
-{
-       (void)opaque;
-       (void)address;
-}
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); }
-
-void ZSTD_free(void *ptr, ZSTD_customMem customMem)
-{
-       if (ptr != NULL)
-               customMem.customFree(customMem.opaque, ptr);
-}
diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
new file mode 100644 (file)
index 0000000..65548a4
--- /dev/null
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+#include "common/zstd_internal.h"
+
+#define ZSTD_FORWARD_IF_ERR(ret)            \
+       do {                                \
+               size_t const __ret = (ret); \
+               if (ZSTD_isError(__ret))    \
+                       return __ret;       \
+       } while (0)
+
+static size_t zstd_cctx_init(zstd_cctx *cctx, const zstd_parameters *parameters,
+       unsigned long long pledged_src_size)
+{
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_reset(
+               cctx, ZSTD_reset_session_and_parameters));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setPledgedSrcSize(
+               cctx, pledged_src_size));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_windowLog, parameters->cParams.windowLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_hashLog, parameters->cParams.hashLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_chainLog, parameters->cParams.chainLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_searchLog, parameters->cParams.searchLog));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_minMatch, parameters->cParams.minMatch));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_targetLength, parameters->cParams.targetLength));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_strategy, parameters->cParams.strategy));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_contentSizeFlag, parameters->fParams.contentSizeFlag));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_checksumFlag, parameters->fParams.checksumFlag));
+       ZSTD_FORWARD_IF_ERR(ZSTD_CCtx_setParameter(
+               cctx, ZSTD_c_dictIDFlag, !parameters->fParams.noDictIDFlag));
+       return 0;
+}
+
+int zstd_min_clevel(void)
+{
+       return ZSTD_minCLevel();
+}
+EXPORT_SYMBOL(zstd_min_clevel);
+
+int zstd_max_clevel(void)
+{
+       return ZSTD_maxCLevel();
+}
+EXPORT_SYMBOL(zstd_max_clevel);
+
+size_t zstd_compress_bound(size_t src_size)
+{
+       return ZSTD_compressBound(src_size);
+}
+EXPORT_SYMBOL(zstd_compress_bound);
+
+zstd_parameters zstd_get_params(int level,
+       unsigned long long estimated_src_size)
+{
+       return ZSTD_getParams(level, estimated_src_size, 0);
+}
+EXPORT_SYMBOL(zstd_get_params);
+
+size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams)
+{
+       return ZSTD_estimateCCtxSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cctx_workspace_bound);
+
+zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size)
+{
+       if (workspace == NULL)
+               return NULL;
+       return ZSTD_initStaticCCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_cctx);
+
+size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size, const zstd_parameters *parameters)
+{
+       ZSTD_FORWARD_IF_ERR(zstd_cctx_init(cctx, parameters, src_size));
+       return ZSTD_compress2(cctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_compress_cctx);
+
+size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams)
+{
+       return ZSTD_estimateCStreamSize_usingCParams(*cparams);
+}
+EXPORT_SYMBOL(zstd_cstream_workspace_bound);
+
+zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters,
+       unsigned long long pledged_src_size, void *workspace, size_t workspace_size)
+{
+       zstd_cstream *cstream;
+
+       if (workspace == NULL)
+               return NULL;
+
+       cstream = ZSTD_initStaticCStream(workspace, workspace_size);
+       if (cstream == NULL)
+               return NULL;
+
+       /* 0 means unknown in linux zstd API but means 0 in new zstd API */
+       if (pledged_src_size == 0)
+               pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+
+       if (ZSTD_isError(zstd_cctx_init(cstream, parameters, pledged_src_size)))
+               return NULL;
+
+       return cstream;
+}
+EXPORT_SYMBOL(zstd_init_cstream);
+
+size_t zstd_reset_cstream(zstd_cstream *cstream,
+       unsigned long long pledged_src_size)
+{
+       return ZSTD_resetCStream(cstream, pledged_src_size);
+}
+EXPORT_SYMBOL(zstd_reset_cstream);
+
+size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output,
+       zstd_in_buffer *input)
+{
+       return ZSTD_compressStream(cstream, output, input);
+}
+EXPORT_SYMBOL(zstd_compress_stream);
+
+size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+       return ZSTD_flushStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_flush_stream);
+
+size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output)
+{
+       return ZSTD_endStream(cstream, output);
+}
+EXPORT_SYMBOL(zstd_end_stream);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Compressor");
diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c
new file mode 100644 (file)
index 0000000..f4ed952
--- /dev/null
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/zstd.h>
+
+#include "common/zstd_deps.h"
+
+/* Common symbols. zstd_compress must depend on zstd_decompress. */
+
+unsigned int zstd_is_error(size_t code)
+{
+       return ZSTD_isError(code);
+}
+EXPORT_SYMBOL(zstd_is_error);
+
+zstd_error_code zstd_get_error_code(size_t code)
+{
+       return ZSTD_getErrorCode(code);
+}
+EXPORT_SYMBOL(zstd_get_error_code);
+
+const char *zstd_get_error_name(size_t code)
+{
+       return ZSTD_getErrorName(code);
+}
+EXPORT_SYMBOL(zstd_get_error_name);
+
+/* Decompression symbols. */
+
+size_t zstd_dctx_workspace_bound(void)
+{
+       return ZSTD_estimateDCtxSize();
+}
+EXPORT_SYMBOL(zstd_dctx_workspace_bound);
+
+zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size)
+{
+       if (workspace == NULL)
+               return NULL;
+       return ZSTD_initStaticDCtx(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dctx);
+
+size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity,
+       const void *src, size_t src_size)
+{
+       return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size);
+}
+EXPORT_SYMBOL(zstd_decompress_dctx);
+
+size_t zstd_dstream_workspace_bound(size_t max_window_size)
+{
+       return ZSTD_estimateDStreamSize(max_window_size);
+}
+EXPORT_SYMBOL(zstd_dstream_workspace_bound);
+
+zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace,
+       size_t workspace_size)
+{
+       if (workspace == NULL)
+               return NULL;
+       (void)max_window_size;
+       return ZSTD_initStaticDStream(workspace, workspace_size);
+}
+EXPORT_SYMBOL(zstd_init_dstream);
+
+size_t zstd_reset_dstream(zstd_dstream *dstream)
+{
+       return ZSTD_resetDStream(dstream);
+}
+EXPORT_SYMBOL(zstd_reset_dstream);
+
+size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output,
+       zstd_in_buffer *input)
+{
+       return ZSTD_decompressStream(dstream, output, input);
+}
+EXPORT_SYMBOL(zstd_decompress_stream);
+
+size_t zstd_find_frame_compressed_size(const void *src, size_t src_size)
+{
+       return ZSTD_findFrameCompressedSize(src, src_size);
+}
+EXPORT_SYMBOL(zstd_find_frame_compressed_size);
+
+size_t zstd_get_frame_header(zstd_frame_header *header, const void *src,
+       size_t src_size)
+{
+       return ZSTD_getFrameHeader(header, src, src_size);
+}
+EXPORT_SYMBOL(zstd_get_frame_header);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Decompressor");
diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h
deleted file mode 100644 (file)
index dac7533..0000000
+++ /dev/null
@@ -1,273 +0,0 @@
-/**
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-#ifndef ZSTD_CCOMMON_H_MODULE
-#define ZSTD_CCOMMON_H_MODULE
-
-/*-*******************************************************
-*  Compiler specifics
-*********************************************************/
-#define FORCE_INLINE static __always_inline
-#define FORCE_NOINLINE static noinline
-
-/*-*************************************
-*  Dependencies
-***************************************/
-#include "error_private.h"
-#include "mem.h"
-#include <linux/compiler.h>
-#include <linux/kernel.h>
-#include <linux/xxhash.h>
-#include <linux/zstd.h>
-
-/*-*************************************
-*  shared macros
-***************************************/
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-#define CHECK_F(f)                       \
-       {                                \
-               size_t const errcod = f; \
-               if (ERR_isError(errcod)) \
-                       return errcod;   \
-       } /* check and Forward error code */
-#define CHECK_E(f, e)                    \
-       {                                \
-               size_t const errcod = f; \
-               if (ERR_isError(errcod)) \
-                       return ERROR(e); \
-       } /* check and send Error code */
-#define ZSTD_STATIC_ASSERT(c)                                   \
-       {                                                       \
-               enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \
-       }
-
-/*-*************************************
-*  Common constants
-***************************************/
-#define ZSTD_OPT_NUM (1 << 12)
-#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
-
-#define ZSTD_REP_NUM 3               /* number of repcodes */
-#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
-#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)
-#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
-static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8};
-
-#define KB *(1 << 10)
-#define MB *(1 << 20)
-#define GB *(1U << 30)
-
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
-
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8};
-static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4};
-
-#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
-
-#define MIN_SEQUENCES_SIZE 1                                                                     /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
-
-#define HufLog 12
-typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
-
-#define LONGNBSEQ 0x7F00
-
-#define MINMATCH 3
-#define EQUAL_READ32 4
-
-#define Litbits 8
-#define MaxLit ((1 << Litbits) - 1)
-#define MaxML 52
-#define MaxLL 35
-#define MaxOff 28
-#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
-#define MLFSELog 9
-#define LLFSELog 9
-#define OffFSELog 8
-
-static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1};
-#define LL_DEFAULTNORMLOG 6 /* for static allocation */
-static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
-
-static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0, 0,
-                                      0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
-static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  1,  1,  1,  1,  1,  1, 1,
-                                             1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1};
-#define ML_DEFAULTNORMLOG 6 /* for static allocation */
-static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
-
-static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1};
-#define OF_DEFAULTNORMLOG 5 /* for static allocation */
-static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
-
-/*-*******************************************
-*  Shared functions to include for inlining
-*********************************************/
-ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) {
-       /*
-        * zstd relies heavily on gcc being able to analyze and inline this
-        * memcpy() call, since it is called in a tight loop. Preboot mode
-        * is compiled in freestanding mode, which stops gcc from analyzing
-        * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a
-        * regular memcpy().
-        */
-       __builtin_memcpy(dst, src, 8);
-}
-/*! ZSTD_wildcopy() :
-*   custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
-#define WILDCOPY_OVERLENGTH 8
-ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length)
-{
-       const BYTE* ip = (const BYTE*)src;
-       BYTE* op = (BYTE*)dst;
-       BYTE* const oend = op + length;
-#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200
-       /*
-        * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388.
-        * Avoid the bad case where the loop only runs once by handling the
-        * special case separately. This doesn't trigger the bug because it
-        * doesn't involve pointer/integer overflow.
-        */
-       if (length <= 8)
-               return ZSTD_copy8(dst, src);
-#endif
-       do {
-               ZSTD_copy8(op, ip);
-               op += 8;
-               ip += 8;
-       } while (op < oend);
-}
-
-/*-*******************************************
-*  Private interfaces
-*********************************************/
-typedef struct ZSTD_stats_s ZSTD_stats_t;
-
-typedef struct {
-       U32 off;
-       U32 len;
-} ZSTD_match_t;
-
-typedef struct {
-       U32 price;
-       U32 off;
-       U32 mlen;
-       U32 litlen;
-       U32 rep[ZSTD_REP_NUM];
-} ZSTD_optimal_t;
-
-typedef struct seqDef_s {
-       U32 offset;
-       U16 litLength;
-       U16 matchLength;
-} seqDef;
-
-typedef struct {
-       seqDef *sequencesStart;
-       seqDef *sequences;
-       BYTE *litStart;
-       BYTE *lit;
-       BYTE *llCode;
-       BYTE *mlCode;
-       BYTE *ofCode;
-       U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
-       U32 longLengthPos;
-       /* opt */
-       ZSTD_optimal_t *priceTable;
-       ZSTD_match_t *matchTable;
-       U32 *matchLengthFreq;
-       U32 *litLengthFreq;
-       U32 *litFreq;
-       U32 *offCodeFreq;
-       U32 matchLengthSum;
-       U32 matchSum;
-       U32 litLengthSum;
-       U32 litSum;
-       U32 offCodeSum;
-       U32 log2matchLengthSum;
-       U32 log2matchSum;
-       U32 log2litLengthSum;
-       U32 log2litSum;
-       U32 log2offCodeSum;
-       U32 factor;
-       U32 staticPrices;
-       U32 cachedPrice;
-       U32 cachedLitLength;
-       const BYTE *cachedLiterals;
-} seqStore_t;
-
-const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx);
-void ZSTD_seqToCodes(const seqStore_t *seqStorePtr);
-int ZSTD_isSkipFrame(ZSTD_DCtx *dctx);
-
-/*= Custom memory allocation functions */
-typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size);
-typedef void (*ZSTD_freeFunction)(void *opaque, void *address);
-typedef struct {
-       ZSTD_allocFunction customAlloc;
-       ZSTD_freeFunction customFree;
-       void *opaque;
-} ZSTD_customMem;
-
-void *ZSTD_malloc(size_t size, ZSTD_customMem customMem);
-void ZSTD_free(void *ptr, ZSTD_customMem customMem);
-
-/*====== stack allocation  ======*/
-
-typedef struct {
-       void *ptr;
-       const void *end;
-} ZSTD_stack;
-
-#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t))
-#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t))
-
-ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize);
-
-void *ZSTD_stackAllocAll(void *opaque, size_t *size);
-void *ZSTD_stackAlloc(void *opaque, size_t size);
-void ZSTD_stackFree(void *opaque, void *address);
-
-/*======  common function  ======*/
-
-ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); }
-
-/* hidden functions */
-
-/* ZSTD_invalidateRepCodes() :
- * ensures next compression will not use repcodes from previous block.
- * Note : only works with regular variant;
- *        do not use with extDict variant ! */
-void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx);
-
-size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx);
-size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx);
-size_t ZSTD_freeCDict(ZSTD_CDict *cdict);
-size_t ZSTD_freeDDict(ZSTD_DDict *cdict);
-size_t ZSTD_freeCStream(ZSTD_CStream *zcs);
-size_t ZSTD_freeDStream(ZSTD_DStream *zds);
-
-#endif /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/zstd_opt.h b/lib/zstd/zstd_opt.h
deleted file mode 100644 (file)
index 55e1b4c..0000000
+++ /dev/null
@@ -1,1014 +0,0 @@
-/**
- * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
- * All rights reserved.
- *
- * This source code is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of https://github.com/facebook/zstd.
- * An additional grant of patent rights can be found in the PATENTS file in the
- * same directory.
- *
- * This program is free software; you can redistribute it and/or modify it under
- * the terms of the GNU General Public License version 2 as published by the
- * Free Software Foundation. This program is dual-licensed; you may select
- * either version 2 of the GNU General Public License ("GPL") or BSD license
- * ("BSD").
- */
-
-/* Note : this file is intended to be included within zstd_compress.c */
-
-#ifndef ZSTD_OPT_H_91842398743
-#define ZSTD_OPT_H_91842398743
-
-#define ZSTD_LITFREQ_ADD 2
-#define ZSTD_FREQ_DIV 4
-#define ZSTD_MAX_PRICE (1 << 30)
-
-/*-*************************************
-*  Price functions for optimal parser
-***************************************/
-FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr)
-{
-       ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1);
-       ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1);
-       ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1);
-       ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1);
-       ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum));
-}
-
-ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize)
-{
-       unsigned u;
-
-       ssPtr->cachedLiterals = NULL;
-       ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
-       ssPtr->staticPrices = 0;
-
-       if (ssPtr->litLengthSum == 0) {
-               if (srcSize <= 1024)
-                       ssPtr->staticPrices = 1;
-
-               for (u = 0; u <= MaxLit; u++)
-                       ssPtr->litFreq[u] = 0;
-               for (u = 0; u < srcSize; u++)
-                       ssPtr->litFreq[src[u]]++;
-
-               ssPtr->litSum = 0;
-               ssPtr->litLengthSum = MaxLL + 1;
-               ssPtr->matchLengthSum = MaxML + 1;
-               ssPtr->offCodeSum = (MaxOff + 1);
-               ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits);
-
-               for (u = 0; u <= MaxLit; u++) {
-                       ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV);
-                       ssPtr->litSum += ssPtr->litFreq[u];
-               }
-               for (u = 0; u <= MaxLL; u++)
-                       ssPtr->litLengthFreq[u] = 1;
-               for (u = 0; u <= MaxML; u++)
-                       ssPtr->matchLengthFreq[u] = 1;
-               for (u = 0; u <= MaxOff; u++)
-                       ssPtr->offCodeFreq[u] = 1;
-       } else {
-               ssPtr->matchLengthSum = 0;
-               ssPtr->litLengthSum = 0;
-               ssPtr->offCodeSum = 0;
-               ssPtr->matchSum = 0;
-               ssPtr->litSum = 0;
-
-               for (u = 0; u <= MaxLit; u++) {
-                       ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1));
-                       ssPtr->litSum += ssPtr->litFreq[u];
-               }
-               for (u = 0; u <= MaxLL; u++) {
-                       ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1));
-                       ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
-               }
-               for (u = 0; u <= MaxML; u++) {
-                       ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV);
-                       ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
-                       ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
-               }
-               ssPtr->matchSum *= ZSTD_LITFREQ_ADD;
-               for (u = 0; u <= MaxOff; u++) {
-                       ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV);
-                       ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
-               }
-       }
-
-       ZSTD_setLog2Prices(ssPtr);
-}
-
-FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals)
-{
-       U32 price, u;
-
-       if (ssPtr->staticPrices)
-               return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6);
-
-       if (litLength == 0)
-               return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1);
-
-       /* literals */
-       if (ssPtr->cachedLiterals == literals) {
-               U32 const additional = litLength - ssPtr->cachedLitLength;
-               const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
-               price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
-               for (u = 0; u < additional; u++)
-                       price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1);
-               ssPtr->cachedPrice = price;
-               ssPtr->cachedLitLength = litLength;
-       } else {
-               price = litLength * ssPtr->log2litSum;
-               for (u = 0; u < litLength; u++)
-                       price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1);
-
-               if (litLength >= 12) {
-                       ssPtr->cachedLiterals = literals;
-                       ssPtr->cachedPrice = price;
-                       ssPtr->cachedLitLength = litLength;
-               }
-       }
-
-       /* literal Length */
-       {
-               const BYTE LL_deltaCode = 19;
-               const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-               price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1);
-       }
-
-       return price;
-}
-
-FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra)
-{
-       /* offset */
-       U32 price;
-       BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
-
-       if (seqStorePtr->staticPrices)
-               return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode;
-
-       price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1);
-       if (!ultra && offCode >= 20)
-               price += (offCode - 19) * 2;
-
-       /* match Length */
-       {
-               const BYTE ML_deltaCode = 36;
-               const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-               price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1);
-       }
-
-       return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
-}
-
-ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength)
-{
-       U32 u;
-
-       /* literals */
-       seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD;
-       for (u = 0; u < litLength; u++)
-               seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD;
-
-       /* literal Length */
-       {
-               const BYTE LL_deltaCode = 19;
-               const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
-               seqStorePtr->litLengthFreq[llCode]++;
-               seqStorePtr->litLengthSum++;
-       }
-
-       /* match offset */
-       {
-               BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1);
-               seqStorePtr->offCodeSum++;
-               seqStorePtr->offCodeFreq[offCode]++;
-       }
-
-       /* match Length */
-       {
-               const BYTE ML_deltaCode = 36;
-               const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
-               seqStorePtr->matchLengthFreq[mlCode]++;
-               seqStorePtr->matchLengthSum++;
-       }
-
-       ZSTD_setLog2Prices(seqStorePtr);
-}
-
-#define SET_PRICE(pos, mlen_, offset_, litlen_, price_)           \
-       {                                                         \
-               while (last_pos < pos) {                          \
-                       opt[last_pos + 1].price = ZSTD_MAX_PRICE; \
-                       last_pos++;                               \
-               }                                                 \
-               opt[pos].mlen = mlen_;                            \
-               opt[pos].off = offset_;                           \
-               opt[pos].litlen = litlen_;                        \
-               opt[pos].price = price_;                          \
-       }
-
-/* Update hashTable3 up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE
-U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip)
-{
-       U32 *const hashTable3 = zc->hashTable3;
-       U32 const hashLog3 = zc->hashLog3;
-       const BYTE *const base = zc->base;
-       U32 idx = zc->nextToUpdate3;
-       const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
-       const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
-
-       while (idx < target) {
-               hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx;
-               idx++;
-       }
-
-       return hashTable3[hash3];
-}
-
-/*-*************************************
-*  Binary Tree search
-***************************************/
-static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict,
-                                        ZSTD_match_t *matches, const U32 minMatchLen)
-{
-       const BYTE *const base = zc->base;
-       const U32 curr = (U32)(ip - base);
-       const U32 hashLog = zc->params.cParams.hashLog;
-       const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
-       U32 *const hashTable = zc->hashTable;
-       U32 matchIndex = hashTable[h];
-       U32 *const bt = zc->chainTable;
-       const U32 btLog = zc->params.cParams.chainLog - 1;
-       const U32 btMask = (1U << btLog) - 1;
-       size_t commonLengthSmaller = 0, commonLengthLarger = 0;
-       const BYTE *const dictBase = zc->dictBase;
-       const U32 dictLimit = zc->dictLimit;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-       const BYTE *const prefixStart = base + dictLimit;
-       const U32 btLow = btMask >= curr ? 0 : curr - btMask;
-       const U32 windowLow = zc->lowLimit;
-       U32 *smallerPtr = bt + 2 * (curr & btMask);
-       U32 *largerPtr = bt + 2 * (curr & btMask) + 1;
-       U32 matchEndIdx = curr + 8;
-       U32 dummy32; /* to be nullified at the end */
-       U32 mnum = 0;
-
-       const U32 minMatch = (mls == 3) ? 3 : 4;
-       size_t bestLength = minMatchLen - 1;
-
-       if (minMatch == 3) { /* HC3 match finder */
-               U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip);
-               if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) {
-                       const BYTE *match;
-                       size_t currMl = 0;
-                       if ((!extDict) || matchIndex3 >= dictLimit) {
-                               match = base + matchIndex3;
-                               if (match[bestLength] == ip[bestLength])
-                                       currMl = ZSTD_count(ip, match, iLimit);
-                       } else {
-                               match = dictBase + matchIndex3;
-                               if (ZSTD_readMINMATCH(match, MINMATCH) ==
-                                   ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
-                                       currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
-                       }
-
-                       /* save best solution */
-                       if (currMl > bestLength) {
-                               bestLength = currMl;
-                               matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3;
-                               matches[mnum].len = (U32)currMl;
-                               mnum++;
-                               if (currMl > ZSTD_OPT_NUM)
-                                       goto update;
-                               if (ip + currMl == iLimit)
-                                       goto update; /* best possible, and avoid read overflow*/
-                       }
-               }
-       }
-
-       hashTable[h] = curr; /* Update Hash Table */
-
-       while (nbCompares-- && (matchIndex > windowLow)) {
-               U32 *nextPtr = bt + 2 * (matchIndex & btMask);
-               size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
-               const BYTE *match;
-
-               if ((!extDict) || (matchIndex + matchLength >= dictLimit)) {
-                       match = base + matchIndex;
-                       if (match[matchLength] == ip[matchLength]) {
-                               matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1;
-                       }
-               } else {
-                       match = dictBase + matchIndex;
-                       matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart);
-                       if (matchIndex + matchLength >= dictLimit)
-                               match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
-               }
-
-               if (matchLength > bestLength) {
-                       if (matchLength > matchEndIdx - matchIndex)
-                               matchEndIdx = matchIndex + (U32)matchLength;
-                       bestLength = matchLength;
-                       matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex;
-                       matches[mnum].len = (U32)matchLength;
-                       mnum++;
-                       if (matchLength > ZSTD_OPT_NUM)
-                               break;
-                       if (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */
-                               break;                  /* drop, to guarantee consistency (miss a little bit of compression) */
-               }
-
-               if (match[matchLength] < ip[matchLength]) {
-                       /* match is smaller than curr */
-                       *smallerPtr = matchIndex;         /* update smaller idx */
-                       commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
-                       if (matchIndex <= btLow) {
-                               smallerPtr = &dummy32;
-                               break;
-                       }                         /* beyond tree size, stop the search */
-                       smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */
-                       matchIndex = nextPtr[1];  /* new matchIndex larger than previous (closer to curr) */
-               } else {
-                       /* match is larger than curr */
-                       *largerPtr = matchIndex;
-                       commonLengthLarger = matchLength;
-                       if (matchIndex <= btLow) {
-                               largerPtr = &dummy32;
-                               break;
-                       } /* beyond tree size, stop the search */
-                       largerPtr = nextPtr;
-                       matchIndex = nextPtr[0];
-               }
-       }
-
-       *smallerPtr = *largerPtr = 0;
-
-update:
-       zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1;
-       return mnum;
-}
-
-/** Tree updater, providing best match */
-static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches,
-                               const U32 minMatchLen)
-{
-       if (ip < zc->base + zc->nextToUpdate)
-               return 0; /* skipped area */
-       ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
-       return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
-}
-
-static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */
-                                         const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
-                                         ZSTD_match_t *matches, const U32 minMatchLen)
-{
-       switch (matchLengthSearch) {
-       case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
-       default:
-       case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
-       case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
-       case 7:
-       case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
-       }
-}
-
-/** Tree updater, providing best match */
-static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls,
-                                       ZSTD_match_t *matches, const U32 minMatchLen)
-{
-       if (ip < zc->base + zc->nextToUpdate)
-               return 0; /* skipped area */
-       ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
-       return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
-}
-
-static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */
-                                                 const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch,
-                                                 ZSTD_match_t *matches, const U32 minMatchLen)
-{
-       switch (matchLengthSearch) {
-       case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
-       default:
-       case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
-       case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
-       case 7:
-       case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
-       }
-}
-
-/*-*******************************
-*  Optimal parser
-*********************************/
-FORCE_INLINE
-void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
-{
-       seqStore_t *seqStorePtr = &(ctx->seqStore);
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - 8;
-       const BYTE *const base = ctx->base;
-       const BYTE *const prefixStart = base + ctx->dictLimit;
-
-       const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-       const U32 sufficient_len = ctx->params.cParams.targetLength;
-       const U32 mls = ctx->params.cParams.searchLength;
-       const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
-
-       ZSTD_optimal_t *opt = seqStorePtr->priceTable;
-       ZSTD_match_t *matches = seqStorePtr->matchTable;
-       const BYTE *inr;
-       U32 offset, rep[ZSTD_REP_NUM];
-
-       /* init */
-       ctx->nextToUpdate3 = ctx->nextToUpdate;
-       ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
-       ip += (ip == prefixStart);
-       {
-               U32 i;
-               for (i = 0; i < ZSTD_REP_NUM; i++)
-                       rep[i] = ctx->rep[i];
-       }
-
-       /* Match Loop */
-       while (ip < ilimit) {
-               U32 cur, match_num, last_pos, litlen, price;
-               U32 u, mlen, best_mlen, best_off, litLength;
-               memset(opt, 0, sizeof(ZSTD_optimal_t));
-               last_pos = 0;
-               litlen = (U32)(ip - anchor);
-
-               /* check repCode */
-               {
-                       U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
-                       for (i = (ip == anchor); i < last_i; i++) {
-                               const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
-                               if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) &&
-                                   (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) {
-                                       mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch;
-                                       if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-                                               best_mlen = mlen;
-                                               best_off = i;
-                                               cur = 0;
-                                               last_pos = 1;
-                                               goto _storeSequence;
-                                       }
-                                       best_off = i - (ip == anchor);
-                                       do {
-                                               price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-                                               if (mlen > last_pos || price < opt[mlen].price)
-                                                       SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
-                                               mlen--;
-                                       } while (mlen >= minMatch);
-                               }
-                       }
-               }
-
-               match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
-
-               if (!last_pos && !match_num) {
-                       ip++;
-                       continue;
-               }
-
-               if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-                       best_mlen = matches[match_num - 1].len;
-                       best_off = matches[match_num - 1].off;
-                       cur = 0;
-                       last_pos = 1;
-                       goto _storeSequence;
-               }
-
-               /* set prices using matches at position = 0 */
-               best_mlen = (last_pos) ? last_pos : minMatch;
-               for (u = 0; u < match_num; u++) {
-                       mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-                       best_mlen = matches[u].len;
-                       while (mlen <= best_mlen) {
-                               price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-                               if (mlen > last_pos || price < opt[mlen].price)
-                                       SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
-                               mlen++;
-                       }
-               }
-
-               if (last_pos < minMatch) {
-                       ip++;
-                       continue;
-               }
-
-               /* initialize opt[0] */
-               {
-                       U32 i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               opt[0].rep[i] = rep[i];
-               }
-               opt[0].mlen = 1;
-               opt[0].litlen = litlen;
-
-               /* check further positions */
-               for (cur = 1; cur <= last_pos; cur++) {
-                       inr = ip + cur;
-
-                       if (opt[cur - 1].mlen == 1) {
-                               litlen = opt[cur - 1].litlen + 1;
-                               if (cur > litlen) {
-                                       price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
-                               } else
-                                       price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-                       } else {
-                               litlen = 1;
-                               price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
-                       }
-
-                       if (cur > last_pos || price <= opt[cur].price)
-                               SET_PRICE(cur, 1, 0, litlen, price);
-
-                       if (cur == last_pos)
-                               break;
-
-                       if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
-                               continue;
-
-                       mlen = opt[cur].mlen;
-                       if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
-                               opt[cur].rep[2] = opt[cur - mlen].rep[1];
-                               opt[cur].rep[1] = opt[cur - mlen].rep[0];
-                               opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
-                       } else {
-                               opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
-                               opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
-                               opt[cur].rep[0] =
-                                   ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
-                       }
-
-                       best_mlen = minMatch;
-                       {
-                               U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
-                               for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */
-                                       const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
-                                       if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) &&
-                                           (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) {
-                                               mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch;
-
-                                               if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-                                                       best_mlen = mlen;
-                                                       best_off = i;
-                                                       last_pos = cur + 1;
-                                                       goto _storeSequence;
-                                               }
-
-                                               best_off = i - (opt[cur].mlen != 1);
-                                               if (mlen > best_mlen)
-                                                       best_mlen = mlen;
-
-                                               do {
-                                                       if (opt[cur].mlen == 1) {
-                                                               litlen = opt[cur].litlen;
-                                                               if (cur > litlen) {
-                                                                       price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
-                                                                                                                       best_off, mlen - MINMATCH, ultra);
-                                                               } else
-                                                                       price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-                                                       } else {
-                                                               litlen = 0;
-                                                               price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
-                                                       }
-
-                                                       if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-                                                               SET_PRICE(cur + mlen, mlen, i, litlen, price);
-                                                       mlen--;
-                                               } while (mlen >= minMatch);
-                                       }
-                               }
-                       }
-
-                       match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
-
-                       if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-                               best_mlen = matches[match_num - 1].len;
-                               best_off = matches[match_num - 1].off;
-                               last_pos = cur + 1;
-                               goto _storeSequence;
-                       }
-
-                       /* set prices using matches at position = cur */
-                       for (u = 0; u < match_num; u++) {
-                               mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-                               best_mlen = matches[u].len;
-
-                               while (mlen <= best_mlen) {
-                                       if (opt[cur].mlen == 1) {
-                                               litlen = opt[cur].litlen;
-                                               if (cur > litlen)
-                                                       price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
-                                                                                                       matches[u].off - 1, mlen - MINMATCH, ultra);
-                                               else
-                                                       price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-                                       } else {
-                                               litlen = 0;
-                                               price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
-                                       }
-
-                                       if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
-                                               SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
-
-                                       mlen++;
-                               }
-                       }
-               }
-
-               best_mlen = opt[last_pos].mlen;
-               best_off = opt[last_pos].off;
-               cur = last_pos - best_mlen;
-
-       /* store sequence */
-_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
-               opt[0].mlen = 1;
-
-               while (1) {
-                       mlen = opt[cur].mlen;
-                       offset = opt[cur].off;
-                       opt[cur].mlen = best_mlen;
-                       opt[cur].off = best_off;
-                       best_mlen = mlen;
-                       best_off = offset;
-                       if (mlen > cur)
-                               break;
-                       cur -= mlen;
-               }
-
-               for (u = 0; u <= last_pos;) {
-                       u += opt[u].mlen;
-               }
-
-               for (cur = 0; cur < last_pos;) {
-                       mlen = opt[cur].mlen;
-                       if (mlen == 1) {
-                               ip++;
-                               cur++;
-                               continue;
-                       }
-                       offset = opt[cur].off;
-                       cur += mlen;
-                       litLength = (U32)(ip - anchor);
-
-                       if (offset > ZSTD_REP_MOVE_OPT) {
-                               rep[2] = rep[1];
-                               rep[1] = rep[0];
-                               rep[0] = offset - ZSTD_REP_MOVE_OPT;
-                               offset--;
-                       } else {
-                               if (offset != 0) {
-                                       best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
-                                       if (offset != 1)
-                                               rep[2] = rep[1];
-                                       rep[1] = rep[0];
-                                       rep[0] = best_off;
-                               }
-                               if (litLength == 0)
-                                       offset--;
-                       }
-
-                       ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-                       ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-                       anchor = ip = ip + mlen;
-               }
-       } /* for (cur=0; cur < last_pos; ) */
-
-       /* Save reps for next block */
-       {
-               int i;
-               for (i = 0; i < ZSTD_REP_NUM; i++)
-                       ctx->repToConfirm[i] = rep[i];
-       }
-
-       /* Last Literals */
-       {
-               size_t const lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-FORCE_INLINE
-void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra)
-{
-       seqStore_t *seqStorePtr = &(ctx->seqStore);
-       const BYTE *const istart = (const BYTE *)src;
-       const BYTE *ip = istart;
-       const BYTE *anchor = istart;
-       const BYTE *const iend = istart + srcSize;
-       const BYTE *const ilimit = iend - 8;
-       const BYTE *const base = ctx->base;
-       const U32 lowestIndex = ctx->lowLimit;
-       const U32 dictLimit = ctx->dictLimit;
-       const BYTE *const prefixStart = base + dictLimit;
-       const BYTE *const dictBase = ctx->dictBase;
-       const BYTE *const dictEnd = dictBase + dictLimit;
-
-       const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
-       const U32 sufficient_len = ctx->params.cParams.targetLength;
-       const U32 mls = ctx->params.cParams.searchLength;
-       const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
-
-       ZSTD_optimal_t *opt = seqStorePtr->priceTable;
-       ZSTD_match_t *matches = seqStorePtr->matchTable;
-       const BYTE *inr;
-
-       /* init */
-       U32 offset, rep[ZSTD_REP_NUM];
-       {
-               U32 i;
-               for (i = 0; i < ZSTD_REP_NUM; i++)
-                       rep[i] = ctx->rep[i];
-       }
-
-       ctx->nextToUpdate3 = ctx->nextToUpdate;
-       ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize);
-       ip += (ip == prefixStart);
-
-       /* Match Loop */
-       while (ip < ilimit) {
-               U32 cur, match_num, last_pos, litlen, price;
-               U32 u, mlen, best_mlen, best_off, litLength;
-               U32 curr = (U32)(ip - base);
-               memset(opt, 0, sizeof(ZSTD_optimal_t));
-               last_pos = 0;
-               opt[0].litlen = (U32)(ip - anchor);
-
-               /* check repCode */
-               {
-                       U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor);
-                       for (i = (ip == anchor); i < last_i; i++) {
-                               const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i];
-                               const U32 repIndex = (U32)(curr - repCur);
-                               const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-                               const BYTE *const repMatch = repBase + repIndex;
-                               if ((repCur > 0 && repCur <= (S32)curr) &&
-                                   (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-                                   && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
-                                       /* repcode detected we should take it */
-                                       const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                                       mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
-
-                                       if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
-                                               best_mlen = mlen;
-                                               best_off = i;
-                                               cur = 0;
-                                               last_pos = 1;
-                                               goto _storeSequence;
-                                       }
-
-                                       best_off = i - (ip == anchor);
-                                       litlen = opt[0].litlen;
-                                       do {
-                                               price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-                                               if (mlen > last_pos || price < opt[mlen].price)
-                                                       SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
-                                               mlen--;
-                                       } while (mlen >= minMatch);
-                               }
-                       }
-               }
-
-               match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
-
-               if (!last_pos && !match_num) {
-                       ip++;
-                       continue;
-               }
-
-               {
-                       U32 i;
-                       for (i = 0; i < ZSTD_REP_NUM; i++)
-                               opt[0].rep[i] = rep[i];
-               }
-               opt[0].mlen = 1;
-
-               if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-                       best_mlen = matches[match_num - 1].len;
-                       best_off = matches[match_num - 1].off;
-                       cur = 0;
-                       last_pos = 1;
-                       goto _storeSequence;
-               }
-
-               best_mlen = (last_pos) ? last_pos : minMatch;
-
-               /* set prices using matches at position = 0 */
-               for (u = 0; u < match_num; u++) {
-                       mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-                       best_mlen = matches[u].len;
-                       litlen = opt[0].litlen;
-                       while (mlen <= best_mlen) {
-                               price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-                               if (mlen > last_pos || price < opt[mlen].price)
-                                       SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
-                               mlen++;
-                       }
-               }
-
-               if (last_pos < minMatch) {
-                       ip++;
-                       continue;
-               }
-
-               /* check further positions */
-               for (cur = 1; cur <= last_pos; cur++) {
-                       inr = ip + cur;
-
-                       if (opt[cur - 1].mlen == 1) {
-                               litlen = opt[cur - 1].litlen + 1;
-                               if (cur > litlen) {
-                                       price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen);
-                               } else
-                                       price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
-                       } else {
-                               litlen = 1;
-                               price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1);
-                       }
-
-                       if (cur > last_pos || price <= opt[cur].price)
-                               SET_PRICE(cur, 1, 0, litlen, price);
-
-                       if (cur == last_pos)
-                               break;
-
-                       if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
-                               continue;
-
-                       mlen = opt[cur].mlen;
-                       if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
-                               opt[cur].rep[2] = opt[cur - mlen].rep[1];
-                               opt[cur].rep[1] = opt[cur - mlen].rep[0];
-                               opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
-                       } else {
-                               opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2];
-                               opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1];
-                               opt[cur].rep[0] =
-                                   ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]);
-                       }
-
-                       best_mlen = minMatch;
-                       {
-                               U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
-                               for (i = (mlen != 1); i < last_i; i++) {
-                                       const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
-                                       const U32 repIndex = (U32)(curr + cur - repCur);
-                                       const BYTE *const repBase = repIndex < dictLimit ? dictBase : base;
-                                       const BYTE *const repMatch = repBase + repIndex;
-                                       if ((repCur > 0 && repCur <= (S32)(curr + cur)) &&
-                                           (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
-                                           && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) {
-                                               /* repcode detected */
-                                               const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend;
-                                               mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch;
-
-                                               if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
-                                                       best_mlen = mlen;
-                                                       best_off = i;
-                                                       last_pos = cur + 1;
-                                                       goto _storeSequence;
-                                               }
-
-                                               best_off = i - (opt[cur].mlen != 1);
-                                               if (mlen > best_mlen)
-                                                       best_mlen = mlen;
-
-                                               do {
-                                                       if (opt[cur].mlen == 1) {
-                                                               litlen = opt[cur].litlen;
-                                                               if (cur > litlen) {
-                                                                       price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen,
-                                                                                                                       best_off, mlen - MINMATCH, ultra);
-                                                               } else
-                                                                       price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
-                                                       } else {
-                                                               litlen = 0;
-                                                               price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
-                                                       }
-
-                                                       if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
-                                                               SET_PRICE(cur + mlen, mlen, i, litlen, price);
-                                                       mlen--;
-                                               } while (mlen >= minMatch);
-                                       }
-                               }
-                       }
-
-                       match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
-
-                       if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) {
-                               best_mlen = matches[match_num - 1].len;
-                               best_off = matches[match_num - 1].off;
-                               last_pos = cur + 1;
-                               goto _storeSequence;
-                       }
-
-                       /* set prices using matches at position = cur */
-                       for (u = 0; u < match_num; u++) {
-                               mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen;
-                               best_mlen = matches[u].len;
-
-                               while (mlen <= best_mlen) {
-                                       if (opt[cur].mlen == 1) {
-                                               litlen = opt[cur].litlen;
-                                               if (cur > litlen)
-                                                       price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen,
-                                                                                                       matches[u].off - 1, mlen - MINMATCH, ultra);
-                                               else
-                                                       price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra);
-                                       } else {
-                                               litlen = 0;
-                                               price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra);
-                                       }
-
-                                       if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
-                                               SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
-
-                                       mlen++;
-                               }
-                       }
-               } /* for (cur = 1; cur <= last_pos; cur++) */
-
-               best_mlen = opt[last_pos].mlen;
-               best_off = opt[last_pos].off;
-               cur = last_pos - best_mlen;
-
-       /* store sequence */
-_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
-               opt[0].mlen = 1;
-
-               while (1) {
-                       mlen = opt[cur].mlen;
-                       offset = opt[cur].off;
-                       opt[cur].mlen = best_mlen;
-                       opt[cur].off = best_off;
-                       best_mlen = mlen;
-                       best_off = offset;
-                       if (mlen > cur)
-                               break;
-                       cur -= mlen;
-               }
-
-               for (u = 0; u <= last_pos;) {
-                       u += opt[u].mlen;
-               }
-
-               for (cur = 0; cur < last_pos;) {
-                       mlen = opt[cur].mlen;
-                       if (mlen == 1) {
-                               ip++;
-                               cur++;
-                               continue;
-                       }
-                       offset = opt[cur].off;
-                       cur += mlen;
-                       litLength = (U32)(ip - anchor);
-
-                       if (offset > ZSTD_REP_MOVE_OPT) {
-                               rep[2] = rep[1];
-                               rep[1] = rep[0];
-                               rep[0] = offset - ZSTD_REP_MOVE_OPT;
-                               offset--;
-                       } else {
-                               if (offset != 0) {
-                                       best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]);
-                                       if (offset != 1)
-                                               rep[2] = rep[1];
-                                       rep[1] = rep[0];
-                                       rep[0] = best_off;
-                               }
-
-                               if (litLength == 0)
-                                       offset--;
-                       }
-
-                       ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-                       ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH);
-                       anchor = ip = ip + mlen;
-               }
-       } /* for (cur=0; cur < last_pos; ) */
-
-       /* Save reps for next block */
-       {
-               int i;
-               for (i = 0; i < ZSTD_REP_NUM; i++)
-                       ctx->repToConfirm[i] = rep[i];
-       }
-
-       /* Last Literals */
-       {
-               size_t lastLLSize = iend - anchor;
-               memcpy(seqStorePtr->lit, anchor, lastLLSize);
-               seqStorePtr->lit += lastLLSize;
-       }
-}
-
-#endif /* ZSTD_OPT_H_91842398743 */
index f64ebb6..07c875f 100644 (file)
@@ -58,7 +58,6 @@
 #include <linux/ratelimit.h>
 #include <linux/page-isolation.h>
 #include <linux/pagewalk.h>
-#include <linux/shmem_fs.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -868,7 +867,6 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
 {
        int ret;
        struct address_space *mapping;
-       bool extra_pins;
 
        delete_from_lru_cache(p);
 
@@ -898,23 +896,17 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
        }
 
        /*
-        * The shmem page is kept in page cache instead of truncating
-        * so is expected to have an extra refcount after error-handling.
-        */
-       extra_pins = shmem_mapping(mapping);
-
-       /*
         * Truncation is a bit tricky. Enable it per file system for now.
         *
         * Open: to take i_rwsem or not for this? Right now we don't.
         */
        ret = truncate_error_page(p, page_to_pfn(p), mapping);
-       if (has_extra_refcount(ps, p, extra_pins))
-               ret = MF_FAILED;
-
 out:
        unlock_page(p);
 
+       if (has_extra_refcount(ps, p, false))
+               ret = MF_FAILED;
+
        return ret;
 }
 
index 2d498bb..a613f8e 100644 (file)
@@ -2967,7 +2967,7 @@ EXPORT_SYMBOL_GPL(folio_wait_writeback_killable);
  */
 void folio_wait_stable(struct folio *folio)
 {
-       if (folio->mapping->host->i_sb->s_iflags & SB_I_STABLE_WRITES)
+       if (folio_inode(folio)->i_sb->s_iflags & SB_I_STABLE_WRITES)
                folio_wait_writeback(folio);
 }
 EXPORT_SYMBOL_GPL(folio_wait_stable);
index f0eee4e..dc038ce 100644 (file)
@@ -2456,7 +2456,6 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
        struct inode *inode = mapping->host;
        struct shmem_inode_info *info = SHMEM_I(inode);
        pgoff_t index = pos >> PAGE_SHIFT;
-       int ret = 0;
 
        /* i_rwsem is held by caller */
        if (unlikely(info->seals & (F_SEAL_GROW |
@@ -2467,15 +2466,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
                        return -EPERM;
        }
 
-       ret = shmem_getpage(inode, index, pagep, SGP_WRITE);
-
-       if (*pagep && PageHWPoison(*pagep)) {
-               unlock_page(*pagep);
-               put_page(*pagep);
-               ret = -EIO;
-       }
-
-       return ret;
+       return shmem_getpage(inode, index, pagep, SGP_WRITE);
 }
 
 static int
@@ -2562,12 +2553,6 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                        if (sgp == SGP_CACHE)
                                set_page_dirty(page);
                        unlock_page(page);
-
-                       if (PageHWPoison(page)) {
-                               put_page(page);
-                               error = -EIO;
-                               break;
-                       }
                }
 
                /*
@@ -3107,8 +3092,7 @@ static const char *shmem_get_link(struct dentry *dentry,
                page = find_get_page(inode->i_mapping, 0);
                if (!page)
                        return ERR_PTR(-ECHILD);
-               if (PageHWPoison(page) ||
-                   !PageUptodate(page)) {
+               if (!PageUptodate(page)) {
                        put_page(page);
                        return ERR_PTR(-ECHILD);
                }
@@ -3116,11 +3100,6 @@ static const char *shmem_get_link(struct dentry *dentry,
                error = shmem_getpage(inode, 0, &page, SGP_READ);
                if (error)
                        return ERR_PTR(error);
-               if (page && PageHWPoison(page)) {
-                       unlock_page(page);
-                       put_page(page);
-                       return ERR_PTR(-ECHILD);
-               }
                unlock_page(page);
        }
        set_delayed_call(done, shmem_put_link, page);
@@ -3771,13 +3750,6 @@ static void shmem_destroy_inodecache(void)
        kmem_cache_destroy(shmem_inode_cachep);
 }
 
-/* Keep the page in page cache instead of truncating it */
-static int shmem_error_remove_page(struct address_space *mapping,
-                                  struct page *page)
-{
-       return 0;
-}
-
 const struct address_space_operations shmem_aops = {
        .writepage      = shmem_writepage,
        .set_page_dirty = __set_page_dirty_no_writeback,
@@ -3788,7 +3760,7 @@ const struct address_space_operations shmem_aops = {
 #ifdef CONFIG_MIGRATION
        .migratepage    = migrate_page,
 #endif
-       .error_remove_page = shmem_error_remove_page,
+       .error_remove_page = generic_error_remove_page,
 };
 EXPORT_SYMBOL(shmem_aops);
 
@@ -4199,10 +4171,6 @@ struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
                page = ERR_PTR(error);
        else
                unlock_page(page);
-
-       if (PageHWPoison(page))
-               page = ERR_PTR(-EIO);
-
        return page;
 #else
        /*
index 0780c2a..ac6f036 100644 (file)
@@ -232,11 +232,6 @@ static int mcontinue_atomic_pte(struct mm_struct *dst_mm,
                goto out;
        }
 
-       if (PageHWPoison(page)) {
-               ret = -EIO;
-               goto out_release;
-       }
-
        ret = mfill_atomic_install_pte(dst_mm, dst_pmd, dst_vma, dst_addr,
                                       page, false, wp_copy);
        if (ret)
index 013cbdb..6a6898e 100644 (file)
@@ -1153,12 +1153,11 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
 
 int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
 {
-       int err = 0;
+       int err;
 
        dout("init\n");
        memset(monc, 0, sizeof(*monc));
        monc->client = cl;
-       monc->monmap = NULL;
        mutex_init(&monc->mutex);
 
        err = build_initial_monmap(monc);
index ff8624a..1c58155 100644 (file)
@@ -5310,14 +5310,14 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc)
        ceph_msgpool_destroy(&osdc->msgpool_op_reply);
 }
 
-static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
-                                    u64 src_snapid, u64 src_version,
-                                    struct ceph_object_id *src_oid,
-                                    struct ceph_object_locator *src_oloc,
-                                    u32 src_fadvise_flags,
-                                    u32 dst_fadvise_flags,
-                                    u32 truncate_seq, u64 truncate_size,
-                                    u8 copy_from_flags)
+int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+                             u64 src_snapid, u64 src_version,
+                             struct ceph_object_id *src_oid,
+                             struct ceph_object_locator *src_oloc,
+                             u32 src_fadvise_flags,
+                             u32 dst_fadvise_flags,
+                             u32 truncate_seq, u64 truncate_size,
+                             u8 copy_from_flags)
 {
        struct ceph_osd_req_op *op;
        struct page **pages;
@@ -5346,49 +5346,7 @@ static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
                                 op->indata_len, 0, false, true);
        return 0;
 }
-
-int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
-                       u64 src_snapid, u64 src_version,
-                       struct ceph_object_id *src_oid,
-                       struct ceph_object_locator *src_oloc,
-                       u32 src_fadvise_flags,
-                       struct ceph_object_id *dst_oid,
-                       struct ceph_object_locator *dst_oloc,
-                       u32 dst_fadvise_flags,
-                       u32 truncate_seq, u64 truncate_size,
-                       u8 copy_from_flags)
-{
-       struct ceph_osd_request *req;
-       int ret;
-
-       req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
-       if (!req)
-               return -ENOMEM;
-
-       req->r_flags = CEPH_OSD_FLAG_WRITE;
-
-       ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
-       ceph_oid_copy(&req->r_t.base_oid, dst_oid);
-
-       ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
-                                       src_oloc, src_fadvise_flags,
-                                       dst_fadvise_flags, truncate_seq,
-                                       truncate_size, copy_from_flags);
-       if (ret)
-               goto out;
-
-       ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
-       if (ret)
-               goto out;
-
-       ceph_osdc_start_request(osdc, req, false);
-       ret = ceph_osdc_wait_request(osdc, req);
-
-out:
-       ceph_osdc_put_request(req);
-       return ret;
-}
-EXPORT_SYMBOL(ceph_osdc_copy_from);
+EXPORT_SYMBOL(osd_req_op_copy_from_init);
 
 int __init ceph_osdc_setup(void)
 {
index 5ba4f94..5ad72db 100644 (file)
@@ -4229,7 +4229,9 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
        WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
                cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
                cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
-       WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+       if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+               return;
 
        msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!msg)
index e471c9b..6102f09 100644 (file)
@@ -7162,6 +7162,8 @@ sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #endif
        case BPF_FUNC_sk_storage_get:
                return &bpf_sk_storage_get_cg_sock_proto;
+       case BPF_FUNC_ktime_get_coarse_ns:
+               return &bpf_ktime_get_coarse_ns_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -10327,6 +10329,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
                return &sk_reuseport_load_bytes_relative_proto;
        case BPF_FUNC_get_socket_cookie:
                return &bpf_get_socket_ptr_cookie_proto;
+       case BPF_FUNC_ktime_get_coarse_ns:
+               return &bpf_ktime_get_coarse_ns_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -10833,6 +10837,8 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
        case BPF_FUNC_skc_to_unix_sock:
                func = &bpf_skc_to_unix_sock_proto;
                break;
+       case BPF_FUNC_ktime_get_coarse_ns:
+               return &bpf_ktime_get_coarse_ns_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
index 47931c8..72ba027 100644 (file)
@@ -1779,6 +1779,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
 {
        neigh_tables[index] = NULL;
        /* It is not clean... Fix it to unload IPv6 module safely */
+       cancel_delayed_work_sync(&tbl->managed_work);
        cancel_delayed_work_sync(&tbl->gc_work);
        del_timer_sync(&tbl->proxy_timer);
        pneigh_queue_purge(&tbl->proxy_queue);
index 9b60e43..1a69784 100644 (file)
@@ -49,12 +49,6 @@ static int page_pool_init(struct page_pool *pool,
         * which is the XDP_TX use-case.
         */
        if (pool->p.flags & PP_FLAG_DMA_MAP) {
-               /* DMA-mapping is not supported on 32-bit systems with
-                * 64-bit DMA mapping.
-                */
-               if (sizeof(dma_addr_t) > sizeof(unsigned long))
-                       return -EOPNOTSUPP;
-
                if ((pool->p.dma_dir != DMA_FROM_DEVICE) &&
                    (pool->p.dma_dir != DMA_BIDIRECTIONAL))
                        return -EINVAL;
@@ -75,6 +69,10 @@ static int page_pool_init(struct page_pool *pool,
                 */
        }
 
+       if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
+           pool->p.flags & PP_FLAG_PAGE_FRAG)
+               return -EINVAL;
+
        if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
                return -ENOMEM;
 
index 2cf02b4..4bb9401 100644 (file)
@@ -205,6 +205,8 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
                    offsetof(struct tcp_congestion_ops, release))
                        return &bpf_sk_getsockopt_proto;
                return NULL;
+       case BPF_FUNC_ktime_get_coarse_ns:
+               return &bpf_ktime_get_coarse_ns_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
index 9e81007..5dbd4b5 100644 (file)
@@ -1899,15 +1899,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
 /* if any FIB entries reference this nexthop, any dst entries
  * need to be regenerated
  */
-static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
+static void nh_rt_cache_flush(struct net *net, struct nexthop *nh,
+                             struct nexthop *replaced_nh)
 {
        struct fib6_info *f6i;
+       struct nh_group *nhg;
+       int i;
 
        if (!list_empty(&nh->fi_list))
                rt_cache_flush(net);
 
        list_for_each_entry(f6i, &nh->f6i_list, nh_list)
                ipv6_stub->fib6_update_sernum(net, f6i);
+
+       /* if an IPv6 group was replaced, we have to release all old
+        * dsts to make sure all refcounts are released
+        */
+       if (!replaced_nh->is_group)
+               return;
+
+       /* new dsts must use only the new nexthop group */
+       synchronize_net();
+
+       nhg = rtnl_dereference(replaced_nh->nh_grp);
+       for (i = 0; i < nhg->num_nh; i++) {
+               struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+               struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info);
+
+               if (nhi->family == AF_INET6)
+                       ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh);
+       }
 }
 
 static int replace_nexthop_grp(struct net *net, struct nexthop *old,
@@ -2247,7 +2268,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old,
                err = replace_nexthop_single(net, old, new, extack);
 
        if (!err) {
-               nh_rt_cache_flush(net, old);
+               nh_rt_cache_flush(net, old, new);
 
                __remove_nexthop(net, new, NULL);
                nexthop_put(new);
@@ -2544,11 +2565,15 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
        /* sets nh_dev if successful */
        err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
                                      extack);
-       if (err)
+       if (err) {
+               /* IPv6 is not enabled, don't call fib6_nh_release */
+               if (err == -EAFNOSUPPORT)
+                       goto out;
                ipv6_stub->fib6_nh_release(fib6_nh);
-       else
+       } else {
                nh->nh_flags = fib6_nh->fib_nh_flags;
-
+       }
+out:
        return err;
 }
 
index 319dd7b..8bcecdd 100644 (file)
@@ -1807,6 +1807,17 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
                skb = skb_recv_udp(sk, 0, 1, &err);
                if (!skb)
                        return err;
+
+               if (udp_lib_checksum_complete(skb)) {
+                       __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS,
+                                       IS_UDPLITE(sk));
+                       __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS,
+                                       IS_UDPLITE(sk));
+                       atomic_inc(&sk->sk_drops);
+                       kfree_skb(skb);
+                       continue;
+               }
+
                used = recv_actor(desc, skb, 0, skb->len);
                if (used <= 0) {
                        if (!copied)
index 0c4da16..dab4a04 100644 (file)
@@ -1026,6 +1026,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
        .ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
        .fib6_nh_init      = fib6_nh_init,
        .fib6_nh_release   = fib6_nh_release,
+       .fib6_nh_release_dsts = fib6_nh_release_dsts,
        .fib6_update_sernum = fib6_update_sernum_stub,
        .fib6_rt_update    = fib6_rt_update,
        .ip6_del_rt        = ip6_del_rt,
index ed2f061..f0bac6f 100644 (file)
@@ -808,6 +808,12 @@ int esp6_input_done2(struct sk_buff *skb, int err)
                struct tcphdr *th;
 
                offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off);
+
+               if (offset < 0) {
+                       err = -EINVAL;
+                       goto out;
+               }
+
                uh = (void *)(skb->data + offset);
                th = (void *)(skb->data + offset);
                hdr_len += offset;
index 2f044a4..ff4e83e 100644 (file)
@@ -174,7 +174,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff
 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
-               IPCB(skb)->flags |= IPSKB_REROUTED;
+               IP6CB(skb)->flags |= IP6SKB_REROUTED;
                return dst_output(net, sk, skb);
        }
 #endif
index 3ae25b8..42d60c7 100644 (file)
@@ -3680,6 +3680,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh)
        fib_nh_common_release(&fib6_nh->nh_common);
 }
 
+void fib6_nh_release_dsts(struct fib6_nh *fib6_nh)
+{
+       int cpu;
+
+       if (!fib6_nh->rt6i_pcpu)
+               return;
+
+       for_each_possible_cpu(cpu) {
+               struct rt6_info *pcpu_rt, **ppcpu_rt;
+
+               ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+               pcpu_rt = xchg(ppcpu_rt, NULL);
+               if (pcpu_rt) {
+                       dst_dev_put(&pcpu_rt->dst);
+                       dst_release(&pcpu_rt->dst);
+               }
+       }
+}
+
 static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
                                              gfp_t gfp_flags,
                                              struct netlink_ext_ack *extack)
index e2b791c..bd3d319 100644 (file)
@@ -80,7 +80,8 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata,
        }
 
        /* also validate MU-MIMO change */
-       monitor_sdata = rtnl_dereference(local->monitor_sdata);
+       monitor_sdata = wiphy_dereference(local->hw.wiphy,
+                                         local->monitor_sdata);
 
        if (!monitor_sdata &&
            (params->vht_mumimo_groups || params->vht_mumimo_follow_addr))
@@ -840,7 +841,8 @@ static int ieee80211_set_monitor_channel(struct wiphy *wiphy,
 
        mutex_lock(&local->mtx);
        if (local->use_chanctx) {
-               sdata = rtnl_dereference(local->monitor_sdata);
+               sdata = wiphy_dereference(local->hw.wiphy,
+                                         local->monitor_sdata);
                if (sdata) {
                        ieee80211_vif_release_channel(sdata);
                        ret = ieee80211_vif_use_channel(sdata, chandef,
@@ -2707,7 +2709,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
                sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
 
                if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
-                       sdata = rtnl_dereference(local->monitor_sdata);
+                       sdata = wiphy_dereference(local->hw.wiphy,
+                                                 local->monitor_sdata);
                        if (!sdata)
                                return -EOPNOTSUPP;
                }
@@ -2767,7 +2770,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy,
        mutex_unlock(&local->iflist_mtx);
 
        if (has_monitor) {
-               sdata = rtnl_dereference(local->monitor_sdata);
+               sdata = wiphy_dereference(local->hw.wiphy,
+                                         local->monitor_sdata);
                if (sdata) {
                        sdata->user_power_level = local->user_power_level;
                        if (txp_type != sdata->vif.bss_conf.txpower_type)
index 9a2145c..20aa5cc 100644 (file)
@@ -588,7 +588,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
         */
        if (local->suspended) {
                WARN_ON(local->wowlan);
-               WARN_ON(rtnl_dereference(local->monitor_sdata));
+               WARN_ON(rcu_access_pointer(local->monitor_sdata));
                return;
        }
 
@@ -961,6 +961,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
                return 0;
 
        ASSERT_RTNL();
+       lockdep_assert_wiphy(local->hw.wiphy);
 
        if (local->monitor_sdata)
                return 0;
@@ -1028,6 +1029,7 @@ void ieee80211_del_virtual_monitor(struct ieee80211_local *local)
                return;
 
        ASSERT_RTNL();
+       lockdep_assert_wiphy(local->hw.wiphy);
 
        mutex_lock(&local->iflist_mtx);
 
index fb3aaa3..b71a142 100644 (file)
@@ -72,19 +72,19 @@ static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local,
 #endif
 
 static inline void
-ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes)
+ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, int bytes)
 {
 #ifdef CONFIG_MAC80211_LEDS
-       if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
+       if (atomic_read(&local->tpt_led_active))
                local->tpt_led_trigger->tx_bytes += bytes;
 #endif
 }
 
 static inline void
-ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes)
+ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, int bytes)
 {
 #ifdef CONFIG_MAC80211_LEDS
-       if (ieee80211_is_data(fc) && atomic_read(&local->tpt_led_active))
+       if (atomic_read(&local->tpt_led_active))
                local->tpt_led_trigger->rx_bytes += bytes;
 #endif
 }
index fc5c608..9541a4c 100644 (file)
@@ -364,7 +364,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
         * the compiler to think we have walked past the end of the
         * struct member.
         */
-       pos = (void *)&rthdr->it_optional[it_present - rthdr->it_optional];
+       pos = (void *)&rthdr->it_optional[it_present + 1 - rthdr->it_optional];
 
        /* the order of the following fields is important */
 
@@ -1952,7 +1952,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
                int keyid = rx->sta->ptk_idx;
                sta_ptk = rcu_dereference(rx->sta->ptk[keyid]);
 
-               if (ieee80211_has_protected(fc)) {
+               if (ieee80211_has_protected(fc) &&
+                   !(status->flag & RX_FLAG_IV_STRIPPED)) {
                        cs = rx->sta->cipher_scheme;
                        keyid = ieee80211_get_keyid(rx->skb, cs);
 
@@ -4863,6 +4864,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
        struct ieee80211_rate *rate = NULL;
        struct ieee80211_supported_band *sband;
        struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
+       struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 
        WARN_ON_ONCE(softirq_count() == 0);
 
@@ -4959,9 +4961,9 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
        if (!(status->flag & RX_FLAG_8023))
                skb = ieee80211_rx_monitor(local, skb, rate);
        if (skb) {
-               ieee80211_tpt_led_trig_rx(local,
-                                         ((struct ieee80211_hdr *)skb->data)->frame_control,
-                                         skb->len);
+               if ((status->flag & RX_FLAG_8023) ||
+                       ieee80211_is_data_present(hdr->frame_control))
+                       ieee80211_tpt_led_trig_rx(local, skb->len);
 
                if (status->flag & RX_FLAG_8023)
                        __ieee80211_rx_handle_8023(hw, pubsta, skb, list);
index a756a19..278945e 100644 (file)
@@ -1721,21 +1721,19 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local,
  * Returns false if the frame couldn't be transmitted but was queued instead.
  */
 static bool __ieee80211_tx(struct ieee80211_local *local,
-                          struct sk_buff_head *skbs, int led_len,
-                          struct sta_info *sta, bool txpending)
+                          struct sk_buff_head *skbs, struct sta_info *sta,
+                          bool txpending)
 {
        struct ieee80211_tx_info *info;
        struct ieee80211_sub_if_data *sdata;
        struct ieee80211_vif *vif;
        struct sk_buff *skb;
        bool result;
-       __le16 fc;
 
        if (WARN_ON(skb_queue_empty(skbs)))
                return true;
 
        skb = skb_peek(skbs);
-       fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
        info = IEEE80211_SKB_CB(skb);
        sdata = vif_to_sdata(info->control.vif);
        if (sta && !sta->uploaded)
@@ -1769,8 +1767,6 @@ static bool __ieee80211_tx(struct ieee80211_local *local,
 
        result = ieee80211_tx_frags(local, vif, sta, skbs, txpending);
 
-       ieee80211_tpt_led_trig_tx(local, fc, led_len);
-
        WARN_ON_ONCE(!skb_queue_empty(skbs));
 
        return result;
@@ -1920,7 +1916,6 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
        ieee80211_tx_result res_prepare;
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        bool result = true;
-       int led_len;
 
        if (unlikely(skb->len < 10)) {
                dev_kfree_skb(skb);
@@ -1928,7 +1923,6 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
        }
 
        /* initialises tx */
-       led_len = skb->len;
        res_prepare = ieee80211_tx_prepare(sdata, &tx, sta, skb);
 
        if (unlikely(res_prepare == TX_DROP)) {
@@ -1951,8 +1945,7 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
                return true;
 
        if (!invoke_tx_handlers_late(&tx))
-               result = __ieee80211_tx(local, &tx.skbs, led_len,
-                                       tx.sta, txpending);
+               result = __ieee80211_tx(local, &tx.skbs, tx.sta, txpending);
 
        return result;
 }
@@ -4175,6 +4168,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
        struct ieee80211_local *local = sdata->local;
        struct sta_info *sta;
        struct sk_buff *next;
+       int len = skb->len;
 
        if (unlikely(skb->len < ETH_HLEN)) {
                kfree_skb(skb);
@@ -4221,10 +4215,8 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
                }
        } else {
                /* we cannot process non-linear frames on this path */
-               if (skb_linearize(skb)) {
-                       kfree_skb(skb);
-                       goto out;
-               }
+               if (skb_linearize(skb))
+                       goto out_free;
 
                /* the frame could be fragmented, software-encrypted, and other
                 * things so we cannot really handle checksum offload with it -
@@ -4258,7 +4250,10 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
        goto out;
  out_free:
        kfree_skb(skb);
+       len = 0;
  out:
+       if (len)
+               ieee80211_tpt_led_trig_tx(local, len);
        rcu_read_unlock();
 }
 
@@ -4396,8 +4391,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 }
 
 static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
-                             struct sk_buff *skb, int led_len,
-                             struct sta_info *sta,
+                             struct sk_buff *skb, struct sta_info *sta,
                              bool txpending)
 {
        struct ieee80211_local *local = sdata->local;
@@ -4410,6 +4404,8 @@ static bool ieee80211_tx_8023(struct ieee80211_sub_if_data *sdata,
        if (sta)
                sk_pacing_shift_update(skb->sk, local->hw.tx_sk_pacing_shift);
 
+       ieee80211_tpt_led_trig_tx(local, skb->len);
+
        if (ieee80211_queue_skb(local, sdata, sta, skb))
                return true;
 
@@ -4498,7 +4494,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
        if (key)
                info->control.hw_key = &key->conf;
 
-       ieee80211_tx_8023(sdata, skb, skb->len, sta, false);
+       ieee80211_tx_8023(sdata, skb, sta, false);
 
        return;
 
@@ -4637,7 +4633,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
                if (IS_ERR(sta) || (sta && !sta->uploaded))
                        sta = NULL;
 
-               result = ieee80211_tx_8023(sdata, skb, skb->len, sta, true);
+               result = ieee80211_tx_8023(sdata, skb, sta, true);
        } else {
                struct sk_buff_head skbs;
 
@@ -4647,7 +4643,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
                hdr = (struct ieee80211_hdr *)skb->data;
                sta = sta_info_get(sdata, hdr->addr1);
 
-               result = __ieee80211_tx(local, &skbs, skb->len, sta, true);
+               result = __ieee80211_tx(local, &skbs, sta, true);
        }
 
        return result;
index 39fa2a5..43df2f0 100644 (file)
@@ -796,7 +796,7 @@ static void __iterate_interfaces(struct ieee80211_local *local,
 
        sdata = rcu_dereference_check(local->monitor_sdata,
                                      lockdep_is_held(&local->iflist_mtx) ||
-                                     lockdep_rtnl_is_held());
+                                     lockdep_is_held(&local->hw.wiphy->mtx));
        if (sdata &&
            (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only ||
             sdata->flags & IEEE80211_SDATA_IN_DRIVER))
@@ -2381,7 +2381,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
                                   IEEE80211_TPT_LEDTRIG_FL_RADIO, 0);
 
        /* add interfaces */
-       sdata = rtnl_dereference(local->monitor_sdata);
+       sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata);
        if (sdata) {
                /* in HW restart it exists already */
                WARN_ON(local->resuming);
@@ -2426,7 +2426,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
                                WARN_ON(drv_add_chanctx(local, ctx));
                mutex_unlock(&local->chanctx_mtx);
 
-               sdata = rtnl_dereference(local->monitor_sdata);
+               sdata = wiphy_dereference(local->hw.wiphy,
+                                         local->monitor_sdata);
                if (sdata && ieee80211_sdata_running(sdata))
                        ieee80211_assign_chanctx(local, sdata);
        }
index 9ea6004..62c6733 100644 (file)
@@ -143,7 +143,6 @@ u16 ieee80211_select_queue_80211(struct ieee80211_sub_if_data *sdata,
 u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
                             struct sta_info *sta, struct sk_buff *skb)
 {
-       struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        struct mac80211_qos_map *qos_map;
        bool qos;
 
@@ -156,7 +155,7 @@ u16 __ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
        else
                qos = false;
 
-       if (!qos || (info->control.flags & IEEE80211_TX_CTRL_DONT_REORDER)) {
+       if (!qos) {
                skb->priority = 0; /* required for correct WPA/11i MIC */
                return IEEE80211_AC_BE;
        }
index 7c3420a..fe98e4f 100644 (file)
@@ -422,28 +422,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
        return false;
 }
 
-/* MP_JOIN client subflow must wait for 4th ack before sending any data:
- * TCP can't schedule delack timer before the subflow is fully established.
- * MPTCP uses the delack timer to do 3rd ack retransmissions
- */
-static void schedule_3rdack_retransmission(struct sock *sk)
-{
-       struct inet_connection_sock *icsk = inet_csk(sk);
-       struct tcp_sock *tp = tcp_sk(sk);
-       unsigned long timeout;
-
-       /* reschedule with a timeout above RTT, as we must look only for drop */
-       if (tp->srtt_us)
-               timeout = tp->srtt_us << 1;
-       else
-               timeout = TCP_TIMEOUT_INIT;
-
-       WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
-       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
-       icsk->icsk_ack.timeout = timeout;
-       sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
-}
-
 static void clear_3rdack_retransmission(struct sock *sk)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -526,7 +504,15 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
                *size = TCPOLEN_MPTCP_MPJ_ACK;
                pr_debug("subflow=%p", subflow);
 
-               schedule_3rdack_retransmission(sk);
+               /* we can use the full delegate action helper only from BH context
+                * If we are in process context - sk is flushing the backlog at
+                * socket lock release time - just set the appropriate flag, will
+                * be handled by the release callback
+                */
+               if (sock_owned_by_user(sk))
+                       set_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status);
+               else
+                       mptcp_subflow_delegate(subflow, MPTCP_DELEGATE_ACK);
                return true;
        }
        return false;
index b7e32e3..c82a76d 100644 (file)
@@ -1596,7 +1596,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
                        if (!xmit_ssk)
                                goto out;
                        if (xmit_ssk != ssk) {
-                               mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+                               mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk),
+                                                      MPTCP_DELEGATE_SEND);
                                goto out;
                        }
 
@@ -2943,7 +2944,7 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
                if (xmit_ssk == ssk)
                        __mptcp_subflow_push_pending(sk, ssk);
                else if (xmit_ssk)
-                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk));
+                       mptcp_subflow_delegate(mptcp_subflow_ctx(xmit_ssk), MPTCP_DELEGATE_SEND);
        } else {
                set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
        }
@@ -2993,18 +2994,50 @@ static void mptcp_release_cb(struct sock *sk)
        __mptcp_update_rmem(sk);
 }
 
+/* MP_JOIN client subflow must wait for 4th ack before sending any data:
+ * TCP can't schedule delack timer before the subflow is fully established.
+ * MPTCP uses the delack timer to do 3rd ack retransmissions
+ */
+static void schedule_3rdack_retransmission(struct sock *ssk)
+{
+       struct inet_connection_sock *icsk = inet_csk(ssk);
+       struct tcp_sock *tp = tcp_sk(ssk);
+       unsigned long timeout;
+
+       if (mptcp_subflow_ctx(ssk)->fully_established)
+               return;
+
+       /* reschedule with a timeout above RTT, as we must look only for drop */
+       if (tp->srtt_us)
+               timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1));
+       else
+               timeout = TCP_TIMEOUT_INIT;
+       timeout += jiffies;
+
+       WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER);
+       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+       icsk->icsk_ack.timeout = timeout;
+       sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
+}
+
 void mptcp_subflow_process_delegated(struct sock *ssk)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct sock *sk = subflow->conn;
 
-       mptcp_data_lock(sk);
-       if (!sock_owned_by_user(sk))
-               __mptcp_subflow_push_pending(sk, ssk);
-       else
-               set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
-       mptcp_data_unlock(sk);
-       mptcp_subflow_delegated_done(subflow);
+       if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+               mptcp_data_lock(sk);
+               if (!sock_owned_by_user(sk))
+                       __mptcp_subflow_push_pending(sk, ssk);
+               else
+                       set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags);
+               mptcp_data_unlock(sk);
+               mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
+       }
+       if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
+               schedule_3rdack_retransmission(ssk);
+               mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
+       }
 }
 
 static int mptcp_hash(struct sock *sk)
index 67a61ac..d87cc04 100644 (file)
@@ -387,6 +387,7 @@ struct mptcp_delegated_action {
 DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 
 #define MPTCP_DELEGATE_SEND            0
+#define MPTCP_DELEGATE_ACK             1
 
 /* MPTCP subflow context */
 struct mptcp_subflow_context {
@@ -492,23 +493,23 @@ static inline void mptcp_add_pending_subflow(struct mptcp_sock *msk,
 
 void mptcp_subflow_process_delegated(struct sock *ssk);
 
-static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
 {
        struct mptcp_delegated_action *delegated;
        bool schedule;
 
+       /* the caller held the subflow bh socket lock */
+       lockdep_assert_in_softirq();
+
        /* The implied barrier pairs with mptcp_subflow_delegated_done(), and
         * ensures the below list check sees list updates done prior to status
         * bit changes
         */
-       if (!test_and_set_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+       if (!test_and_set_bit(action, &subflow->delegated_status)) {
                /* still on delegated list from previous scheduling */
                if (!list_empty(&subflow->delegated_node))
                        return;
 
-               /* the caller held the subflow bh socket lock */
-               lockdep_assert_in_softirq();
-
                delegated = this_cpu_ptr(&mptcp_delegated_actions);
                schedule = list_empty(&delegated->head);
                list_add_tail(&subflow->delegated_node, &delegated->head);
@@ -533,16 +534,16 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
 
 static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
 {
-       return test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+       return !!READ_ONCE(subflow->delegated_status);
 }
 
-static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow)
+static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
 {
        /* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
         * touching the status bit
         */
        smp_wmb();
-       clear_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status);
+       clear_bit(action, &subflow->delegated_status);
 }
 
 int mptcp_is_enabled(const struct net *net);
index ba9ae48..dda8b76 100644 (file)
@@ -18,6 +18,8 @@
 #include "internal.h"
 #include "ncsi-pkt.h"
 
+static const int padding_bytes = 26;
+
 u32 ncsi_calculate_checksum(unsigned char *data, int len)
 {
        u32 checksum = 0;
@@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb,
 {
        struct ncsi_cmd_oem_pkt *cmd;
        unsigned int len;
+       int payload;
+       /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2
+        * requires payload to be padded with 0 to
+        * 32-bit boundary before the checksum field.
+        * Ensure the padding bytes are accounted for in
+        * skb allocation
+        */
 
+       payload = ALIGN(nca->payload, 4);
        len = sizeof(struct ncsi_cmd_pkt_hdr) + 4;
-       if (nca->payload < 26)
-               len += 26;
-       else
-               len += nca->payload;
+       len += max(payload, padding_bytes);
 
        cmd = skb_put_zero(skb, len);
        memcpy(&cmd->mfr_id, nca->data, nca->payload);
@@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
        struct net_device *dev = nd->dev;
        int hlen = LL_RESERVED_SPACE(dev);
        int tlen = dev->needed_tailroom;
+       int payload;
        int len = hlen + tlen;
        struct sk_buff *skb;
        struct ncsi_request *nr;
@@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
                return NULL;
 
        /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum.
+        * Payload needs padding so that the checksum field following payload is
+        * aligned to 32-bit boundary.
         * The packet needs padding if its payload is less than 26 bytes to
         * meet 64 bytes minimal ethernet frame length.
         */
        len += sizeof(struct ncsi_cmd_pkt_hdr) + 4;
-       if (nca->payload < 26)
-               len += 26;
-       else
-               len += nca->payload;
+       payload = ALIGN(nca->payload, 4);
+       len += max(payload, padding_bytes);
 
        /* Allocate skb */
        skb = alloc_skb(len, GFP_ATOMIC);
index e93c937..51ad557 100644 (file)
@@ -1919,7 +1919,6 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
        struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
        int ret, pkts;
-       int conn_reuse_mode;
        struct sock *sk;
        int af = state->pf;
 
@@ -1997,15 +1996,16 @@ ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state
        cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
                             ipvs, af, skb, &iph);
 
-       conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
-       if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+       if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+               int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
                bool old_ct = false, resched = false;
 
                if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
                    unlikely(!atomic_read(&cp->dest->weight))) {
                        resched = true;
                        old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
-               } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+               } else if (conn_reuse_mode &&
+                          is_new_conn_expected(cp, conn_reuse_mode)) {
                        old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
                        if (!atomic_read(&cp->n_control)) {
                                resched = true;
index f1e5443..c7708bd 100644 (file)
@@ -1011,11 +1011,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
                                                   CTA_TUPLE_REPLY,
                                                   filter->family,
                                                   &filter->zone,
-                                                  filter->orig_flags);
-               if (err < 0) {
-                       err = -EINVAL;
+                                                  filter->reply_flags);
+               if (err < 0)
                        goto err_filter;
-               }
        }
 
        return filter;
index d6bf1b2..b561e0a 100644 (file)
@@ -65,11 +65,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
                       sizeof(struct in6_addr));
                if (memcmp(&key->enc_ipv6.src, &in6addr_any,
                           sizeof(struct in6_addr)))
-                       memset(&key->enc_ipv6.src, 0xff,
+                       memset(&mask->enc_ipv6.src, 0xff,
                               sizeof(struct in6_addr));
                if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
                           sizeof(struct in6_addr)))
-                       memset(&key->enc_ipv6.dst, 0xff,
+                       memset(&mask->enc_ipv6.dst, 0xff,
                               sizeof(struct in6_addr));
                enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
                key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
index cbfe4e4..bd68993 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/icmpv6.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
-#include <linux/ip.h>
 #include <net/sctp/checksum.h>
 
 static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
index 2f7cf5e..0f8bb0b 100644 (file)
@@ -85,9 +85,9 @@ static ssize_t idletimer_tg_show(struct device *dev,
        mutex_unlock(&list_mutex);
 
        if (time_after(expires, jiffies) || ktimespec.tv_sec > 0)
-               return snprintf(buf, PAGE_SIZE, "%ld\n", time_diff);
+               return sysfs_emit(buf, "%ld\n", time_diff);
 
-       return snprintf(buf, PAGE_SIZE, "0\n");
+       return sysfs_emit(buf, "0\n");
 }
 
 static void idletimer_tg_work(struct work_struct *work)
index 3c645c1..dc7a240 100644 (file)
@@ -94,13 +94,13 @@ int nfc_dev_up(struct nfc_dev *dev)
 
        device_lock(&dev->dev);
 
-       if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
-               rc = -ERFKILL;
+       if (!device_is_registered(&dev->dev)) {
+               rc = -ENODEV;
                goto error;
        }
 
-       if (!device_is_registered(&dev->dev)) {
-               rc = -ENODEV;
+       if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
+               rc = -ERFKILL;
                goto error;
        }
 
@@ -1125,11 +1125,7 @@ int nfc_register_device(struct nfc_dev *dev)
        if (rc)
                pr_err("Could not register llcp device\n");
 
-       rc = nfc_genl_device_added(dev);
-       if (rc)
-               pr_debug("The userspace won't be notified that the device %s was added\n",
-                        dev_name(&dev->dev));
-
+       device_lock(&dev->dev);
        dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
                                   RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
        if (dev->rfkill) {
@@ -1138,6 +1134,12 @@ int nfc_register_device(struct nfc_dev *dev)
                        dev->rfkill = NULL;
                }
        }
+       device_unlock(&dev->dev);
+
+       rc = nfc_genl_device_added(dev);
+       if (rc)
+               pr_debug("The userspace won't be notified that the device %s was added\n",
+                        dev_name(&dev->dev));
 
        return 0;
 }
@@ -1154,10 +1156,17 @@ void nfc_unregister_device(struct nfc_dev *dev)
 
        pr_debug("dev_name=%s\n", dev_name(&dev->dev));
 
+       rc = nfc_genl_device_removed(dev);
+       if (rc)
+               pr_debug("The userspace won't be notified that the device %s "
+                        "was removed\n", dev_name(&dev->dev));
+
+       device_lock(&dev->dev);
        if (dev->rfkill) {
                rfkill_unregister(dev->rfkill);
                rfkill_destroy(dev->rfkill);
        }
+       device_unlock(&dev->dev);
 
        if (dev->ops->check_presence) {
                device_lock(&dev->dev);
@@ -1167,11 +1176,6 @@ void nfc_unregister_device(struct nfc_dev *dev)
                cancel_work_sync(&dev->check_pres_work);
        }
 
-       rc = nfc_genl_device_removed(dev);
-       if (rc)
-               pr_debug("The userspace won't be notified that the device %s "
-                        "was removed\n", dev_name(&dev->dev));
-
        nfc_llcp_unregister_device(dev);
 
        mutex_lock(&nfc_devlist_mutex);
index 6fd873a..d253738 100644 (file)
@@ -144,12 +144,15 @@ inline int nci_request(struct nci_dev *ndev,
 {
        int rc;
 
-       if (!test_bit(NCI_UP, &ndev->flags))
-               return -ENETDOWN;
-
        /* Serialize all requests */
        mutex_lock(&ndev->req_lock);
-       rc = __nci_request(ndev, req, opt, timeout);
+       /* check the state after obtaing the lock against any races
+        * from nci_close_device when the device gets removed.
+        */
+       if (test_bit(NCI_UP, &ndev->flags))
+               rc = __nci_request(ndev, req, opt, timeout);
+       else
+               rc = -ENETDOWN;
        mutex_unlock(&ndev->req_lock);
 
        return rc;
@@ -473,6 +476,11 @@ static int nci_open_device(struct nci_dev *ndev)
 
        mutex_lock(&ndev->req_lock);
 
+       if (test_bit(NCI_UNREG, &ndev->flags)) {
+               rc = -ENODEV;
+               goto done;
+       }
+
        if (test_bit(NCI_UP, &ndev->flags)) {
                rc = -EALREADY;
                goto done;
@@ -545,6 +553,10 @@ done:
 static int nci_close_device(struct nci_dev *ndev)
 {
        nci_req_cancel(ndev, ENODEV);
+
+       /* This mutex needs to be held as a barrier for
+        * caller nci_unregister_device
+        */
        mutex_lock(&ndev->req_lock);
 
        if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
@@ -582,8 +594,8 @@ static int nci_close_device(struct nci_dev *ndev)
 
        del_timer_sync(&ndev->cmd_timer);
 
-       /* Clear flags */
-       ndev->flags = 0;
+       /* Clear flags except NCI_UNREG */
+       ndev->flags &= BIT(NCI_UNREG);
 
        mutex_unlock(&ndev->req_lock);
 
@@ -1266,6 +1278,12 @@ void nci_unregister_device(struct nci_dev *ndev)
 {
        struct nci_conn_info *conn_info, *n;
 
+       /* This set_bit is not protected with specialized barrier,
+        * However, it is fine because the mutex_lock(&ndev->req_lock);
+        * in nci_close_device() will help to emit one.
+        */
+       set_bit(NCI_UNREG, &ndev->flags);
+
        nci_close_device(ndev);
 
        destroy_workqueue(ndev->cmd_wq);
index d64b0ee..efc963a 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/if_arp.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
+#include <net/dst.h>
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 #include <linux/tc_act/tc_mirred.h>
@@ -228,6 +229,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
        bool want_ingress;
        bool is_redirect;
        bool expects_nh;
+       bool at_ingress;
        int m_eaction;
        int mac_len;
        bool at_nh;
@@ -263,7 +265,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
         * ingress - that covers the TC S/W datapath.
         */
        is_redirect = tcf_mirred_is_act_redirect(m_eaction);
-       use_reinsert = skb_at_tc_ingress(skb) && is_redirect &&
+       at_ingress = skb_at_tc_ingress(skb);
+       use_reinsert = at_ingress && is_redirect &&
                       tcf_mirred_can_reinsert(retval);
        if (!use_reinsert) {
                skb2 = skb_clone(skb, GFP_ATOMIC);
@@ -271,10 +274,12 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
                        goto out;
        }
 
+       want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+
        /* All mirred/redirected skbs should clear previous ct info */
        nf_reset_ct(skb2);
-
-       want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
+       if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
+               skb_dst_drop(skb2);
 
        expects_nh = want_ingress || !m_mac_header_xmit;
        at_nh = skb->data == skb_network_header(skb);
index b61c802..2692cba 100644 (file)
@@ -585,7 +585,7 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
                 * to clcsocket->wq during the fallback.
                 */
                spin_lock_irqsave(&smc_wait->lock, flags);
-               spin_lock(&clc_wait->lock);
+               spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING);
                list_splice_init(&smc_wait->head, &clc_wait->head);
                spin_unlock(&clc_wait->lock);
                spin_unlock_irqrestore(&smc_wait->lock, flags);
index 0f9ffba..3715d2f 100644 (file)
@@ -228,6 +228,12 @@ again:
                        /* send close request */
                        rc = smc_close_final(conn);
                        sk->sk_state = SMC_PEERCLOSEWAIT1;
+
+                       /* actively shutdown clcsock before peer close it,
+                        * prevent peer from entering TIME_WAIT state.
+                        */
+                       if (smc->clcsock && smc->clcsock->sk)
+                               rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
                } else {
                        /* peer event has changed the state */
                        goto again;
@@ -354,9 +360,9 @@ static void smc_close_passive_work(struct work_struct *work)
        if (rxflags->peer_conn_abort) {
                /* peer has not received all data */
                smc_close_passive_abort_received(smc);
-               release_sock(&smc->sk);
+               release_sock(sk);
                cancel_delayed_work_sync(&conn->tx_work);
-               lock_sock(&smc->sk);
+               lock_sock(sk);
                goto wakeup;
        }
 
index e701651..b4d9419 100644 (file)
@@ -597,6 +597,10 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey,
        tmp->cloned = NULL;
        tmp->authsize = TIPC_AES_GCM_TAG_SIZE;
        tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL);
+       if (!tmp->key) {
+               tipc_aead_free(&tmp->rcu);
+               return -ENOMEM;
+       }
        memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE);
        atomic_set(&tmp->users, 0);
        atomic64_set(&tmp->seqno, 0);
index 78e08e8..b0bfc78 100644 (file)
@@ -2882,9 +2882,6 @@ static int unix_shutdown(struct socket *sock, int mode)
 
        unix_state_lock(sk);
        sk->sk_shutdown |= mode;
-       if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
-           mode == SHUTDOWN_MASK)
-               sk->sk_state = TCP_CLOSE;
        other = unix_peer(sk);
        if (other)
                sock_hold(other);
index 4f7c99d..3f82b2f 100644 (file)
@@ -731,6 +731,7 @@ static unsigned int features[] = {
 static struct virtio_driver virtio_vsock_driver = {
        .feature_table = features,
        .feature_table_size = ARRAY_SIZE(features),
+       .suppress_used_validation = true,
        .driver.name = KBUILD_MODNAME,
        .driver.owner = THIS_MODULE,
        .id_table = id_table,
index 81232b7..a27b3b5 100644 (file)
@@ -936,33 +936,37 @@ nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
        [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
 };
 
-int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
-                             struct cfg80211_registered_device **rdev,
-                             struct wireless_dev **wdev)
+static int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
+                                    struct cfg80211_registered_device **rdev,
+                                    struct wireless_dev **wdev,
+                                    struct nlattr **attrbuf)
 {
        int err;
 
        if (!cb->args[0]) {
-               struct nlattr **attrbuf;
+               struct nlattr **attrbuf_free = NULL;
 
-               attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
-                                 GFP_KERNEL);
-               if (!attrbuf)
-                       return -ENOMEM;
+               if (!attrbuf) {
+                       attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
+                                         GFP_KERNEL);
+                       if (!attrbuf)
+                               return -ENOMEM;
+                       attrbuf_free = attrbuf;
+               }
 
                err = nlmsg_parse_deprecated(cb->nlh,
                                             GENL_HDRLEN + nl80211_fam.hdrsize,
                                             attrbuf, nl80211_fam.maxattr,
                                             nl80211_policy, NULL);
                if (err) {
-                       kfree(attrbuf);
+                       kfree(attrbuf_free);
                        return err;
                }
 
                rtnl_lock();
                *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk),
                                                   attrbuf);
-               kfree(attrbuf);
+               kfree(attrbuf_free);
                if (IS_ERR(*wdev)) {
                        rtnl_unlock();
                        return PTR_ERR(*wdev);
@@ -6197,7 +6201,7 @@ static int nl80211_dump_station(struct sk_buff *skb,
        int sta_idx = cb->args[2];
        int err;
 
-       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
        if (err)
                return err;
        /* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -7092,7 +7096,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
        int path_idx = cb->args[2];
        int err;
 
-       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
        if (err)
                return err;
        /* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -7292,7 +7296,7 @@ static int nl80211_dump_mpp(struct sk_buff *skb,
        int path_idx = cb->args[2];
        int err;
 
-       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
        if (err)
                return err;
        /* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -9718,7 +9722,7 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
        int start = cb->args[2], idx = 0;
        int err;
 
-       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+       err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
        if (err)
                return err;
        /* nl80211_prepare_wdev_dump acquired it in the successful case */
@@ -9851,7 +9855,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
        if (!attrbuf)
                return -ENOMEM;
 
-       res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev);
+       res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
        if (res) {
                kfree(attrbuf);
                return res;
index a3f3877..d642e3b 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Portions of this file
- * Copyright (C) 2018, 2020 Intel Corporation
+ * Copyright (C) 2018, 2020-2021 Intel Corporation
  */
 #ifndef __NET_WIRELESS_NL80211_H
 #define __NET_WIRELESS_NL80211_H
@@ -22,10 +22,6 @@ static inline u64 wdev_id(struct wireless_dev *wdev)
               ((u64)wiphy_to_rdev(wdev->wiphy)->wiphy_idx << 32);
 }
 
-int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
-                             struct cfg80211_registered_device **rdev,
-                             struct wireless_dev **wdev);
-
 int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
                          struct genl_info *info,
                          struct cfg80211_chan_def *chandef);
index 5ff1f87..41ea65d 100644 (file)
@@ -1046,6 +1046,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 
                switch (otype) {
                case NL80211_IFTYPE_AP:
+               case NL80211_IFTYPE_P2P_GO:
                        cfg80211_stop_ap(rdev, dev, true);
                        break;
                case NL80211_IFTYPE_ADHOC:
index 90c4e1e..bc4ad48 100644 (file)
@@ -500,7 +500,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
                pool->free_list_cnt--;
                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
                                        free_list_node);
-               list_del(&xskb->free_list_node);
+               list_del_init(&xskb->free_list_node);
        }
 
        xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
@@ -568,7 +568,7 @@ static u32 xp_alloc_reused(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u3
        i = nb_entries;
        while (i--) {
                xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk, free_list_node);
-               list_del(&xskb->free_list_node);
+               list_del_init(&xskb->free_list_node);
 
                *xdp = &xskb->xdp;
                xdp++;
@@ -615,6 +615,9 @@ EXPORT_SYMBOL(xp_can_alloc);
 
 void xp_free(struct xdp_buff_xsk *xskb)
 {
+       if (!list_empty(&xskb->free_list_node))
+               return;
+
        xskb->pool->free_list_cnt++;
        list_add(&xskb->free_list_node, &xskb->pool->free_list);
 }
index 722b3fa..1752a46 100644 (file)
@@ -9,8 +9,6 @@
  * Include file for sample Host Bandwidth Manager (HBM) BPF programs
  */
 #define KBUILD_MODNAME "foo"
-#include <stddef.h>
-#include <stdbool.h>
 #include <uapi/linux/bpf.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/if_packet.h>
index d84e694..a81704d 100644 (file)
@@ -309,7 +309,6 @@ int main(int argc, char **argv)
        const char *mprog_filename = NULL, *mprog_name = NULL;
        struct xdp_redirect_cpu *skel;
        struct bpf_map_info info = {};
-       char ifname_buf[IF_NAMESIZE];
        struct bpf_cpumap_val value;
        __u32 infosz = sizeof(info);
        int ret = EXIT_FAIL_OPTION;
@@ -390,10 +389,10 @@ int main(int argc, char **argv)
                case 'd':
                        if (strlen(optarg) >= IF_NAMESIZE) {
                                fprintf(stderr, "-d/--dev name too long\n");
+                               usage(argv, long_options, __doc__, mask, true, skel->obj);
                                goto end_cpu;
                        }
-                       safe_strncpy(ifname_buf, optarg, strlen(ifname_buf));
-                       ifindex = if_nametoindex(ifname_buf);
+                       ifindex = if_nametoindex(optarg);
                        if (!ifindex)
                                ifindex = strtoul(optarg, NULL, 0);
                        if (!ifindex) {
index b32d821..8740838 100644 (file)
@@ -120,7 +120,10 @@ struct sample_output {
                __u64 xmit;
        } totals;
        struct {
-               __u64 pps;
+               union {
+                       __u64 pps;
+                       __u64 num;
+               };
                __u64 drop;
                __u64 err;
        } rx_cnt;
@@ -1322,7 +1325,7 @@ int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic,
 
 static void sample_summary_print(void)
 {
-       double period = sample_out.rx_cnt.pps;
+       double num = sample_out.rx_cnt.num;
 
        if (sample_out.totals.rx) {
                double pkts = sample_out.totals.rx;
@@ -1330,7 +1333,7 @@ static void sample_summary_print(void)
                print_always("  Packets received    : %'-10llu\n",
                             sample_out.totals.rx);
                print_always("  Average packets/s   : %'-10.0f\n",
-                            sample_round(pkts / period));
+                            sample_round(pkts / num));
        }
        if (sample_out.totals.redir) {
                double pkts = sample_out.totals.redir;
@@ -1338,7 +1341,7 @@ static void sample_summary_print(void)
                print_always("  Packets redirected  : %'-10llu\n",
                             sample_out.totals.redir);
                print_always("  Average redir/s     : %'-10.0f\n",
-                            sample_round(pkts / period));
+                            sample_round(pkts / num));
        }
        if (sample_out.totals.drop)
                print_always("  Rx dropped          : %'-10llu\n",
@@ -1355,7 +1358,7 @@ static void sample_summary_print(void)
                print_always("  Packets transmitted : %'-10llu\n",
                             sample_out.totals.xmit);
                print_always("  Average transmit/s  : %'-10.0f\n",
-                            sample_round(pkts / period));
+                            sample_round(pkts / num));
        }
 }
 
@@ -1422,7 +1425,7 @@ static int sample_stats_collect(struct stats_record *rec)
        return 0;
 }
 
-static void sample_summary_update(struct sample_output *out, int interval)
+static void sample_summary_update(struct sample_output *out)
 {
        sample_out.totals.rx += out->totals.rx;
        sample_out.totals.redir += out->totals.redir;
@@ -1430,12 +1433,11 @@ static void sample_summary_update(struct sample_output *out, int interval)
        sample_out.totals.drop_xmit += out->totals.drop_xmit;
        sample_out.totals.err += out->totals.err;
        sample_out.totals.xmit += out->totals.xmit;
-       sample_out.rx_cnt.pps += interval;
+       sample_out.rx_cnt.num++;
 }
 
 static void sample_stats_print(int mask, struct stats_record *cur,
-                              struct stats_record *prev, char *prog_name,
-                              int interval)
+                              struct stats_record *prev, char *prog_name)
 {
        struct sample_output out = {};
 
@@ -1452,7 +1454,7 @@ static void sample_stats_print(int mask, struct stats_record *cur,
        else if (mask & SAMPLE_DEVMAP_XMIT_CNT_MULTI)
                stats_get_devmap_xmit_multi(cur, prev, 0, &out,
                                            mask & SAMPLE_DEVMAP_XMIT_CNT);
-       sample_summary_update(&out, interval);
+       sample_summary_update(&out);
 
        stats_print(prog_name, mask, cur, prev, &out);
 }
@@ -1495,7 +1497,7 @@ static void swap(struct stats_record **a, struct stats_record **b)
 }
 
 static int sample_timer_cb(int timerfd, struct stats_record **rec,
-                          struct stats_record **prev, int interval)
+                          struct stats_record **prev)
 {
        char line[64] = "Summary";
        int ret;
@@ -1524,7 +1526,7 @@ static int sample_timer_cb(int timerfd, struct stats_record **rec,
                snprintf(line, sizeof(line), "%s->%s", f ?: "?", t ?: "?");
        }
 
-       sample_stats_print(sample_mask, *rec, *prev, line, interval);
+       sample_stats_print(sample_mask, *rec, *prev, line);
        return 0;
 }
 
@@ -1579,7 +1581,7 @@ int sample_run(int interval, void (*post_cb)(void *), void *ctx)
                if (pfd[0].revents & POLLIN)
                        ret = sample_signal_cb();
                else if (pfd[1].revents & POLLIN)
-                       ret = sample_timer_cb(timerfd, &rec, &prev, interval);
+                       ret = sample_timer_cb(timerfd, &rec, &prev);
 
                if (ret)
                        break;
diff --git a/scripts/coccinelle/misc/do_div.cocci b/scripts/coccinelle/misc/do_div.cocci
new file mode 100644 (file)
index 0000000..79db083
--- /dev/null
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/// do_div() does a 64-by-32 division.
+/// When the divisor is long, unsigned long, u64, or s64,
+/// do_div() truncates it to 32 bits, this means it can test
+/// non-zero and be truncated to 0 for division on 64bit platforms.
+///
+//# This makes an effort to find those inappropriate do_div() calls.
+//
+// Confidence: Moderate
+// Copyright: (C) 2020 Wen Yang, Alibaba.
+// Comments:
+// Options: --no-includes --include-headers
+
+virtual context
+virtual org
+virtual report
+
+@initialize:python@
+@@
+
+def get_digit_type_and_value(str):
+    is_digit = False
+    value = 0
+
+    try:
+        if (str.isdigit()):
+           is_digit = True
+           value =  int(str, 0)
+        elif (str.upper().endswith('ULL')):
+           is_digit = True
+           value = int(str[:-3], 0)
+        elif (str.upper().endswith('LL')):
+           is_digit = True
+           value = int(str[:-2], 0)
+        elif (str.upper().endswith('UL')):
+           is_digit = True
+           value = int(str[:-2], 0)
+        elif (str.upper().endswith('L')):
+           is_digit = True
+           value = int(str[:-1], 0)
+        elif (str.upper().endswith('U')):
+           is_digit = True
+           value = int(str[:-1], 0)
+    except Exception as e:
+          print('Error:',e)
+          is_digit = False
+          value = 0
+    finally:
+        return is_digit, value
+
+def filter_out_safe_constants(str):
+    is_digit, value = get_digit_type_and_value(str)
+    if (is_digit):
+        if (value >= 0x100000000):
+            return True
+        else:
+            return False
+    else:
+        return True
+
+def construct_warnings(suggested_fun):
+    msg="WARNING: do_div() does a 64-by-32 division, please consider using %s instead."
+    return  msg % suggested_fun
+
+@depends on context@
+expression f;
+long l: script:python() { filter_out_safe_constants(l) };
+unsigned long ul : script:python() { filter_out_safe_constants(ul) };
+u64 ul64 : script:python() { filter_out_safe_constants(ul64) };
+s64 sl64 : script:python() { filter_out_safe_constants(sl64) };
+
+@@
+(
+* do_div(f, l);
+|
+* do_div(f, ul);
+|
+* do_div(f, ul64);
+|
+* do_div(f, sl64);
+)
+
+@r depends on (org || report)@
+expression f;
+position p;
+long l: script:python() { filter_out_safe_constants(l) };
+unsigned long ul : script:python() { filter_out_safe_constants(ul) };
+u64 ul64 : script:python() { filter_out_safe_constants(ul64) };
+s64 sl64 : script:python() { filter_out_safe_constants(sl64) };
+@@
+(
+do_div@p(f, l);
+|
+do_div@p(f, ul);
+|
+do_div@p(f, ul64);
+|
+do_div@p(f, sl64);
+)
+
+@script:python depends on org@
+p << r.p;
+ul << r.ul;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_ul"))
+
+@script:python depends on org@
+p << r.p;
+l << r.l;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_long"))
+
+@script:python depends on org@
+p << r.p;
+ul64 << r.ul64;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_u64"))
+
+@script:python depends on org@
+p << r.p;
+sl64 << r.sl64;
+@@
+
+coccilib.org.print_todo(p[0], construct_warnings("div64_s64"))
+
+@script:python depends on report@
+p << r.p;
+ul << r.ul;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_ul"))
+
+@script:python depends on report@
+p << r.p;
+l << r.l;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_long"))
+
+@script:python depends on report@
+p << r.p;
+sl64 << r.sl64;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_s64"))
+
+@script:python depends on report@
+p << r.p;
+ul64 << r.ul64;
+@@
+
+coccilib.report.print_report(p[0], construct_warnings("div64_u64"))
index c3eb81c..0114c41 100755 (executable)
@@ -28,4 +28,9 @@ if [ -n "${building_out_of_srctree}" ]; then
        do
                rm -f arch/arm/boot/compressed/${f}
        done
+
+       for f in uart-ath79.c ashldi3.c bswapdi.c bswapsi.c
+       do
+               rm -f arch/mips/boot/compressed/${f}
+       done
 fi
index a7c4134..01e2650 100644 (file)
 
 #define MSR_IA32_BNDCFGS_RSVD          0x00000ffc
 
+#define MSR_IA32_XFD                   0x000001c4
+#define MSR_IA32_XFD_ERR               0x000001c5
 #define MSR_IA32_XSS                   0x00000da0
 
 #define MSR_IA32_APICBASE              0x0000001b
index 5a6aac9..754a078 100644 (file)
 #define ARCH_GET_CPUID         0x1011
 #define ARCH_SET_CPUID         0x1012
 
+#define ARCH_GET_XCOMP_SUPP    0x1021
+#define ARCH_GET_XCOMP_PERM    0x1022
+#define ARCH_REQ_XCOMP_PERM    0x1023
+
 #define ARCH_MAP_VDSO_X32      0x2001
 #define ARCH_MAP_VDSO_32       0x2002
 #define ARCH_MAP_VDSO_64       0x2003
index bbd1150..8791d0e 100644 (file)
@@ -88,5 +88,4 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OU
 
 $(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT)
        $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT)   \
-                   LIBBPF_OUTPUT=$(BPFOBJ_OUTPUT)                             \
-                   LIBBPF_DESTDIR=$(BPF_DESTDIR) CC=$(HOSTCC) LD=$(HOSTLD)
+                   CC=$(HOSTCC) LD=$(HOSTLD)
index 1c5fb86..4557a8b 100644 (file)
@@ -880,8 +880,11 @@ __SYSCALL(__NR_memfd_secret, sys_memfd_secret)
 #define __NR_process_mrelease 448
 __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
 
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+
 #undef __NR_syscalls
-#define __NR_syscalls 449
+#define __NR_syscalls 450
 
 /*
  * 32 bit systems traditionally used different
index bde5860..914ebd9 100644 (file)
@@ -1522,6 +1522,12 @@ struct drm_i915_gem_caching {
 #define I915_TILING_NONE       0
 #define I915_TILING_X          1
 #define I915_TILING_Y          2
+/*
+ * Do not add new tiling types here.  The I915_TILING_* values are for
+ * de-tiling fence registers that no longer exist on modern platforms.  Although
+ * the hardware may support new types of tiling in general (e.g., Tile4), we
+ * do not need to add them to the uapi that is specific to now-defunct ioctls.
+ */
 #define I915_TILING_LAST       I915_TILING_Y
 
 #define I915_BIT_6_SWIZZLE_NONE                0
@@ -1824,6 +1830,7 @@ struct drm_i915_gem_context_param {
  * Extensions:
  *   i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE)
  *   i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
+ *   i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT)
  */
 #define I915_CONTEXT_PARAM_ENGINES     0xa
 
@@ -1846,6 +1853,55 @@ struct drm_i915_gem_context_param {
  * attempted to use it, never re-use this context param number.
  */
 #define I915_CONTEXT_PARAM_RINGSIZE    0xc
+
+/*
+ * I915_CONTEXT_PARAM_PROTECTED_CONTENT:
+ *
+ * Mark that the context makes use of protected content, which will result
+ * in the context being invalidated when the protected content session is.
+ * Given that the protected content session is killed on suspend, the device
+ * is kept awake for the lifetime of a protected context, so the user should
+ * make sure to dispose of them once done.
+ * This flag can only be set at context creation time and, when set to true,
+ * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE
+ * to false. This flag can't be set to true in conjunction with setting the
+ * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example:
+ *
+ * .. code-block:: C
+ *
+ *     struct drm_i915_gem_context_create_ext_setparam p_protected = {
+ *             .base = {
+ *                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ *             },
+ *             .param = {
+ *                     .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT,
+ *                     .value = 1,
+ *             }
+ *     };
+ *     struct drm_i915_gem_context_create_ext_setparam p_norecover = {
+ *             .base = {
+ *                     .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
+ *                     .next_extension = to_user_pointer(&p_protected),
+ *             },
+ *             .param = {
+ *                     .param = I915_CONTEXT_PARAM_RECOVERABLE,
+ *                     .value = 0,
+ *             }
+ *     };
+ *     struct drm_i915_gem_context_create_ext create = {
+ *             .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ *             .extensions = to_user_pointer(&p_norecover);
+ *     };
+ *
+ *     ctx_id = gem_context_create_ext(drm_fd, &create);
+ *
+ * In addition to the normal failure cases, setting this flag during context
+ * creation can result in the following errors:
+ *
+ * -ENODEV: feature not available
+ * -EPERM: trying to mark a recoverable or not bannable context as protected
+ */
+#define I915_CONTEXT_PARAM_PROTECTED_CONTENT    0xd
 /* Must be kept compact -- no holes and well documented */
 
        __u64 value;
@@ -2050,6 +2106,135 @@ struct i915_context_engines_bond {
 } __attribute__((packed)) name__
 
 /**
+ * struct i915_context_engines_parallel_submit - Configure engine for
+ * parallel submission.
+ *
+ * Setup a slot in the context engine map to allow multiple BBs to be submitted
+ * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU
+ * in parallel. Multiple hardware contexts are created internally in the i915 to
+ * run these BBs. Once a slot is configured for N BBs only N BBs can be
+ * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user
+ * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how
+ * many BBs there are based on the slot's configuration. The N BBs are the last
+ * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set.
+ *
+ * The default placement behavior is to create implicit bonds between each
+ * context if each context maps to more than 1 physical engine (e.g. context is
+ * a virtual engine). Also we only allow contexts of same engine class and these
+ * contexts must be in logically contiguous order. Examples of the placement
+ * behavior are described below. Lastly, the default is to not allow BBs to be
+ * preempted mid-batch. Rather insert coordinated preemption points on all
+ * hardware contexts between each set of BBs. Flags could be added in the future
+ * to change both of these default behaviors.
+ *
+ * Returns -EINVAL if hardware context placement configuration is invalid or if
+ * the placement configuration isn't supported on the platform / submission
+ * interface.
+ * Returns -ENODEV if extension isn't supported on the platform / submission
+ * interface.
+ *
+ * .. code-block:: none
+ *
+ *     Examples syntax:
+ *     CS[X] = generic engine of same class, logical instance X
+ *     INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE
+ *
+ *     Example 1 pseudo code:
+ *     set_engines(INVALID)
+ *     set_parallel(engine_index=0, width=2, num_siblings=1,
+ *                  engines=CS[0],CS[1])
+ *
+ *     Results in the following valid placement:
+ *     CS[0], CS[1]
+ *
+ *     Example 2 pseudo code:
+ *     set_engines(INVALID)
+ *     set_parallel(engine_index=0, width=2, num_siblings=2,
+ *                  engines=CS[0],CS[2],CS[1],CS[3])
+ *
+ *     Results in the following valid placements:
+ *     CS[0], CS[1]
+ *     CS[2], CS[3]
+ *
+ *     This can be thought of as two virtual engines, each containing two
+ *     engines thereby making a 2D array. However, there are bonds tying the
+ *     entries together and placing restrictions on how they can be scheduled.
+ *     Specifically, the scheduler can choose only vertical columns from the 2D
+ *     array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the
+ *     scheduler wants to submit to CS[0], it must also choose CS[1] and vice
+ *     versa. Same for CS[2] requires also using CS[3].
+ *     VE[0] = CS[0], CS[2]
+ *     VE[1] = CS[1], CS[3]
+ *
+ *     Example 3 pseudo code:
+ *     set_engines(INVALID)
+ *     set_parallel(engine_index=0, width=2, num_siblings=2,
+ *                  engines=CS[0],CS[1],CS[1],CS[3])
+ *
+ *     Results in the following valid and invalid placements:
+ *     CS[0], CS[1]
+ *     CS[1], CS[3] - Not logically contiguous, return -EINVAL
+ */
+struct i915_context_engines_parallel_submit {
+       /**
+        * @base: base user extension.
+        */
+       struct i915_user_extension base;
+
+       /**
+        * @engine_index: slot for parallel engine
+        */
+       __u16 engine_index;
+
+       /**
+        * @width: number of contexts per parallel engine or in other words the
+        * number of batches in each submission
+        */
+       __u16 width;
+
+       /**
+        * @num_siblings: number of siblings per context or in other words the
+        * number of possible placements for each submission
+        */
+       __u16 num_siblings;
+
+       /**
+        * @mbz16: reserved for future use; must be zero
+        */
+       __u16 mbz16;
+
+       /**
+        * @flags: all undefined flags must be zero, currently not defined flags
+        */
+       __u64 flags;
+
+       /**
+        * @mbz64: reserved for future use; must be zero
+        */
+       __u64 mbz64[3];
+
+       /**
+        * @engines: 2-d array of engine instances to configure parallel engine
+        *
+        * length = width (i) * num_siblings (j)
+        * index = j + i * num_siblings
+        */
+       struct i915_engine_class_instance engines[0];
+
+} __packed;
+
+#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \
+       struct i915_user_extension base; \
+       __u16 engine_index; \
+       __u16 width; \
+       __u16 num_siblings; \
+       __u16 mbz16; \
+       __u64 flags; \
+       __u64 mbz64[3]; \
+       struct i915_engine_class_instance engines[N__]; \
+} __attribute__((packed)) name__
+
+/**
  * DOC: Context Engine Map uAPI
  *
  * Context engine map is a new way of addressing engines when submitting batch-
@@ -2108,6 +2293,7 @@ struct i915_context_param_engines {
        __u64 extensions; /* linked chain of extension blocks, 0 terminates */
 #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */
 #define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */
+#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */
        struct i915_engine_class_instance engines[0];
 } __attribute__((packed));
 
@@ -2726,14 +2912,20 @@ struct drm_i915_engine_info {
 
        /** @flags: Engine flags. */
        __u64 flags;
+#define I915_ENGINE_INFO_HAS_LOGICAL_INSTANCE          (1 << 0)
 
        /** @capabilities: Capabilities of this engine. */
        __u64 capabilities;
 #define I915_VIDEO_CLASS_CAPABILITY_HEVC               (1 << 0)
 #define I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC    (1 << 1)
 
+       /** @logical_instance: Logical instance of engine */
+       __u16 logical_instance;
+
        /** @rsvd1: Reserved fields. */
-       __u64 rsvd1[4];
+       __u16 rsvd1[3];
+       /** @rsvd2: Reserved fields. */
+       __u64 rsvd2[3];
 };
 
 /**
@@ -2979,8 +3171,12 @@ struct drm_i915_gem_create_ext {
         *
         * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
         * struct drm_i915_gem_create_ext_memory_regions.
+        *
+        * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
+        * struct drm_i915_gem_create_ext_protected_content.
         */
 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
+#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
        __u64 extensions;
 };
 
@@ -3038,6 +3234,50 @@ struct drm_i915_gem_create_ext_memory_regions {
        __u64 regions;
 };
 
+/**
+ * struct drm_i915_gem_create_ext_protected_content - The
+ * I915_OBJECT_PARAM_PROTECTED_CONTENT extension.
+ *
+ * If this extension is provided, buffer contents are expected to be protected
+ * by PXP encryption and require decryption for scan out and processing. This
+ * is only possible on platforms that have PXP enabled, on all other scenarios
+ * using this extension will cause the ioctl to fail and return -ENODEV. The
+ * flags parameter is reserved for future expansion and must currently be set
+ * to zero.
+ *
+ * The buffer contents are considered invalid after a PXP session teardown.
+ *
+ * The encryption is guaranteed to be processed correctly only if the object
+ * is submitted with a context created using the
+ * I915_CONTEXT_PARAM_PROTECTED_CONTENT flag. This will also enable extra checks
+ * at submission time on the validity of the objects involved.
+ *
+ * Below is an example on how to create a protected object:
+ *
+ * .. code-block:: C
+ *
+ *      struct drm_i915_gem_create_ext_protected_content protected_ext = {
+ *              .base = { .name = I915_GEM_CREATE_EXT_PROTECTED_CONTENT },
+ *              .flags = 0,
+ *      };
+ *      struct drm_i915_gem_create_ext create_ext = {
+ *              .size = PAGE_SIZE,
+ *              .extensions = (uintptr_t)&protected_ext,
+ *      };
+ *
+ *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ *      if (err) ...
+ */
+struct drm_i915_gem_create_ext_protected_content {
+       /** @base: Extension link. See struct i915_user_extension. */
+       struct i915_user_extension base;
+       /** @flags: reserved for future usage, currently MBZ */
+       __u32 flags;
+};
+
+/* ID of the protected content session managed by i915 when PXP is active */
+#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
+
 #if defined(__cplusplus)
 }
 #endif
index b3610fd..eebd389 100644 (file)
@@ -7,24 +7,23 @@
 
 /* This struct should be in sync with struct rtnl_link_stats64 */
 struct rtnl_link_stats {
-       __u32   rx_packets;             /* total packets received       */
-       __u32   tx_packets;             /* total packets transmitted    */
-       __u32   rx_bytes;               /* total bytes received         */
-       __u32   tx_bytes;               /* total bytes transmitted      */
-       __u32   rx_errors;              /* bad packets received         */
-       __u32   tx_errors;              /* packet transmit problems     */
-       __u32   rx_dropped;             /* no space in linux buffers    */
-       __u32   tx_dropped;             /* no space available in linux  */
-       __u32   multicast;              /* multicast packets received   */
+       __u32   rx_packets;
+       __u32   tx_packets;
+       __u32   rx_bytes;
+       __u32   tx_bytes;
+       __u32   rx_errors;
+       __u32   tx_errors;
+       __u32   rx_dropped;
+       __u32   tx_dropped;
+       __u32   multicast;
        __u32   collisions;
-
        /* detailed rx_errors: */
        __u32   rx_length_errors;
-       __u32   rx_over_errors;         /* receiver ring buff overflow  */
-       __u32   rx_crc_errors;          /* recved pkt with crc error    */
-       __u32   rx_frame_errors;        /* recv'd frame alignment error */
-       __u32   rx_fifo_errors;         /* recv'r fifo overrun          */
-       __u32   rx_missed_errors;       /* receiver missed packet       */
+       __u32   rx_over_errors;
+       __u32   rx_crc_errors;
+       __u32   rx_frame_errors;
+       __u32   rx_fifo_errors;
+       __u32   rx_missed_errors;
 
        /* detailed tx_errors */
        __u32   tx_aborted_errors;
@@ -37,29 +36,201 @@ struct rtnl_link_stats {
        __u32   rx_compressed;
        __u32   tx_compressed;
 
-       __u32   rx_nohandler;           /* dropped, no handler found    */
+       __u32   rx_nohandler;
 };
 
-/* The main device statistics structure */
+/**
+ * struct rtnl_link_stats64 - The main device statistics structure.
+ *
+ * @rx_packets: Number of good packets received by the interface.
+ *   For hardware interfaces counts all good packets received from the device
+ *   by the host, including packets which host had to drop at various stages
+ *   of processing (even in the driver).
+ *
+ * @tx_packets: Number of packets successfully transmitted.
+ *   For hardware interfaces counts packets which host was able to successfully
+ *   hand over to the device, which does not necessarily mean that packets
+ *   had been successfully transmitted out of the device, only that device
+ *   acknowledged it copied them out of host memory.
+ *
+ * @rx_bytes: Number of good received bytes, corresponding to @rx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets.
+ *
+ *   For IEEE 802.3 devices should count the length of Ethernet Frames
+ *   excluding the FCS.
+ *
+ * @rx_errors: Total number of bad packets received on this network device.
+ *   This counter must include events counted by @rx_length_errors,
+ *   @rx_crc_errors, @rx_frame_errors and other errors not otherwise
+ *   counted.
+ *
+ * @tx_errors: Total number of transmit problems.
+ *   This counter must include events counter by @tx_aborted_errors,
+ *   @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors,
+ *   @tx_window_errors and other errors not otherwise counted.
+ *
+ * @rx_dropped: Number of packets received but not processed,
+ *   e.g. due to lack of resources or unsupported protocol.
+ *   For hardware interfaces this counter may include packets discarded
+ *   due to L2 address filtering but should not include packets dropped
+ *   by the device due to buffer exhaustion which are counted separately in
+ *   @rx_missed_errors (since procfs folds those two counters together).
+ *
+ * @tx_dropped: Number of packets dropped on their way to transmission,
+ *   e.g. due to lack of resources.
+ *
+ * @multicast: Multicast packets received.
+ *   For hardware interfaces this statistic is commonly calculated
+ *   at the device level (unlike @rx_packets) and therefore may include
+ *   packets which did not reach the host.
+ *
+ *   For IEEE 802.3 devices this counter may be equivalent to:
+ *
+ *    - 30.3.1.1.21 aMulticastFramesReceivedOK
+ *
+ * @collisions: Number of collisions during packet transmissions.
+ *
+ * @rx_length_errors: Number of packets dropped due to invalid length.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to a sum
+ *   of the following attributes:
+ *
+ *    - 30.3.1.1.23 aInRangeLengthErrors
+ *    - 30.3.1.1.24 aOutOfRangeLengthField
+ *    - 30.3.1.1.25 aFrameTooLongErrors
+ *
+ * @rx_over_errors: Receiver FIFO overflow event counter.
+ *
+ *   Historically the count of overflow events. Such events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   The recommended interpretation for high speed interfaces is -
+ *   number of packets dropped because they did not fit into buffers
+ *   provided by the host, e.g. packets larger than MTU or next buffer
+ *   in the ring was not available for a scatter transfer.
+ *
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   This statistics was historically used interchangeably with
+ *   @rx_fifo_errors.
+ *
+ *   This statistic corresponds to hardware events and is not commonly used
+ *   on software devices.
+ *
+ * @rx_crc_errors: Number of packets received with a CRC error.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.6 aFrameCheckSequenceErrors
+ *
+ * @rx_frame_errors: Receiver frame alignment errors.
+ *   Part of aggregate "frame" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter should be equivalent to:
+ *
+ *    - 30.3.1.1.7 aAlignmentErrors
+ *
+ * @rx_fifo_errors: Receiver FIFO error counter.
+ *
+ *   Historically the count of overflow events. Those events may be
+ *   reported in the receive descriptors or via interrupts, and may
+ *   not correspond one-to-one with dropped packets.
+ *
+ *   This statistics was used interchangeably with @rx_over_errors.
+ *   Not recommended for use in drivers for high speed interfaces.
+ *
+ *   This statistic is used on software devices, e.g. to count software
+ *   packet queue overflow (can) or sequencing errors (GRE).
+ *
+ * @rx_missed_errors: Count of packets missed by the host.
+ *   Folded into the "drop" counter in `/proc/net/dev`.
+ *
+ *   Counts number of packets dropped by the device due to lack
+ *   of buffer space. This usually indicates that the host interface
+ *   is slower than the network interface, or host is not keeping up
+ *   with the receive packet rate.
+ *
+ *   This statistic corresponds to hardware events and is not used
+ *   on software devices.
+ *
+ * @tx_aborted_errors:
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *   For IEEE 802.3 devices capable of half-duplex operation this counter
+ *   must be equivalent to:
+ *
+ *    - 30.3.1.1.11 aFramesAbortedDueToXSColls
+ *
+ *   High speed interfaces may use this counter as a general device
+ *   discard counter.
+ *
+ * @tx_carrier_errors: Number of frame transmission errors due to loss
+ *   of carrier during transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.13 aCarrierSenseErrors
+ *
+ * @tx_fifo_errors: Number of frame transmission errors due to device
+ *   FIFO underrun / underflow. This condition occurs when the device
+ *   begins transmission of a frame but is unable to deliver the
+ *   entire frame to the transmitter in time for transmission.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for
+ *   old half-duplex Ethernet.
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices possibly equivalent to:
+ *
+ *    - 30.3.2.1.4 aSQETestErrors
+ *
+ * @tx_window_errors: Number of frame transmission errors due
+ *   to late collisions (for Ethernet - after the first 64B of transmission).
+ *   Part of aggregate "carrier" errors in `/proc/net/dev`.
+ *
+ *   For IEEE 802.3 devices this counter must be equivalent to:
+ *
+ *    - 30.3.1.1.10 aLateCollisions
+ *
+ * @rx_compressed: Number of correctly received compressed packets.
+ *   This counters is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @tx_compressed: Number of transmitted compressed packets.
+ *   This counters is only meaningful for interfaces which support
+ *   packet compression (e.g. CSLIP, PPP).
+ *
+ * @rx_nohandler: Number of packets received on the interface
+ *   but dropped by the networking stack because the device is
+ *   not designated to receive packets (e.g. backup link in a bond).
+ */
 struct rtnl_link_stats64 {
-       __u64   rx_packets;             /* total packets received       */
-       __u64   tx_packets;             /* total packets transmitted    */
-       __u64   rx_bytes;               /* total bytes received         */
-       __u64   tx_bytes;               /* total bytes transmitted      */
-       __u64   rx_errors;              /* bad packets received         */
-       __u64   tx_errors;              /* packet transmit problems     */
-       __u64   rx_dropped;             /* no space in linux buffers    */
-       __u64   tx_dropped;             /* no space available in linux  */
-       __u64   multicast;              /* multicast packets received   */
+       __u64   rx_packets;
+       __u64   tx_packets;
+       __u64   rx_bytes;
+       __u64   tx_bytes;
+       __u64   rx_errors;
+       __u64   tx_errors;
+       __u64   rx_dropped;
+       __u64   tx_dropped;
+       __u64   multicast;
        __u64   collisions;
 
        /* detailed rx_errors: */
        __u64   rx_length_errors;
-       __u64   rx_over_errors;         /* receiver ring buff overflow  */
-       __u64   rx_crc_errors;          /* recved pkt with crc error    */
-       __u64   rx_frame_errors;        /* recv'd frame alignment error */
-       __u64   rx_fifo_errors;         /* recv'r fifo overrun          */
-       __u64   rx_missed_errors;       /* receiver missed packet       */
+       __u64   rx_over_errors;
+       __u64   rx_crc_errors;
+       __u64   rx_frame_errors;
+       __u64   rx_fifo_errors;
+       __u64   rx_missed_errors;
 
        /* detailed tx_errors */
        __u64   tx_aborted_errors;
@@ -71,8 +242,7 @@ struct rtnl_link_stats64 {
        /* for cslip etc */
        __u64   rx_compressed;
        __u64   tx_compressed;
-
-       __u64   rx_nohandler;           /* dropped, no handler found    */
+       __u64   rx_nohandler;
 };
 
 /* The struct should be in sync with struct ifmap */
@@ -170,12 +340,29 @@ enum {
        IFLA_PROP_LIST,
        IFLA_ALT_IFNAME, /* Alternative ifname */
        IFLA_PERM_ADDRESS,
+       IFLA_PROTO_DOWN_REASON,
+
+       /* device (sysfs) name as parent, used instead
+        * of IFLA_LINK where there's no parent netdev
+        */
+       IFLA_PARENT_DEV_NAME,
+       IFLA_PARENT_DEV_BUS_NAME,
+
        __IFLA_MAX
 };
 
 
 #define IFLA_MAX (__IFLA_MAX - 1)
 
+enum {
+       IFLA_PROTO_DOWN_REASON_UNSPEC,
+       IFLA_PROTO_DOWN_REASON_MASK,    /* u32, mask for reason bits */
+       IFLA_PROTO_DOWN_REASON_VALUE,   /* u32, reason bit value */
+
+       __IFLA_PROTO_DOWN_REASON_CNT,
+       IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1
+};
+
 /* backwards compatibility for userspace */
 #ifndef __KERNEL__
 #define IFLA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
@@ -293,6 +480,7 @@ enum {
        IFLA_BR_MCAST_MLD_VERSION,
        IFLA_BR_VLAN_STATS_PER_PORT,
        IFLA_BR_MULTI_BOOLOPT,
+       IFLA_BR_MCAST_QUERIER_STATE,
        __IFLA_BR_MAX,
 };
 
@@ -346,6 +534,8 @@ enum {
        IFLA_BRPORT_BACKUP_PORT,
        IFLA_BRPORT_MRP_RING_OPEN,
        IFLA_BRPORT_MRP_IN_OPEN,
+       IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
+       IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
        __IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -433,6 +623,7 @@ enum macvlan_macaddr_mode {
 };
 
 #define MACVLAN_FLAG_NOPROMISC 1
+#define MACVLAN_FLAG_NODST     2 /* skip dst macvlan if matching src macvlan */
 
 /* VRF section */
 enum {
@@ -597,6 +788,18 @@ enum ifla_geneve_df {
        GENEVE_DF_MAX = __GENEVE_DF_END - 1,
 };
 
+/* Bareudp section  */
+enum {
+       IFLA_BAREUDP_UNSPEC,
+       IFLA_BAREUDP_PORT,
+       IFLA_BAREUDP_ETHERTYPE,
+       IFLA_BAREUDP_SRCPORT_MIN,
+       IFLA_BAREUDP_MULTIPROTO_MODE,
+       __IFLA_BAREUDP_MAX
+};
+
+#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1)
+
 /* PPP section */
 enum {
        IFLA_PPP_UNSPEC,
@@ -899,7 +1102,14 @@ enum {
 #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
 
 
-/* HSR section */
+/* HSR/PRP section, both uses same interface */
+
+/* Different redundancy protocols for hsr device */
+enum {
+       HSR_PROTOCOL_HSR,
+       HSR_PROTOCOL_PRP,
+       HSR_PROTOCOL_MAX,
+};
 
 enum {
        IFLA_HSR_UNSPEC,
@@ -909,6 +1119,9 @@ enum {
        IFLA_HSR_SUPERVISION_ADDR,      /* Supervision frame multicast addr */
        IFLA_HSR_SEQ_NR,
        IFLA_HSR_VERSION,               /* HSR version */
+       IFLA_HSR_PROTOCOL,              /* Indicate different protocol than
+                                        * HSR. For example PRP.
+                                        */
        __IFLA_HSR_MAX,
 };
 
@@ -1033,6 +1246,8 @@ enum {
 #define RMNET_FLAGS_INGRESS_MAP_COMMANDS          (1U << 1)
 #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4           (1U << 2)
 #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4            (1U << 3)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5           (1U << 4)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5            (1U << 5)
 
 enum {
        IFLA_RMNET_UNSPEC,
@@ -1048,4 +1263,14 @@ struct ifla_rmnet_flags {
        __u32   mask;
 };
 
+/* MCTP section */
+
+enum {
+       IFLA_MCTP_UNSPEC,
+       IFLA_MCTP_NET,
+       __IFLA_MCTP_MAX,
+};
+
+#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1)
+
 #endif /* _UAPI_LINUX_IF_LINK_H */
index de45fcd..bb73e9a 100644 (file)
@@ -268,5 +268,8 @@ struct prctl_mm_map {
 # define PR_SCHED_CORE_SHARE_TO                2 /* push core_sched cookie to pid */
 # define PR_SCHED_CORE_SHARE_FROM      3 /* pull core_sched cookie to pid */
 # define PR_SCHED_CORE_MAX             4
+# define PR_SCHED_CORE_SCOPE_THREAD            0
+# define PR_SCHED_CORE_SCOPE_THREAD_GROUP      1
+# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP     2
 
 #endif /* _LINUX_PRCTL_H */
index 5859ca0..5fbb79e 100644 (file)
@@ -1002,7 +1002,7 @@ typedef int __bitwise snd_ctl_elem_iface_t;
 #define SNDRV_CTL_ELEM_ACCESS_WRITE            (1<<1)
 #define SNDRV_CTL_ELEM_ACCESS_READWRITE                (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
 #define SNDRV_CTL_ELEM_ACCESS_VOLATILE         (1<<2)  /* control value may be changed without a notification */
-// (1 << 3) is unused.
+/* (1 << 3) is unused. */
 #define SNDRV_CTL_ELEM_ACCESS_TLV_READ         (1<<4)  /* TLV read is possible */
 #define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE                (1<<5)  /* TLV write is possible */
 #define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE    (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
index d26e547..6f3df00 100644 (file)
@@ -45,8 +45,8 @@ struct bpf_gen {
        int nr_fd_array;
 };
 
-void bpf_gen__init(struct bpf_gen *gen, int log_level);
-int bpf_gen__finish(struct bpf_gen *gen);
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps);
 void bpf_gen__free(struct bpf_gen *gen);
 void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size);
 void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx);
index 502dea5..9934851 100644 (file)
@@ -18,7 +18,7 @@
 #define MAX_USED_MAPS  64
 #define MAX_USED_PROGS 32
 #define MAX_KFUNC_DESCS 256
-#define MAX_FD_ARRAY_SZ (MAX_USED_PROGS + MAX_KFUNC_DESCS)
+#define MAX_FD_ARRAY_SZ (MAX_USED_MAPS + MAX_KFUNC_DESCS)
 
 /* The following structure describes the stack layout of the loader program.
  * In addition R6 contains the pointer to context.
@@ -33,8 +33,8 @@
  */
 struct loader_stack {
        __u32 btf_fd;
-       __u32 prog_fd[MAX_USED_PROGS];
        __u32 inner_map_fd;
+       __u32 prog_fd[MAX_USED_PROGS];
 };
 
 #define stack_off(field) \
@@ -42,6 +42,11 @@ struct loader_stack {
 
 #define attr_field(attr, field) (attr + offsetof(union bpf_attr, field))
 
+static int blob_fd_array_off(struct bpf_gen *gen, int index)
+{
+       return gen->fd_array + index * sizeof(int);
+}
+
 static int realloc_insn_buf(struct bpf_gen *gen, __u32 size)
 {
        size_t off = gen->insn_cur - gen->insn_start;
@@ -102,11 +107,15 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
        emit(gen, insn2);
 }
 
-void bpf_gen__init(struct bpf_gen *gen, int log_level)
+static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
+static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+
+void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
 {
-       size_t stack_sz = sizeof(struct loader_stack);
+       size_t stack_sz = sizeof(struct loader_stack), nr_progs_sz;
        int i;
 
+       gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
        gen->log_level = log_level;
        /* save ctx pointer into R6 */
        emit(gen, BPF_MOV64_REG(BPF_REG_6, BPF_REG_1));
@@ -118,19 +127,27 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level)
        emit(gen, BPF_MOV64_IMM(BPF_REG_3, 0));
        emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel));
 
+       /* amount of stack actually used, only used to calculate iterations, not stack offset */
+       nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]);
        /* jump over cleanup code */
        emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0,
-                             /* size of cleanup code below */
-                             (stack_sz / 4) * 3 + 2));
+                             /* size of cleanup code below (including map fd cleanup) */
+                             (nr_progs_sz / 4) * 3 + 2 +
+                             /* 6 insns for emit_sys_close_blob,
+                              * 6 insns for debug_regs in emit_sys_close_blob
+                              */
+                             nr_maps * (6 + (gen->log_level ? 6 : 0))));
 
        /* remember the label where all error branches will jump to */
        gen->cleanup_label = gen->insn_cur - gen->insn_start;
        /* emit cleanup code: close all temp FDs */
-       for (i = 0; i < stack_sz; i += 4) {
+       for (i = 0; i < nr_progs_sz; i += 4) {
                emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
                emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
                emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
        }
+       for (i = 0; i < nr_maps; i++)
+               emit_sys_close_blob(gen, blob_fd_array_off(gen, i));
        /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
        emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
        emit(gen, BPF_EXIT_INSN());
@@ -160,8 +177,6 @@ static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
  */
 static int add_map_fd(struct bpf_gen *gen)
 {
-       if (!gen->fd_array)
-               gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
        if (gen->nr_maps == MAX_USED_MAPS) {
                pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
                gen->error = -E2BIG;
@@ -174,8 +189,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen)
 {
        int cur;
 
-       if (!gen->fd_array)
-               gen->fd_array = add_data(gen, NULL, MAX_FD_ARRAY_SZ * sizeof(int));
        if (gen->nr_fd_array == MAX_KFUNC_DESCS) {
                cur = add_data(gen, NULL, sizeof(int));
                return (cur - gen->fd_array) / sizeof(int);
@@ -183,11 +196,6 @@ static int add_kfunc_btf_fd(struct bpf_gen *gen)
        return MAX_USED_MAPS + gen->nr_fd_array++;
 }
 
-static int blob_fd_array_off(struct bpf_gen *gen, int index)
-{
-       return gen->fd_array + index * sizeof(int);
-}
-
 static int insn_bytes_to_bpf_size(__u32 sz)
 {
        switch (sz) {
@@ -359,10 +367,15 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
        __emit_sys_close(gen);
 }
 
-int bpf_gen__finish(struct bpf_gen *gen)
+int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
 {
        int i;
 
+       if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) {
+               pr_warn("progs/maps mismatch\n");
+               gen->error = -EFAULT;
+               return gen->error;
+       }
        emit_sys_close_stack(gen, stack_off(btf_fd));
        for (i = 0; i < gen->nr_progs; i++)
                move_stack2ctx(gen,
index a1bea19..7c74342 100644 (file)
@@ -7258,7 +7258,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        }
 
        if (obj->gen_loader)
-               bpf_gen__init(obj->gen_loader, attr->log_level);
+               bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps);
 
        err = bpf_object__probe_loading(obj);
        err = err ? : bpf_object__load_vmlinux_btf(obj, false);
@@ -7277,7 +7277,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
                for (i = 0; i < obj->nr_maps; i++)
                        obj->maps[i].fd = -1;
                if (!err)
-                       err = bpf_gen__finish(obj->gen_loader);
+                       err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
        }
 
        /* clean up fd_array */
index add3990..2173582 100644 (file)
@@ -3310,6 +3310,9 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
        if (!insn->func)
                return false;
 
+       if (insn->func->static_call_tramp)
+               return true;
+
        /*
         * CONFIG_UBSAN_TRAP inserts a UD2 when it sees
         * __builtin_unreachable().  The BUG() macro has an unreachable() after
index 2d7df87..3cf7bac 100644 (file)
@@ -469,7 +469,7 @@ This option sets the time out limit. The default value is 500 ms.
 
 --switch-events::
 Record context switch events i.e. events of type PERF_RECORD_SWITCH or
-PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
+PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT, CoreSight or Arm SPE)
 switch events will be enabled automatically, which can be suppressed by
 by the option --no-switch-events.
 
index 0777748..80522bc 100644 (file)
@@ -516,17 +516,17 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
 $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
        $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
 
-socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c
-socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
+socket_arrays := $(beauty_outdir)/socket.c
+socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
 
-$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
-       $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
+$(socket_arrays): $(linux_uapi_dir)/in.h $(beauty_linux_dir)/socket.h $(socket_tbl)
+       $(Q)$(SHELL) '$(socket_tbl)' $(linux_uapi_dir) $(beauty_linux_dir) > $@
 
-socket_arrays := $(beauty_outdir)/socket_arrays.c
-socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
+sockaddr_arrays := $(beauty_outdir)/sockaddr.c
+sockaddr_tbl := $(srctree)/tools/perf/trace/beauty/sockaddr.sh
 
-$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
-       $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
+$(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl)
+       $(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@
 
 vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
 vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
@@ -736,8 +736,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
        $(sndrv_ctl_ioctl_array) \
        $(kcmp_type_array) \
        $(kvm_ioctl_array) \
-       $(socket_ipproto_array) \
        $(socket_arrays) \
+       $(sockaddr_arrays) \
        $(vhost_virtio_ioctl_array) \
        $(madvise_behavior_array) \
        $(mmap_flags_array) \
@@ -1113,8 +1113,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
                $(OUTPUT)$(sndrv_pcm_ioctl_array) \
                $(OUTPUT)$(kvm_ioctl_array) \
                $(OUTPUT)$(kcmp_type_array) \
-               $(OUTPUT)$(socket_ipproto_array) \
                $(OUTPUT)$(socket_arrays) \
+               $(OUTPUT)$(sockaddr_arrays) \
                $(OUTPUT)$(vhost_virtio_ioctl_array) \
                $(OUTPUT)$(perf_ioctl_array) \
                $(OUTPUT)$(prctl_option_array) \
index c625380..452b3d9 100644 (file)
@@ -2,6 +2,6 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
index 6848101..6956111 100644 (file)
@@ -3,18 +3,10 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
+struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-       {
-               .desc = "DWARF unwind",
-               .func = test__dwarf_unwind,
-       },
+       &suite__dwarf_unwind,
 #endif
-       {
-               .desc = "Vectors page",
-               .func = test__vectors_page,
-       },
-       {
-               .func = NULL,
-       },
+       &suite__vectors_page,
+       NULL,
 };
index 7ffdd79..55a8358 100644 (file)
@@ -9,8 +9,7 @@
 
 #define VECTORS__MAP_NAME "[vectors]"
 
-int test__vectors_page(struct test *test __maybe_unused,
-                      int subtest __maybe_unused)
+static int test__vectors_page(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        void *start, *end;
 
@@ -22,3 +21,5 @@ int test__vectors_page(struct test *test __maybe_unused,
 
        return TEST_OK;
 }
+
+DEFINE_SUITE("Vectors page", vectors_page);
index c625380..452b3d9 100644 (file)
@@ -2,6 +2,6 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
index 5b1543c..ad16b4f 100644 (file)
@@ -3,14 +3,9 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
+struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-       {
-               .desc = "DWARF unwind",
-               .func = test__dwarf_unwind,
-       },
+       &suite__dwarf_unwind,
 #endif
-       {
-               .func = NULL,
-       },
+       NULL,
 };
index a4420d4..2100d46 100644 (file)
@@ -23,6 +23,7 @@
 #include "../../../util/auxtrace.h"
 #include "../../../util/record.h"
 #include "../../../util/arm-spe.h"
+#include <tools/libc_compat.h> // reallocarray
 
 #define KiB(x) ((x) * 1024)
 #define MiB(x) ((x) * 1024 * 1024)
@@ -31,6 +32,8 @@ struct arm_spe_recording {
        struct auxtrace_record          itr;
        struct perf_pmu                 *arm_spe_pmu;
        struct evlist           *evlist;
+       int                     wrapped_cnt;
+       bool                    *wrapped;
 };
 
 static void arm_spe_set_timestamp(struct auxtrace_record *itr,
@@ -84,6 +87,55 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
        return 0;
 }
 
+static void
+arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
+                                          bool privileged)
+{
+       /*
+        * The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
+        * snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
+        * unprivileged users.
+        *
+        * The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
+        * unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
+        * will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+        * user is likely to get an error as they exceed their mlock limmit.
+        */
+
+       /*
+        * No size were given to '-S' or '-m,', so go with the default
+        */
+       if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+               if (privileged) {
+                       opts->auxtrace_mmap_pages = MiB(4) / page_size;
+               } else {
+                       opts->auxtrace_mmap_pages = KiB(128) / page_size;
+                       if (opts->mmap_pages == UINT_MAX)
+                               opts->mmap_pages = KiB(256) / page_size;
+               }
+       } else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
+               opts->mmap_pages = KiB(256) / page_size;
+       }
+
+       /*
+        * '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
+        * auxtrace mmap area.
+        */
+       if (!opts->auxtrace_snapshot_size)
+               opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+       /*
+        * '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
+        * enough to fit the requested snapshot size.
+        */
+       if (!opts->auxtrace_mmap_pages) {
+               size_t sz = opts->auxtrace_snapshot_size;
+
+               sz = round_up(sz, page_size) / page_size;
+               opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+       }
+}
+
 static int arm_spe_recording_options(struct auxtrace_record *itr,
                                     struct evlist *evlist,
                                     struct record_opts *opts)
@@ -115,6 +167,36 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
        if (!opts->full_auxtrace)
                return 0;
 
+       /*
+        * we are in snapshot mode.
+        */
+       if (opts->auxtrace_snapshot_mode) {
+               /*
+                * Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
+                * default values.
+                */
+               if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
+                       arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);
+
+               /*
+                * Snapshot size can't be bigger than the auxtrace area.
+                */
+               if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
+                       pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+                              opts->auxtrace_snapshot_size,
+                              opts->auxtrace_mmap_pages * (size_t)page_size);
+                       return -EINVAL;
+               }
+
+               /*
+                * Something went wrong somewhere - this shouldn't happen.
+                */
+               if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+                       pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+                       return -EINVAL;
+               }
+       }
+
        /* We are in full trace mode but '-m,xyz' wasn't specified */
        if (!opts->auxtrace_mmap_pages) {
                if (privileged) {
@@ -138,6 +220,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
                }
        }
 
+       if (opts->auxtrace_snapshot_mode)
+               pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
+                         opts->auxtrace_snapshot_size);
 
        /*
         * To obtain the auxtrace buffer file descriptor, the auxtrace event
@@ -166,8 +251,199 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
        tracking_evsel->core.attr.sample_period = 1;
 
        /* In per-cpu case, always need the time of mmap events etc */
-       if (!perf_cpu_map__empty(cpus))
+       if (!perf_cpu_map__empty(cpus)) {
                evsel__set_sample_bit(tracking_evsel, TIME);
+               evsel__set_sample_bit(tracking_evsel, CPU);
+
+               /* also track task context switch */
+               if (!record_opts__no_switch_events(opts))
+                       tracking_evsel->core.attr.context_switch = 1;
+       }
+
+       return 0;
+}
+
+static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+                                        struct record_opts *opts,
+                                        const char *str)
+{
+       unsigned long long snapshot_size = 0;
+       char *endptr;
+
+       if (str) {
+               snapshot_size = strtoull(str, &endptr, 0);
+               if (*endptr || snapshot_size > SIZE_MAX)
+                       return -1;
+       }
+
+       opts->auxtrace_snapshot_mode = true;
+       opts->auxtrace_snapshot_size = snapshot_size;
+
+       return 0;
+}
+
+static int arm_spe_snapshot_start(struct auxtrace_record *itr)
+{
+       struct arm_spe_recording *ptr =
+                       container_of(itr, struct arm_spe_recording, itr);
+       struct evsel *evsel;
+
+       evlist__for_each_entry(ptr->evlist, evsel) {
+               if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
+                       return evsel__disable(evsel);
+       }
+       return -EINVAL;
+}
+
+static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
+{
+       struct arm_spe_recording *ptr =
+                       container_of(itr, struct arm_spe_recording, itr);
+       struct evsel *evsel;
+
+       evlist__for_each_entry(ptr->evlist, evsel) {
+               if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
+                       return evsel__enable(evsel);
+       }
+       return -EINVAL;
+}
+
+static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
+{
+       bool *wrapped;
+       int cnt = ptr->wrapped_cnt, new_cnt, i;
+
+       /*
+        * No need to allocate, so return early.
+        */
+       if (idx < cnt)
+               return 0;
+
+       /*
+        * Make ptr->wrapped as big as idx.
+        */
+       new_cnt = idx + 1;
+
+       /*
+        * Free'ed in arm_spe_recording_free().
+        */
+       wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
+       if (!wrapped)
+               return -ENOMEM;
+
+       /*
+        * init new allocated values.
+        */
+       for (i = cnt; i < new_cnt; i++)
+               wrapped[i] = false;
+
+       ptr->wrapped_cnt = new_cnt;
+       ptr->wrapped = wrapped;
+
+       return 0;
+}
+
+static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
+                                     size_t buffer_size, u64 head)
+{
+       u64 i, watermark;
+       u64 *buf = (u64 *)buffer;
+       size_t buf_size = buffer_size;
+
+       /*
+        * Defensively handle the case where head might be continually increasing - if its value is
+        * equal or greater than the size of the ring buffer, then we can safely determine it has
+        * wrapped around. Otherwise, continue to detect if head might have wrapped.
+        */
+       if (head >= buffer_size)
+               return true;
+
+       /*
+        * We want to look the very last 512 byte (chosen arbitrarily) in the ring buffer.
+        */
+       watermark = buf_size - 512;
+
+       /*
+        * The value of head is somewhere within the size of the ring buffer. This can be that there
+        * hasn't been enough data to fill the ring buffer yet or the trace time was so long that
+        * head has numerically wrapped around.  To find we need to check if we have data at the
+        * very end of the ring buffer.  We can reliably do this because mmap'ed pages are zeroed
+        * out and there is a fresh mapping with every new session.
+        */
+
+       /*
+        * head is less than 512 byte from the end of the ring buffer.
+        */
+       if (head > watermark)
+               watermark = head;
+
+       /*
+        * Speed things up by using 64 bit transactions (see "u64 *buf" above)
+        */
+       watermark /= sizeof(u64);
+       buf_size /= sizeof(u64);
+
+       /*
+        * If we find trace data at the end of the ring buffer, head has been there and has
+        * numerically wrapped around at least once.
+        */
+       for (i = watermark; i < buf_size; i++)
+               if (buf[i])
+                       return true;
+
+       return false;
+}
+
+static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
+                                 struct auxtrace_mmap *mm, unsigned char *data,
+                                 u64 *head, u64 *old)
+{
+       int err;
+       bool wrapped;
+       struct arm_spe_recording *ptr =
+                       container_of(itr, struct arm_spe_recording, itr);
+
+       /*
+        * Allocate memory to keep track of wrapping if this is the first
+        * time we deal with this *mm.
+        */
+       if (idx >= ptr->wrapped_cnt) {
+               err = arm_spe_alloc_wrapped_array(ptr, idx);
+               if (err)
+                       return err;
+       }
+
+       /*
+        * Check to see if *head has wrapped around.  If it hasn't only the
+        * amount of data between *head and *old is snapshot'ed to avoid
+        * bloating the perf.data file with zeros.  But as soon as *head has
+        * wrapped around the entire size of the AUX ring buffer it taken.
+        */
+       wrapped = ptr->wrapped[idx];
+       if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
+               wrapped = true;
+               ptr->wrapped[idx] = true;
+       }
+
+       pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+                 __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+       /*
+        * No wrap has occurred, we can just use *head and *old.
+        */
+       if (!wrapped)
+               return 0;
+
+       /*
+        * *head has wrapped around - adjust *head and *old to pickup the
+        * entire content of the AUX buffer.
+        */
+       if (*head >= mm->len) {
+               *old = *head - mm->len;
+       } else {
+               *head += mm->len;
+               *old = *head - mm->len;
+       }
 
        return 0;
 }
@@ -186,6 +462,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
        struct arm_spe_recording *sper =
                        container_of(itr, struct arm_spe_recording, itr);
 
+       free(sper->wrapped);
        free(sper);
 }
 
@@ -207,6 +484,10 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
 
        sper->arm_spe_pmu = arm_spe_pmu;
        sper->itr.pmu = arm_spe_pmu;
+       sper->itr.snapshot_start = arm_spe_snapshot_start;
+       sper->itr.snapshot_finish = arm_spe_snapshot_finish;
+       sper->itr.find_snapshot = arm_spe_find_snapshot;
+       sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
        sper->itr.recording_options = arm_spe_recording_options;
        sper->itr.info_priv_size = arm_spe_info_priv_size;
        sper->itr.info_fill = arm_spe_info_fill;
index c625380..452b3d9 100644 (file)
@@ -2,6 +2,6 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
index 8c3fbd4..eb98c57 100644 (file)
@@ -3,14 +3,10 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
+
+struct test_suite *arch_tests[] = {
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-       {
-               .desc = "Test dwarf unwind",
-               .func = test__dwarf_unwind,
-       },
+       &suite__dwarf_unwind,
 #endif
-       {
-               .func = NULL,
-       },
+       NULL,
 };
index 18b5500..fe8f8dd 100644 (file)
 446    common  landlock_restrict_self  sys_landlock_restrict_self
 447    common  memfd_secret            sys_memfd_secret
 448    common  process_mrelease        sys_process_mrelease
+449    common  futex_waitv             sys_futex_waitv
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index 9599e7a..6a1a1b3 100644 (file)
@@ -2,15 +2,15 @@
 #ifndef ARCH_TESTS_H
 #define ARCH_TESTS_H
 
-struct test;
+struct test_suite;
 
 /* Tests */
-int test__rdpmc(struct test *test, int subtest);
-int test__insn_x86(struct test *test, int subtest);
-int test__intel_pt_pkt_decoder(struct test *test, int subtest);
-int test__bp_modify(struct test *test, int subtest);
-int test__x86_sample_parsing(struct test *test, int subtest);
+int test__rdpmc(struct test_suite *test, int subtest);
+int test__insn_x86(struct test_suite *test, int subtest);
+int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
+int test__bp_modify(struct test_suite *test, int subtest);
+int test__x86_sample_parsing(struct test_suite *test, int subtest);
 
-extern struct test arch_tests[];
+extern struct test_suite *arch_tests[];
 
 #endif
index 71aa673..64fb73d 100644 (file)
@@ -3,39 +3,28 @@
 #include "tests/tests.h"
 #include "arch-tests.h"
 
-struct test arch_tests[] = {
-       {
-               .desc = "x86 rdpmc",
-               .func = test__rdpmc,
-       },
+DEFINE_SUITE("x86 rdpmc", rdpmc);
+#ifdef HAVE_AUXTRACE_SUPPORT
+DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
+DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
+#endif
+#if defined(__x86_64__)
+DEFINE_SUITE("x86 bp modify", bp_modify);
+#endif
+DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
+
+struct test_suite *arch_tests[] = {
+       &suite__rdpmc,
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
-       {
-               .desc = "DWARF unwind",
-               .func = test__dwarf_unwind,
-       },
+       &suite__dwarf_unwind,
 #endif
 #ifdef HAVE_AUXTRACE_SUPPORT
-       {
-               .desc = "x86 instruction decoder - new instructions",
-               .func = test__insn_x86,
-       },
-       {
-               .desc = "Intel PT packet decoder",
-               .func = test__intel_pt_pkt_decoder,
-       },
+       &suite__insn_x86,
+       &suite__intel_pt_pkt_decoder,
 #endif
 #if defined(__x86_64__)
-       {
-               .desc = "x86 bp modify",
-               .func = test__bp_modify,
-       },
+       &suite__bp_modify,
 #endif
-       {
-               .desc = "x86 Sample parsing",
-               .func = test__x86_sample_parsing,
-       },
-       {
-               .func = NULL,
-       },
-
+       &suite__x86_sample_parsing,
+       NULL,
 };
index dffcf9b..0924ccd 100644 (file)
@@ -204,7 +204,7 @@ out:
        return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
 }
 
-int test__bp_modify(struct test *test __maybe_unused,
+int test__bp_modify(struct test_suite *test __maybe_unused,
                    int subtest __maybe_unused)
 {
        TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
index 0262b0d..94b490c 100644 (file)
@@ -173,7 +173,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
  * verbose (-v) option to see all the instructions and whether or not they
  * decoded successfully.
  */
-int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__insn_x86(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret = 0;
 
index 27dd8cf..cb5b2c6 100644 (file)
@@ -37,7 +37,7 @@ static pid_t spawn(void)
  * the last read counter value to avoid triggering a WARN_ON_ONCE() in
  * smp_call_function_many() caused by sending IPIs from NMI context.
  */
-int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct evlist *evlist = NULL;
        struct evsel *evsel = NULL;
index c933e3d..2fc882a 100644 (file)
@@ -289,7 +289,7 @@ static int test_one(struct test_data *d)
  * This test feeds byte sequences to the Intel PT packet decoder and checks the
  * results. Changes to the packet context are also checked.
  */
-int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct test_data *d = data;
        int ret;
index 1ea9166..498413a 100644 (file)
@@ -157,7 +157,7 @@ out_close:
        return 0;
 }
 
-int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__rdpmc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int status = 0;
        int wret = 0;
index c92db87..bfbd366 100644 (file)
@@ -115,7 +115,7 @@ out_free:
  * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
  * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
  */
-int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+int test__x86_sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
 }
index 5d1fe9c..137890f 100644 (file)
@@ -233,6 +233,7 @@ int bench_futex_lock_pi(int argc, const char **argv)
        print_summary();
 
        free(worker);
+       perf_cpu_map__put(cpu);
        return ret;
 err:
        usage_with_options(bench_futex_lock_pi_usage, options);
index 97fe31f..f7a5ffe 100644 (file)
@@ -294,6 +294,7 @@ int bench_futex_requeue(int argc, const char **argv)
        print_summary();
 
        free(worker);
+       perf_cpu_map__put(cpu);
        return ret;
 err:
        usage_with_options(bench_futex_requeue_usage, options);
index e970e6b..0983f40 100644 (file)
@@ -329,6 +329,7 @@ int bench_futex_wake_parallel(int argc, const char **argv)
        print_summary();
 
        free(blocked_worker);
+       perf_cpu_map__put(cpu);
        return ret;
 }
 #endif /* HAVE_PTHREAD_BARRIER */
index 77f058a..2226a47 100644 (file)
@@ -222,5 +222,6 @@ int bench_futex_wake(int argc, const char **argv)
        print_summary();
 
        free(worker);
+       perf_cpu_map__put(cpu);
        return ret;
 }
index 624ea12..0b52e08 100644 (file)
@@ -979,6 +979,8 @@ static struct syscall_fmt syscall_fmts[] = {
          .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
        { .name     = "getrlimit",
          .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+       { .name     = "getsockopt",
+         .arg = { [1] = STRARRAY(level, socket_level), }, },
        { .name     = "gettid",     .errpid = true, },
        { .name     = "ioctl",
          .arg = {
@@ -1121,6 +1123,8 @@ static struct syscall_fmt syscall_fmts[] = {
          .arg = { [0] = STRARRAY(which, itimers), }, },
        { .name     = "setrlimit",
          .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+       { .name     = "setsockopt",
+         .arg = { [1] = STRARRAY(level, socket_level), }, },
        { .name     = "socket",
          .arg = { [0] = STRARRAY(family, socket_families),
                   [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
index a42fab3..aa8cfea 100644 (file)
@@ -106,6 +106,9 @@ enum perf_hw_id {
        PERF_COUNT_HW_BRANCH_INSTRUCTIONS       = 4,
        PERF_COUNT_HW_BRANCH_MISSES             = 5,
        PERF_COUNT_HW_BUS_CYCLES                = 6,
+       PERF_COUNT_HW_STALLED_CYCLES_FRONTEND   = 7,
+       PERF_COUNT_HW_STALLED_CYCLES_BACKEND    = 8,
+       PERF_COUNT_HW_REF_CPU_CYCLES            = 9,
 };
 
 These are standardized types of events that work relatively uniformly
diff --git a/tools/perf/pmu-events/arch/powerpc/power10/metrics.json b/tools/perf/pmu-events/arch/powerpc/power10/metrics.json
new file mode 100644 (file)
index 0000000..b57526f
--- /dev/null
@@ -0,0 +1,676 @@
+[
+    {
+        "BriefDescription": "Percentage of cycles that are run cycles",
+        "MetricExpr": "PM_RUN_CYC / PM_CYC * 100",
+        "MetricGroup": "General",
+        "MetricName": "RUN_CYCLES_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction",
+        "MetricExpr": "PM_CYC / PM_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "CYCLES_PER_INSTRUCTION"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled for any reason",
+        "MetricExpr": "PM_DISP_STALL_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because there was a flush",
+        "MetricExpr": "PM_DISP_STALL_FLUSH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_FLUSH_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because the MMU was handling a translation miss",
+        "MetricExpr": "PM_DISP_STALL_TRANSLATION / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_TRANSLATION_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction ERAT miss",
+        "MetricExpr": "PM_DISP_STALL_IERAT_ONLY_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IERAT_ONLY_MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled waiting to resolve an instruction TLB miss",
+        "MetricExpr": "PM_DISP_STALL_ITLB_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_ITLB_MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss",
+        "MetricExpr": "PM_DISP_STALL_IC_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L2",
+        "MetricExpr": "PM_DISP_STALL_IC_L2 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_L2_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from the local L3",
+        "MetricExpr": "PM_DISP_STALL_IC_L3 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_L3_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while the instruction was fetched from any source beyond the local L3",
+        "MetricExpr": "PM_DISP_STALL_IC_L3MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_IC_L3MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to an icache miss after a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_ICMISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L2 after suffering a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L2 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_IC_L2_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from the local L3 after suffering a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_IC_L3_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled while instruction was fetched from any source beyond the local L3 after suffering a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED_IC_L3MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_IC_L3MISS_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled due to a branch mispredict",
+        "MetricExpr": "PM_DISP_STALL_BR_MPRED / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_BR_MPRED_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any reason",
+        "MetricExpr": "PM_DISP_STALL_HELD_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of a synchronizing instruction that requires the ICT to be empty before dispatch",
+        "MetricExpr": "PM_DISP_STALL_HELD_SYNC_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISP_HELD_STALL_SYNC_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch while waiting on the scoreboard",
+        "MetricExpr": "PM_DISP_STALL_HELD_SCOREBOARD_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISP_HELD_STALL_SCOREBOARD_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch due to issue queue full",
+        "MetricExpr": "PM_DISP_STALL_HELD_ISSQ_FULL_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISP_HELD_STALL_ISSQ_FULL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the mapper/SRB was full",
+        "MetricExpr": "PM_DISP_STALL_HELD_RENAME_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_RENAME_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the STF mapper/SRB was full",
+        "MetricExpr": "PM_DISP_STALL_HELD_STF_MAPPER_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_STF_MAPPER_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because the XVFC mapper/SRB was full",
+        "MetricExpr": "PM_DISP_STALL_HELD_XVFC_MAPPER_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_XVFC_MAPPER_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch for any other reason",
+        "MetricExpr": "PM_DISP_STALL_HELD_OTHER_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_OTHER_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction has been dispatched but not issued for any reason",
+        "MetricExpr": "PM_ISSUE_STALL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "ISSUE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units",
+        "MetricExpr": "PM_EXEC_STALL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "EXECUTION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction spent executing an NTC instruction that gets flushed some time after dispatch",
+        "MetricExpr": "PM_EXEC_STALL_NTC_FLUSH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "NTC_FLUSH_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTF instruction finishes at dispatch",
+        "MetricExpr": "PM_EXEC_STALL_FIN_AT_DISP / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "FIN_AT_DISP_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the branch unit",
+        "MetricExpr": "PM_EXEC_STALL_BRU / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "BRU_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a simple fixed point instruction that is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_SIMPLE_FX / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "SIMPLE_FX_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the VSU",
+        "MetricExpr": "PM_EXEC_STALL_VSU / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "VSU_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting to be finished in one of the execution units",
+        "MetricExpr": "PM_EXEC_STALL_TRANSLATION / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "TRANSLATION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a load or store that suffered a translation miss",
+        "MetricExpr": "PM_EXEC_STALL_DERAT_ONLY_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DERAT_ONLY_MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is recovering from a TLB miss",
+        "MetricExpr": "PM_EXEC_STALL_DERAT_DTLB_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DERAT_DTLB_MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_LSU / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LSU_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a load that is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_LOAD / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LOAD_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L2L3_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3, with an RC dispatch conflict",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L2L3_CONFLICT_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from either the local L2 or local L3, without an RC dispatch conflict",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L2L3_NOCONFLICT / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L2L3_NOCONFLICT_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a source beyond the local L2 and local L3",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L3MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a neighbor chiplet's L2 or L3 in the same chip",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_L21_L31_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from L4, local memory or OpenCAPI chip",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_LMEM / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_LMEM_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a remote chip (cache, L4, memory or OpenCAPI) in the same group",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_OFF_CHIP / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_OFF_CHIP_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is waiting for a load miss to resolve from a distant chip (cache, L4, memory or OpenCAPI chip)",
+        "MetricExpr": "PM_EXEC_STALL_DMISS_OFF_NODE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DMISS_OFF_NODE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing a TLBIEL instruction",
+        "MetricExpr": "PM_EXEC_STALL_TLBIEL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "TLBIEL_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is finishing a load after its data has been reloaded from a data source beyond the local L1, OR when the LSU is processing an L1-hit, OR when the NTF instruction merged with another load in the LMQ",
+        "MetricExpr": "PM_EXEC_STALL_LOAD_FINISH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LOAD_FINISH_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a store that is executing in the LSU",
+        "MetricExpr": "PM_EXEC_STALL_STORE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STORE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is in the store unit outside of handling store misses or other special store operations",
+        "MetricExpr": "PM_EXEC_STALL_STORE_PIPE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STORE_PIPE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a store whose cache line was not resident in the L1 and had to wait for allocation of the missing line into the L1",
+        "MetricExpr": "PM_EXEC_STALL_STORE_MISS / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STORE_MISS_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a TLBIE instruction waiting for a response from the L2",
+        "MetricExpr": "PM_EXEC_STALL_TLBIE / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "TLBIE_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is executing a PTESYNC instruction",
+        "MetricExpr": "PM_EXEC_STALL_PTESYNC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "PTESYNC_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because the thread was blocked",
+        "MetricExpr": "PM_CMPL_STALL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction cannot complete because it was interrupted by ANY exception",
+        "MetricExpr": "PM_CMPL_STALL_EXCEPTION / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "EXCEPTION_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is stuck at finish waiting for the non-speculative finish of either a STCX instruction waiting for its result or a load waiting for non-critical sectors of data and ECC",
+        "MetricExpr": "PM_CMPL_STALL_MEM_ECC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "MEM_ECC_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a STCX instruction waiting for resolution from the nest",
+        "MetricExpr": "PM_CMPL_STALL_STCX / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "STCX_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a LWSYNC instruction waiting to complete",
+        "MetricExpr": "PM_CMPL_STALL_LWSYNC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "LWSYNC_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction is a HWSYNC instruction stuck at finish waiting for a response from the L2",
+        "MetricExpr": "PM_CMPL_STALL_HWSYNC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "HWSYNC_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction required special handling before completion",
+        "MetricExpr": "PM_CMPL_STALL_SPECIAL / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "SPECIAL_COMPLETION_STALL_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when dispatch was stalled because fetch was being held, so there was nothing in the pipeline for this thread",
+        "MetricExpr": "PM_DISP_STALL_FETCH / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_FETCH_CPI"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTC instruction was held at dispatch because of power management",
+        "MetricExpr": "PM_DISP_STALL_HELD_HALT_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "CPI",
+        "MetricName": "DISPATCHED_HELD_HALT_CPI"
+    },
+    {
+        "BriefDescription": "Percentage of flushes per completed instruction",
+        "MetricExpr": "PM_FLUSH / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "FLUSH_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of flushes due to a branch mispredict per completed instruction",
+        "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "BR_MPRED_FLUSH_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of branch mispredictions per completed instruction",
+        "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL",
+        "MetricGroup": "Others",
+        "MetricName": "BRANCH_MISPREDICTION_RATE"
+    },
+    {
+        "BriefDescription": "Percentage of finished loads that missed in the L1",
+        "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100",
+        "MetricGroup": "Others",
+        "MetricName": "L1_LD_MISS_RATIO",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were loads that missed the L1",
+        "MetricExpr": "PM_LD_MISS_L1 / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "L1_LD_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions when the DPTEG required for the load/store instruction in execution was missing from the TLB",
+        "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Others",
+        "MetricName": "DTLB_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions dispatched per instruction completed",
+        "MetricExpr": "PM_INST_DISP / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "DISPATCH_PER_INST_CMPL"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were a demand load that did not hit in the L1 or L2",
+        "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "L2_LD_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were demand fetches that missed the L1 icache",
+        "MetricExpr": "PM_L1_ICACHE_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Instruction_Misses",
+        "MetricName": "L1_INST_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were demand fetches that reloaded from beyond the L3 icache",
+        "MetricExpr": "PM_INST_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "L3_INST_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions per cycle",
+        "MetricExpr": "PM_INST_CMPL / PM_CYC",
+        "MetricGroup": "General",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Average number of cycles per completed instruction group",
+        "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "CYCLES_PER_COMPLETED_INSTRUCTIONS_SET"
+    },
+    {
+        "BriefDescription": "Percentage of cycles when at least 1 instruction dispatched",
+        "MetricExpr": "PM_1PLUS_PPC_DISP / PM_RUN_CYC * 100",
+        "MetricGroup": "General",
+        "MetricName": "CYCLES_ATLEAST_ONE_INST_DISPATCHED",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of finished loads per completed instruction",
+        "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "LOADS_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of finished stores per completed instruction",
+        "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "STORES_PER_INST"
+    },
+    {
+        "BriefDescription": "Percentage of demand loads that reloaded from beyond the L2 per completed instruction",
+        "MetricExpr": "PM_DATA_FROM_L2MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L2_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of demand loads that reloaded from beyond the L3 per completed instruction",
+        "MetricExpr": "PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L3_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 4k page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_4K / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_4K_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 64k page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_64K / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_64K_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of run cycles per completed instruction",
+        "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "RUN_CPI"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions per run cycle",
+        "MetricExpr": "PM_RUN_INST_CMPL / PM_RUN_CYC",
+        "MetricGroup": "General",
+        "MetricName": "RUN_IPC"
+    },
+    {
+        "BriefDescription": "Average number of completed instructions per instruction group",
+        "MetricExpr": "PM_RUN_INST_CMPL / PM_1PLUS_PPC_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "AVERAGE_COMPLETED_INSTRUCTION_SET_SIZE"
+    },
+    {
+        "BriefDescription": "Average number of finished instructions per completed instructions",
+        "MetricExpr": "PM_INST_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "INST_FIN_PER_CMPL"
+    },
+    {
+        "BriefDescription": "Average cycles per completed instruction when the NTF instruction is completing and the finish was overlooked",
+        "MetricExpr": "PM_EXEC_STALL_UNKNOWN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "EXEC_STALL_UNKOWN_CPI"
+    },
+    {
+        "BriefDescription": "Percentage of finished branches that were taken",
+        "MetricExpr": "PM_BR_TAKEN_CMPL / PM_BR_FIN * 100",
+        "MetricGroup": "General",
+        "MetricName": "TAKEN_BRANCHES",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of completed instructions that were a demand load that did not hit in the L1, L2, or the L3",
+        "MetricExpr": "PM_DATA_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "General",
+        "MetricName": "L3_LD_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Average number of finished branches per completed instruction",
+        "MetricExpr": "PM_BR_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "BRANCHES_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of instructions finished in the LSU per completed instruction",
+        "MetricExpr": "PM_LSU_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "LSU_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of instructions finished in the VSU per completed instruction",
+        "MetricExpr": "PM_VSU_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "VSU_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of TLBIE instructions finished in the LSU per completed instruction",
+        "MetricExpr": "PM_TLBIE_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "TLBIE_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of STCX instructions finshed per completed instruction",
+        "MetricExpr": "PM_STCX_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "STXC_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of LARX instructions finshed per completed instruction",
+        "MetricExpr": "PM_LARX_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "LARX_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of PTESYNC instructions finshed per completed instruction",
+        "MetricExpr": "PM_PTESYNC_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "PTESYNC_PER_INST"
+    },
+    {
+        "BriefDescription": "Average number of simple fixed-point instructions finshed in the store unit per completed instruction",
+        "MetricExpr": "PM_FX_LSU_FIN / PM_RUN_INST_CMPL",
+        "MetricGroup": "General",
+        "MetricName": "FX_PER_INST"
+    },
+    {
+        "BriefDescription": "Percentage of demand load misses that reloaded the L1 cache",
+        "MetricExpr": "PM_LD_DEMAND_MISS_L1 / PM_LD_MISS_L1 * 100",
+        "MetricGroup": "General",
+        "MetricName": "DL1_MISS_RELOADS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of demand load misses that reloaded from beyond the local L2",
+        "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_DEMAND_MISS_L1 * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L2_MISS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of demand load misses that reloaded from beyond the local L3",
+        "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_DEMAND_MISS_L1 * 100",
+        "MetricGroup": "dL1_Reloads",
+        "MetricName": "DL1_RELOAD_FROM_L3_MISS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of cycles stalled due to the NTC instruction waiting for a load miss to resolve from a source beyond the local L2 and local L3",
+        "MetricExpr": "DMISS_L3MISS_STALL_CPI / RUN_CPI * 100",
+        "MetricGroup": "General",
+        "MetricName": "DCACHE_MISS_CPI",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 2M page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_2M / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_2M_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses with 16M page size per completed instruction",
+        "MetricExpr": "PM_DERAT_MISS_16M / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_16M_MISS_RATE",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 4K page size",
+        "MetricExpr": "PM_DERAT_MISS_4K / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_4K_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 2M page size",
+        "MetricExpr": "PM_DERAT_MISS_2M / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_2M_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 16M page size",
+        "MetricExpr": "PM_DERAT_MISS_16M / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_16M_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "DERAT miss ratio for 64K page size",
+        "MetricExpr": "PM_DERAT_MISS_64K / PM_DERAT_MISS",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_64K_MISS_RATIO"
+    },
+    {
+        "BriefDescription": "Percentage of DERAT misses that resulted in TLB reloads",
+        "MetricExpr": "PM_DTLB_MISS / PM_DERAT_MISS * 100",
+        "MetricGroup": "Translation",
+        "MetricName": "DERAT_MISS_RELOAD",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of icache misses that were reloaded from beyond the local L3",
+        "MetricExpr": "PM_INST_FROM_L3MISS / PM_L1_ICACHE_MISS * 100",
+        "MetricGroup": "Instruction_Misses",
+        "MetricName": "INST_FROM_L3_MISS",
+        "ScaleUnit": "1%"
+    },
+    {
+        "BriefDescription": "Percentage of icache reloads from the beyond the L3 per completed instruction",
+        "MetricExpr": "PM_INST_FROM_L3MISS / PM_RUN_INST_CMPL * 100",
+        "MetricGroup": "Instruction_Misses",
+        "MetricName": "INST_FROM_L3_MISS_RATE",
+        "ScaleUnit": "1%"
+    }
+]
index 2ada86a..e91cf2c 100644 (file)
@@ -289,8 +289,8 @@ static int test_get_dec(void)
        return ret;
 }
 
-int test__api_io(struct test *test __maybe_unused,
-               int subtest __maybe_unused)
+static int test__api_io(struct test_suite *test __maybe_unused,
+                       int subtest __maybe_unused)
 {
        int ret = 0;
 
@@ -302,3 +302,5 @@ int test__api_io(struct test *test __maybe_unused,
                ret = TEST_FAIL;
        return ret;
 }
+
+DEFINE_SUITE("Test api io", api_io);
index 9b40a25..0f73e30 100644 (file)
@@ -178,7 +178,7 @@ static int run_dir(const char *d, const char *perf)
        return system(cmd) ? TEST_FAIL : TEST_OK;
 }
 
-int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct stat st;
        char path_perf[PATH_MAX];
@@ -207,3 +207,5 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
 
        return TEST_SKIP;
 }
+
+DEFINE_SUITE("Setup struct perf_event_attr", attr);
index 7447a44..79a980b 100644 (file)
@@ -82,7 +82,7 @@ static int do_test(struct evlist *evlist, int mmap_pages,
 }
 
 
-int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__backward_ring_buffer(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0;
        char pid[16], sbuf[STRERR_BUFSIZE];
@@ -167,3 +167,5 @@ out_delete_evlist:
        evlist__delete(evlist);
        return ret;
 }
+
+DEFINE_SUITE("Read backward ring buffer", backward_ring_buffer);
index 12b805e..3848563 100644 (file)
@@ -40,7 +40,7 @@ static int test_bitmap(const char *str)
        return ret;
 }
 
-int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bitmap_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        TEST_ASSERT_VAL("failed to convert map", test_bitmap("1"));
        TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5"));
@@ -51,3 +51,5 @@ int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unu
        TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40"));
        return 0;
 }
+
+DEFINE_SUITE("Print bitmap", bitmap_print);
index 489b506..d1ebb55 100644 (file)
 #include "../perf-sys.h"
 #include "cloexec.h"
 
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__)
+#define BP_ACCOUNT_IS_SUPPORTED 0
+#else
+#define BP_ACCOUNT_IS_SUPPORTED 1
+#endif
+
 static volatile long the_var;
 
 static noinline int test_function(void)
@@ -173,13 +186,18 @@ static int detect_share(int wp_cnt, int bp_cnt)
  *     we create another watchpoint to ensure
  *     the slot accounting is correct
  */
-int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bp_accounting(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int has_ioctl = detect_ioctl();
        int wp_cnt = detect_cnt(false);
        int bp_cnt = detect_cnt(true);
        int share  = detect_share(wp_cnt, bp_cnt);
 
+       if (!BP_ACCOUNT_IS_SUPPORTED) {
+               pr_debug("Test not supported on this architecture");
+               return TEST_SKIP;
+       }
+
        pr_debug("watchpoints count %d, breakpoints count %d, has_ioctl %d, share %d\n",
                 wp_cnt, bp_cnt, has_ioctl, share);
 
@@ -189,18 +207,4 @@ int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_un
        return bp_accounting(wp_cnt, share);
 }
 
-bool test__bp_account_is_supported(void)
-{
-       /*
-        * PowerPC and S390 do not support creation of instruction
-        * breakpoints using the perf_event interface.
-        *
-        * Just disable the test for these architectures until these
-        * issues are resolved.
-        */
-#if defined(__powerpc__) || defined(__s390x__)
-       return false;
-#else
-       return true;
-#endif
-}
+DEFINE_SUITE("Breakpoint accounting", bp_accounting);
index ef37353..1f2908f 100644 (file)
@@ -161,11 +161,16 @@ static long long bp_count(int fd)
        return count;
 }
 
-int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bp_signal(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct sigaction sa;
        long long count1, count2, count3;
 
+       if (!BP_SIGNAL_IS_SUPPORTED) {
+               pr_debug("Test not supported on this architecture");
+               return TEST_SKIP;
+       }
+
        /* setup SIGIO signal handler */
        memset(&sa, 0, sizeof(struct sigaction));
        sa.sa_sigaction = (void *) sig_handler;
@@ -285,29 +290,4 @@ int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused
                TEST_OK : TEST_FAIL;
 }
 
-bool test__bp_signal_is_supported(void)
-{
-       /*
-        * PowerPC and S390 do not support creation of instruction
-        * breakpoints using the perf_event interface.
-        *
-        * ARM requires explicit rounding down of the instruction
-        * pointer in Thumb mode, and then requires the single-step
-        * to be handled explicitly in the overflow handler to avoid
-        * stepping into the SIGIO handler and getting stuck on the
-        * breakpointed instruction.
-        *
-        * Since arm64 has the same issue with arm for the single-step
-        * handling, this case also gets stuck on the breakpointed
-        * instruction.
-        *
-        * Just disable the test for these architectures until these
-        * issues are resolved.
-        */
-#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \
-    defined(__aarch64__)
-       return false;
-#else
-       return true;
-#endif
-}
+DEFINE_SUITE("Breakpoint overflow signal handler", bp_signal);
index eb4dbbd..4e897c2 100644 (file)
@@ -59,13 +59,18 @@ static long long bp_count(int fd)
 #define EXECUTIONS 10000
 #define THRESHOLD  100
 
-int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__bp_signal_overflow(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_event_attr pe;
        struct sigaction sa;
        long long count;
        int fd, i, fails = 0;
 
+       if (!BP_SIGNAL_IS_SUPPORTED) {
+               pr_debug("Test not supported on this architecture");
+               return TEST_SKIP;
+       }
+
        /* setup SIGIO signal handler */
        memset(&sa, 0, sizeof(struct sigaction));
        sa.sa_sigaction = (void *) sig_handler;
@@ -133,3 +138,5 @@ int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __may
 
        return fails ? TEST_FAIL : TEST_OK;
 }
+
+DEFINE_SUITE("Breakpoint overflow sampling", bp_signal_overflow);
index 2bf146e..329f77f 100644 (file)
@@ -62,7 +62,6 @@ static int llseek_loop(void)
 
 static struct {
        enum test_llvm__testcase prog_id;
-       const char *desc;
        const char *name;
        const char *msg_compile_fail;
        const char *msg_load_fail;
@@ -72,7 +71,6 @@ static struct {
 } bpf_testcase_table[] = {
        {
                .prog_id          = LLVM_TESTCASE_BASE,
-               .desc             = "Basic BPF filtering",
                .name             = "[basic_bpf_test]",
                .msg_compile_fail = "fix 'perf test LLVM' first",
                .msg_load_fail    = "load bpf object failed",
@@ -81,7 +79,6 @@ static struct {
        },
        {
                .prog_id          = LLVM_TESTCASE_BASE,
-               .desc             = "BPF pinning",
                .name             = "[bpf_pinning]",
                .msg_compile_fail = "fix kbuild first",
                .msg_load_fail    = "check your vmlinux setting?",
@@ -92,7 +89,6 @@ static struct {
 #ifdef HAVE_BPF_PROLOGUE
        {
                .prog_id          = LLVM_TESTCASE_BPF_PROLOGUE,
-               .desc             = "BPF prologue generation",
                .name             = "[bpf_prologue_test]",
                .msg_compile_fail = "fix kbuild first",
                .msg_load_fail    = "check your vmlinux setting?",
@@ -283,18 +279,6 @@ out:
        return ret;
 }
 
-int test__bpf_subtest_get_nr(void)
-{
-       return (int)ARRAY_SIZE(bpf_testcase_table);
-}
-
-const char *test__bpf_subtest_get_desc(int i)
-{
-       if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
-               return NULL;
-       return bpf_testcase_table[i].desc;
-}
-
 static int check_env(void)
 {
        int err;
@@ -313,7 +297,7 @@ static int check_env(void)
        }
 
        err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
-                              sizeof(insns) / sizeof(insns[0]),
+                              ARRAY_SIZE(insns),
                               license, kver_int, NULL, 0);
        if (err < 0) {
                pr_err("Missing basic BPF support, skip this test: %s\n",
@@ -325,7 +309,7 @@ static int check_env(void)
        return 0;
 }
 
-int test__bpf(struct test *test __maybe_unused, int i)
+static int test__bpf(int i)
 {
        int err;
 
@@ -343,21 +327,60 @@ int test__bpf(struct test *test __maybe_unused, int i)
        err = __test__bpf(i);
        return err;
 }
+#endif
 
-#else
-int test__bpf_subtest_get_nr(void)
+static int test__basic_bpf_test(struct test_suite *test __maybe_unused,
+                               int subtest __maybe_unused)
 {
-       return 0;
+#ifdef HAVE_LIBBPF_SUPPORT
+       return test__bpf(0);
+#else
+       pr_debug("Skip BPF test because BPF support is not compiled\n");
+       return TEST_SKIP;
+#endif
 }
 
-const char *test__bpf_subtest_get_desc(int i __maybe_unused)
+static int test__bpf_pinning(struct test_suite *test __maybe_unused,
+                            int subtest __maybe_unused)
 {
-       return NULL;
+#ifdef HAVE_LIBBPF_SUPPORT
+       return test__bpf(1);
+#else
+       pr_debug("Skip BPF test because BPF support is not compiled\n");
+       return TEST_SKIP;
+#endif
 }
 
-int test__bpf(struct test *test __maybe_unused, int i __maybe_unused)
+static int test__bpf_prologue_test(struct test_suite *test __maybe_unused,
+                                  int subtest __maybe_unused)
 {
+#if defined(HAVE_LIBBPF_SUPPORT) && defined(HAVE_BPF_PROLOGUE)
+       return test__bpf(2);
+#else
        pr_debug("Skip BPF test because BPF support is not compiled\n");
        return TEST_SKIP;
+#endif
 }
+
+
+static struct test_case bpf_tests[] = {
+#ifdef HAVE_LIBBPF_SUPPORT
+       TEST_CASE("Basic BPF filtering", basic_bpf_test),
+       TEST_CASE("BPF pinning", bpf_pinning),
+#ifdef HAVE_BPF_PROLOGUE
+       TEST_CASE("BPF prologue generation", bpf_prologue_test),
+#else
+       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
+#endif
+#else
+       TEST_CASE_REASON("Basic BPF filtering", basic_bpf_test, "not compiled in"),
+       TEST_CASE_REASON("BPF pinning", bpf_pinning, "not compiled in"),
+       TEST_CASE_REASON("BPF prologue generation", bpf_prologue_test, "not compiled in"),
 #endif
+       { .name = NULL, }
+};
+
+struct test_suite suite__bpf = {
+       .desc = "BPF filter",
+       .test_cases = bpf_tests,
+};
index da7dc5e..8cb5a1c 100644 (file)
 
 static bool dont_fork;
 
-struct test __weak arch_tests[] = {
-       {
-               .func = NULL,
-       },
+struct test_suite *__weak arch_tests[] = {
+       NULL,
 };
 
-static struct test generic_tests[] = {
-       {
-               .desc = "vmlinux symtab matches kallsyms",
-               .func = test__vmlinux_matches_kallsyms,
-       },
-       {
-               .desc = "Detect openat syscall event",
-               .func = test__openat_syscall_event,
-       },
-       {
-               .desc = "Detect openat syscall event on all cpus",
-               .func = test__openat_syscall_event_on_all_cpus,
-       },
-       {
-               .desc = "Read samples using the mmap interface",
-               .func = test__basic_mmap,
-       },
-       {
-               .desc = "Test data source output",
-               .func = test__mem,
-       },
-       {
-               .desc = "Parse event definition strings",
-               .func = test__parse_events,
-       },
-       {
-               .desc = "Simple expression parser",
-               .func = test__expr,
-       },
-       {
-               .desc = "PERF_RECORD_* events & perf_sample fields",
-               .func = test__PERF_RECORD,
-       },
-       {
-               .desc = "Parse perf pmu format",
-               .func = test__pmu,
-       },
-       {
-               .desc = "PMU events",
-               .func = test__pmu_events,
-               .subtest = {
-                       .skip_if_fail   = false,
-                       .get_nr         = test__pmu_events_subtest_get_nr,
-                       .get_desc       = test__pmu_events_subtest_get_desc,
-                       .skip_reason    = test__pmu_events_subtest_skip_reason,
-               },
-
-       },
-       {
-               .desc = "DSO data read",
-               .func = test__dso_data,
-       },
-       {
-               .desc = "DSO data cache",
-               .func = test__dso_data_cache,
-       },
-       {
-               .desc = "DSO data reopen",
-               .func = test__dso_data_reopen,
-       },
-       {
-               .desc = "Roundtrip evsel->name",
-               .func = test__perf_evsel__roundtrip_name_test,
-       },
-       {
-               .desc = "Parse sched tracepoints fields",
-               .func = test__perf_evsel__tp_sched_test,
-       },
-       {
-               .desc = "syscalls:sys_enter_openat event fields",
-               .func = test__syscall_openat_tp_fields,
-       },
-       {
-               .desc = "Setup struct perf_event_attr",
-               .func = test__attr,
-       },
-       {
-               .desc = "Match and link multiple hists",
-               .func = test__hists_link,
-       },
-       {
-               .desc = "'import perf' in python",
-               .func = test__python_use,
-       },
-       {
-               .desc = "Breakpoint overflow signal handler",
-               .func = test__bp_signal,
-               .is_supported = test__bp_signal_is_supported,
-       },
-       {
-               .desc = "Breakpoint overflow sampling",
-               .func = test__bp_signal_overflow,
-               .is_supported = test__bp_signal_is_supported,
-       },
-       {
-               .desc = "Breakpoint accounting",
-               .func = test__bp_accounting,
-               .is_supported = test__bp_account_is_supported,
-       },
-       {
-               .desc = "Watchpoint",
-               .func = test__wp,
-               .is_supported = test__wp_is_supported,
-               .subtest = {
-                       .skip_if_fail   = false,
-                       .get_nr         = test__wp_subtest_get_nr,
-                       .get_desc       = test__wp_subtest_get_desc,
-                       .skip_reason    = test__wp_subtest_skip_reason,
-               },
-       },
-       {
-               .desc = "Number of exit events of a simple workload",
-               .func = test__task_exit,
-       },
-       {
-               .desc = "Software clock events period values",
-               .func = test__sw_clock_freq,
-       },
-       {
-               .desc = "Object code reading",
-               .func = test__code_reading,
-       },
-       {
-               .desc = "Sample parsing",
-               .func = test__sample_parsing,
-       },
-       {
-               .desc = "Use a dummy software event to keep tracking",
-               .func = test__keep_tracking,
-       },
-       {
-               .desc = "Parse with no sample_id_all bit set",
-               .func = test__parse_no_sample_id_all,
-       },
-       {
-               .desc = "Filter hist entries",
-               .func = test__hists_filter,
-       },
-       {
-               .desc = "Lookup mmap thread",
-               .func = test__mmap_thread_lookup,
-       },
-       {
-               .desc = "Share thread maps",
-               .func = test__thread_maps_share,
-       },
-       {
-               .desc = "Sort output of hist entries",
-               .func = test__hists_output,
-       },
-       {
-               .desc = "Cumulate child hist entries",
-               .func = test__hists_cumulate,
-       },
-       {
-               .desc = "Track with sched_switch",
-               .func = test__switch_tracking,
-       },
-       {
-               .desc = "Filter fds with revents mask in a fdarray",
-               .func = test__fdarray__filter,
-       },
-       {
-               .desc = "Add fd to a fdarray, making it autogrow",
-               .func = test__fdarray__add,
-       },
-       {
-               .desc = "kmod_path__parse",
-               .func = test__kmod_path__parse,
-       },
-       {
-               .desc = "Thread map",
-               .func = test__thread_map,
-       },
-       {
-               .desc = "LLVM search and compile",
-               .func = test__llvm,
-               .subtest = {
-                       .skip_if_fail   = true,
-                       .get_nr         = test__llvm_subtest_get_nr,
-                       .get_desc       = test__llvm_subtest_get_desc,
-               },
-       },
-       {
-               .desc = "Session topology",
-               .func = test__session_topology,
-       },
-       {
-               .desc = "BPF filter",
-               .func = test__bpf,
-               .subtest = {
-                       .skip_if_fail   = true,
-                       .get_nr         = test__bpf_subtest_get_nr,
-                       .get_desc       = test__bpf_subtest_get_desc,
-               },
-       },
-       {
-               .desc = "Synthesize thread map",
-               .func = test__thread_map_synthesize,
-       },
-       {
-               .desc = "Remove thread map",
-               .func = test__thread_map_remove,
-       },
-       {
-               .desc = "Synthesize cpu map",
-               .func = test__cpu_map_synthesize,
-       },
-       {
-               .desc = "Synthesize stat config",
-               .func = test__synthesize_stat_config,
-       },
-       {
-               .desc = "Synthesize stat",
-               .func = test__synthesize_stat,
-       },
-       {
-               .desc = "Synthesize stat round",
-               .func = test__synthesize_stat_round,
-       },
-       {
-               .desc = "Synthesize attr update",
-               .func = test__event_update,
-       },
-       {
-               .desc = "Event times",
-               .func = test__event_times,
-       },
-       {
-               .desc = "Read backward ring buffer",
-               .func = test__backward_ring_buffer,
-       },
-       {
-               .desc = "Print cpu map",
-               .func = test__cpu_map_print,
-       },
-       {
-               .desc = "Merge cpu map",
-               .func = test__cpu_map_merge,
-       },
-
-       {
-               .desc = "Probe SDT events",
-               .func = test__sdt_event,
-       },
-       {
-               .desc = "is_printable_array",
-               .func = test__is_printable_array,
-       },
-       {
-               .desc = "Print bitmap",
-               .func = test__bitmap_print,
-       },
-       {
-               .desc = "perf hooks",
-               .func = test__perf_hooks,
-       },
-       {
-               .desc = "builtin clang support",
-               .func = test__clang,
-               .subtest = {
-                       .skip_if_fail   = true,
-                       .get_nr         = test__clang_subtest_get_nr,
-                       .get_desc       = test__clang_subtest_get_desc,
-               }
-       },
-       {
-               .desc = "unit_number__scnprintf",
-               .func = test__unit_number__scnprint,
-       },
-       {
-               .desc = "mem2node",
-               .func = test__mem2node,
-       },
-       {
-               .desc = "time utils",
-               .func = test__time_utils,
-       },
-       {
-               .desc = "Test jit_write_elf",
-               .func = test__jit_write_elf,
-       },
-       {
-               .desc = "Test libpfm4 support",
-               .func = test__pfm,
-               .subtest = {
-                       .skip_if_fail   = true,
-                       .get_nr         = test__pfm_subtest_get_nr,
-                       .get_desc       = test__pfm_subtest_get_desc,
-               }
-       },
-       {
-               .desc = "Test api io",
-               .func = test__api_io,
-       },
-       {
-               .desc = "maps__merge_in",
-               .func = test__maps__merge_in,
-       },
-       {
-               .desc = "Demangle Java",
-               .func = test__demangle_java,
-       },
-       {
-               .desc = "Demangle OCaml",
-               .func = test__demangle_ocaml,
-       },
-       {
-               .desc = "Parse and process metrics",
-               .func = test__parse_metric,
-       },
-       {
-               .desc = "PE file support",
-               .func = test__pe_file_parsing,
-       },
-       {
-               .desc = "Event expansion for cgroups",
-               .func = test__expand_cgroup_events,
-       },
-       {
-               .desc = "Convert perf time to TSC",
-               .func = test__perf_time_to_tsc,
-               .is_supported = test__tsc_is_supported,
-       },
-       {
-               .desc = "dlfilter C API",
-               .func = test__dlfilter,
-       },
-       {
-               .func = NULL,
-       },
+static struct test_suite *generic_tests[] = {
+       &suite__vmlinux_matches_kallsyms,
+       &suite__openat_syscall_event,
+       &suite__openat_syscall_event_on_all_cpus,
+       &suite__basic_mmap,
+       &suite__mem,
+       &suite__parse_events,
+       &suite__expr,
+       &suite__PERF_RECORD,
+       &suite__pmu,
+       &suite__pmu_events,
+       &suite__dso_data,
+       &suite__dso_data_cache,
+       &suite__dso_data_reopen,
+       &suite__perf_evsel__roundtrip_name_test,
+       &suite__perf_evsel__tp_sched_test,
+       &suite__syscall_openat_tp_fields,
+       &suite__attr,
+       &suite__hists_link,
+       &suite__python_use,
+       &suite__bp_signal,
+       &suite__bp_signal_overflow,
+       &suite__bp_accounting,
+       &suite__wp,
+       &suite__task_exit,
+       &suite__sw_clock_freq,
+       &suite__code_reading,
+       &suite__sample_parsing,
+       &suite__keep_tracking,
+       &suite__parse_no_sample_id_all,
+       &suite__hists_filter,
+       &suite__mmap_thread_lookup,
+       &suite__thread_maps_share,
+       &suite__hists_output,
+       &suite__hists_cumulate,
+       &suite__switch_tracking,
+       &suite__fdarray__filter,
+       &suite__fdarray__add,
+       &suite__kmod_path__parse,
+       &suite__thread_map,
+       &suite__llvm,
+       &suite__session_topology,
+       &suite__bpf,
+       &suite__thread_map_synthesize,
+       &suite__thread_map_remove,
+       &suite__cpu_map_synthesize,
+       &suite__synthesize_stat_config,
+       &suite__synthesize_stat,
+       &suite__synthesize_stat_round,
+       &suite__event_update,
+       &suite__event_times,
+       &suite__backward_ring_buffer,
+       &suite__cpu_map_print,
+       &suite__cpu_map_merge,
+       &suite__sdt_event,
+       &suite__is_printable_array,
+       &suite__bitmap_print,
+       &suite__perf_hooks,
+       &suite__clang,
+       &suite__unit_number__scnprint,
+       &suite__mem2node,
+       &suite__time_utils,
+       &suite__jit_write_elf,
+       &suite__pfm,
+       &suite__api_io,
+       &suite__maps__merge_in,
+       &suite__demangle_java,
+       &suite__demangle_ocaml,
+       &suite__parse_metric,
+       &suite__pe_file_parsing,
+       &suite__expand_cgroup_events,
+       &suite__perf_time_to_tsc,
+       &suite__dlfilter,
+       NULL,
 };
 
-static struct test *tests[] = {
+static struct test_suite **tests[] = {
        generic_tests,
        arch_tests,
 };
 
+static int num_subtests(const struct test_suite *t)
+{
+       int num;
+
+       if (!t->test_cases)
+               return 0;
+
+       num = 0;
+       while (t->test_cases[num].name)
+               num++;
+
+       return num;
+}
+
+static bool has_subtests(const struct test_suite *t)
+{
+       return num_subtests(t) > 1;
+}
+
+static const char *skip_reason(const struct test_suite *t, int subtest)
+{
+       if (t->test_cases && subtest >= 0)
+               return t->test_cases[subtest].skip_reason;
+
+       return NULL;
+}
+
+static const char *test_description(const struct test_suite *t, int subtest)
+{
+       if (t->test_cases && subtest >= 0)
+               return t->test_cases[subtest].desc;
+
+       return t->desc;
+}
+
+static test_fnptr test_function(const struct test_suite *t, int subtest)
+{
+       if (subtest <= 0)
+               return t->test_cases[0].run_case;
+
+       return t->test_cases[subtest].run_case;
+}
+
 static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[])
 {
        int i;
@@ -398,7 +182,7 @@ static bool perf_test__matches(const char *desc, int curr, int argc, const char
        return false;
 }
 
-static int run_test(struct test *test, int subtest)
+static int run_test(struct test_suite *test, int subtest)
 {
        int status, err = -1, child = dont_fork ? 0 : fork();
        char sbuf[STRERR_BUFSIZE];
@@ -430,7 +214,7 @@ static int run_test(struct test *test, int subtest)
                        }
                }
 
-               err = test->func(test, subtest);
+               err = test_function(test, subtest)(test, subtest);
                if (!dont_fork)
                        exit(err);
        }
@@ -450,24 +234,19 @@ static int run_test(struct test *test, int subtest)
        return err;
 }
 
-#define for_each_test(j, t)                                    \
+#define for_each_test(j, k, t)                 \
        for (j = 0; j < ARRAY_SIZE(tests); j++) \
-               for (t = &tests[j][0]; t->func; t++)
+               for (k = 0, t = tests[j][k]; tests[j][k]; k++, t = tests[j][k])
 
-static int test_and_print(struct test *t, bool force_skip, int subtest)
+static int test_and_print(struct test_suite *t, int subtest)
 {
        int err;
 
-       if (!force_skip) {
-               pr_debug("\n--- start ---\n");
-               err = run_test(t, subtest);
-               pr_debug("---- end ----\n");
-       } else {
-               pr_debug("\n--- force skipped ---\n");
-               err = TEST_SKIP;
-       }
+       pr_debug("\n--- start ---\n");
+       err = run_test(t, subtest);
+       pr_debug("---- end ----\n");
 
-       if (!t->subtest.get_nr)
+       if (!has_subtests(t))
                pr_debug("%s:", t->desc);
        else
                pr_debug("%s subtest %d:", t->desc, subtest + 1);
@@ -477,11 +256,10 @@ static int test_and_print(struct test *t, bool force_skip, int subtest)
                pr_info(" Ok\n");
                break;
        case TEST_SKIP: {
-               const char *skip_reason = NULL;
-               if (t->subtest.skip_reason)
-                       skip_reason = t->subtest.skip_reason(subtest);
-               if (skip_reason)
-                       color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason);
+               const char *reason = skip_reason(t, subtest);
+
+               if (reason)
+                       color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", reason);
                else
                        color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
        }
@@ -580,7 +358,7 @@ struct shell_test {
        const char *file;
 };
 
-static int shell_test__run(struct test *test, int subdir __maybe_unused)
+static int shell_test__run(struct test_suite *test, int subdir __maybe_unused)
 {
        int err;
        char script[PATH_MAX];
@@ -622,24 +400,34 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
        for_each_shell_test(entlist, n_dirs, st.dir, ent) {
                int curr = i++;
                char desc[256];
-               struct test test = {
-                       .desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name),
-                       .func = shell_test__run,
+               struct test_case test_cases[] = {
+                       {
+                               .desc = shell_test__description(desc,
+                                                               sizeof(desc),
+                                                               st.dir,
+                                                               ent->d_name),
+                               .run_case = shell_test__run,
+                       },
+                       { .name = NULL, }
+               };
+               struct test_suite test_suite = {
+                       .desc = test_cases[0].desc,
+                       .test_cases = test_cases,
                        .priv = &st,
                };
 
-               if (!perf_test__matches(test.desc, curr, argc, argv))
+               if (!perf_test__matches(test_suite.desc, curr, argc, argv))
                        continue;
 
                st.file = ent->d_name;
-               pr_info("%2d: %-*s:", i, width, test.desc);
+               pr_info("%2d: %-*s:", i, width, test_suite.desc);
 
                if (intlist__find(skiplist, i)) {
                        color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
                        continue;
                }
 
-               test_and_print(&test, false, -1);
+               test_and_print(&test_suite, 0);
        }
 
        for (e = 0; e < n_dirs; e++)
@@ -650,33 +438,31 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width,
 
 static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
 {
-       struct test *t;
-       unsigned int j;
+       struct test_suite *t;
+       unsigned int j, k;
        int i = 0;
        int width = shell_tests__max_desc_width();
 
-       for_each_test(j, t) {
-               int len = strlen(t->desc);
+       for_each_test(j, k, t) {
+               int len = strlen(test_description(t, -1));
 
                if (width < len)
                        width = len;
        }
 
-       for_each_test(j, t) {
-               int curr = i++, err;
+       for_each_test(j, k, t) {
+               int curr = i++;
                int subi;
 
-               if (!perf_test__matches(t->desc, curr, argc, argv)) {
+               if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) {
                        bool skip = true;
                        int subn;
 
-                       if (!t->subtest.get_nr)
-                               continue;
-
-                       subn = t->subtest.get_nr();
+                       subn = num_subtests(t);
 
                        for (subi = 0; subi < subn; subi++) {
-                               if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+                               if (perf_test__matches(test_description(t, subi),
+                                                       curr, argc, argv))
                                        skip = false;
                        }
 
@@ -684,22 +470,17 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
                                continue;
                }
 
-               if (t->is_supported && !t->is_supported()) {
-                       pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
-                       continue;
-               }
-
-               pr_info("%2d: %-*s:", i, width, t->desc);
+               pr_info("%2d: %-*s:", i, width, test_description(t, -1));
 
                if (intlist__find(skiplist, i)) {
                        color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
                        continue;
                }
 
-               if (!t->subtest.get_nr) {
-                       test_and_print(t, false, -1);
+               if (!has_subtests(t)) {
+                       test_and_print(t, -1);
                } else {
-                       int subn = t->subtest.get_nr();
+                       int subn = num_subtests(t);
                        /*
                         * minus 2 to align with normal testcases.
                         * For subtest we print additional '.x' in number.
@@ -709,7 +490,6 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
                         * 35.1: Basic BPF llvm compiling test                          : Ok
                         */
                        int subw = width > 2 ? width - 2 : width;
-                       bool skip = false;
 
                        if (subn <= 0) {
                                color_fprintf(stderr, PERF_COLOR_YELLOW,
@@ -719,21 +499,20 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
                        pr_info("\n");
 
                        for (subi = 0; subi < subn; subi++) {
-                               int len = strlen(t->subtest.get_desc(subi));
+                               int len = strlen(test_description(t, subi));
 
                                if (subw < len)
                                        subw = len;
                        }
 
                        for (subi = 0; subi < subn; subi++) {
-                               if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+                               if (!perf_test__matches(test_description(t, subi),
+                                                       curr, argc, argv))
                                        continue;
 
                                pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
-                                       t->subtest.get_desc(subi));
-                               err = test_and_print(t, skip, subi);
-                               if (err != TEST_OK && t->subtest.skip_if_fail)
-                                       skip = true;
+                                       test_description(t, subi));
+                               test_and_print(t, subi);
                        }
                }
        }
@@ -759,7 +538,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
        for_each_shell_test(entlist, n_dirs, path, ent) {
                int curr = i++;
                char bf[256];
-               struct test t = {
+               struct test_suite t = {
                        .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
                };
 
@@ -778,26 +557,25 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
 
 static int perf_test__list(int argc, const char **argv)
 {
-       unsigned int j;
-       struct test *t;
+       unsigned int j, k;
+       struct test_suite *t;
        int i = 0;
 
-       for_each_test(j, t) {
+       for_each_test(j, k, t) {
                int curr = i++;
 
-               if (!perf_test__matches(t->desc, curr, argc, argv) ||
-                   (t->is_supported && !t->is_supported()))
+               if (!perf_test__matches(test_description(t, -1), curr, argc, argv))
                        continue;
 
-               pr_info("%2d: %s\n", i, t->desc);
+               pr_info("%2d: %s\n", i, test_description(t, -1));
 
-               if (t->subtest.get_nr) {
-                       int subn = t->subtest.get_nr();
+               if (has_subtests(t)) {
+                       int subn = num_subtests(t);
                        int subi;
 
                        for (subi = 0; subi < subn; subi++)
                                pr_info("%2d:%1d: %s\n", i, subi + 1,
-                                       t->subtest.get_desc(subi));
+                                       test_description(t, subi));
                }
        }
 
index 2577d3e..a711100 100644 (file)
@@ -3,44 +3,30 @@
 #include "c++/clang-c.h"
 #include <linux/kernel.h>
 
-static struct {
-       int (*func)(void);
-       const char *desc;
-} clang_testcase_table[] = {
-#ifdef HAVE_LIBCLANGLLVM_SUPPORT
-       {
-               .func = test__clang_to_IR,
-               .desc = "builtin clang compile C source to IR",
-       },
-       {
-               .func = test__clang_to_obj,
-               .desc = "builtin clang compile C source to ELF object",
-       },
-#endif
-};
-
-int test__clang_subtest_get_nr(void)
-{
-       return (int)ARRAY_SIZE(clang_testcase_table);
-}
-
-const char *test__clang_subtest_get_desc(int i)
-{
-       if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table))
-               return NULL;
-       return clang_testcase_table[i].desc;
-}
-
 #ifndef HAVE_LIBCLANGLLVM_SUPPORT
-int test__clang(struct test *test __maybe_unused, int i __maybe_unused)
+static int test__clang_to_IR(struct test_suite *test __maybe_unused,
+                            int subtest __maybe_unused)
 {
        return TEST_SKIP;
 }
-#else
-int test__clang(struct test *test __maybe_unused, int i)
+
+static int test__clang_to_obj(struct test_suite *test __maybe_unused,
+                             int subtest __maybe_unused)
 {
-       if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table))
-               return TEST_FAIL;
-       return clang_testcase_table[i].func();
+       return TEST_SKIP;
 }
 #endif
+
+static struct test_case clang_tests[] = {
+       TEST_CASE_REASON("builtin clang compile C source to IR", clang_to_IR,
+                        "not compiled in"),
+       TEST_CASE_REASON("builtin clang compile C source to ELF object",
+                        clang_to_obj,
+                        "not compiled in"),
+       { .name = NULL, }
+};
+
+struct test_suite suite__clang = {
+       .desc = "builtin clang support",
+       .test_cases = clang_tests,
+};
index f439bd4..5610767 100644 (file)
@@ -716,7 +716,7 @@ out_err:
        return err;
 }
 
-int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__code_reading(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret;
 
@@ -743,3 +743,5 @@ int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unu
                return -1;
        };
 }
+
+DEFINE_SUITE("Object code reading", code_reading);
index 0472b11..89a1550 100644 (file)
@@ -75,7 +75,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
 }
 
 
-int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__cpu_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_cpu_map *cpus;
 
@@ -111,7 +111,7 @@ static int cpu_map_print(const char *str)
        return !strcmp(buf, str);
 }
 
-int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__cpu_map_print(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1"));
        TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5"));
@@ -123,7 +123,7 @@ int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_un
        return 0;
 }
 
-int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__cpu_map_merge(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
        struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
@@ -137,3 +137,7 @@ int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_un
        perf_cpu_map__put(c);
        return 0;
 }
+
+DEFINE_SUITE("Synthesize cpu map", cpu_map_synthesize);
+DEFINE_SUITE("Print cpu map", cpu_map_print);
+DEFINE_SUITE("Merge cpu map", cpu_map_merge);
index 8f3b908..44d1be3 100644 (file)
@@ -7,7 +7,7 @@
 #include "debug.h"
 #include "demangle-java.h"
 
-int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__demangle_java(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret = TEST_OK;
        char *buf = NULL;
@@ -40,3 +40,5 @@ int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_un
 
        return ret;
 }
+
+DEFINE_SUITE("Demangle Java", demangle_java);
index 0043be8..90a4285 100644 (file)
@@ -7,7 +7,7 @@
 #include "debug.h"
 #include "demangle-ocaml.h"
 
-int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__demangle_ocaml(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret = TEST_OK;
        char *buf = NULL;
@@ -41,3 +41,5 @@ int test__demangle_ocaml(struct test *test __maybe_unused, int subtest __maybe_u
 
        return ret;
 }
+
+DEFINE_SUITE("Demangle OCaml", demangle_ocaml);
index bc03b5d..84352d5 100644 (file)
@@ -398,7 +398,7 @@ static void test_data__free(struct test_data *td)
        }
 }
 
-int test__dlfilter(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dlfilter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct test_data td = {.fd = -1};
        int pid = getpid();
@@ -414,3 +414,5 @@ int test__dlfilter(struct test *test __maybe_unused, int subtest __maybe_unused)
        test_data__free(&td);
        return err;
 }
+
+DEFINE_SUITE("dlfilter C API", dlfilter);
index 43e1b01..3419a4a 100644 (file)
@@ -113,7 +113,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine)
        return fd;
 }
 
-int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dso_data(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct machine machine;
        struct dso *dso;
@@ -248,7 +248,7 @@ static int set_fd_limit(int n)
        return setrlimit(RLIMIT_NOFILE, &rlim);
 }
 
-int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dso_data_cache(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct machine machine;
        long nr_end, nr = open_files_cnt();
@@ -318,7 +318,7 @@ static long new_limit(int count)
        return ret;
 }
 
-int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dso_data_reopen(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct machine machine;
        long nr_end, nr = open_files_cnt(), lim = new_limit(3);
@@ -393,3 +393,7 @@ int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_
        TEST_ASSERT_VAL("failed leaking files", nr == nr_end);
        return 0;
 }
+
+DEFINE_SUITE("DSO data read", dso_data);
+DEFINE_SUITE("DSO data cache", dso_data_cache);
+DEFINE_SUITE("DSO data reopen", dso_data_reopen);
index c756284..2dab2d2 100644 (file)
@@ -195,7 +195,8 @@ NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *th
        return ret;
 }
 
-int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__dwarf_unwind(struct test_suite *test __maybe_unused,
+                             int subtest __maybe_unused)
 {
        struct machine *machine;
        struct thread *thread;
@@ -237,3 +238,5 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
        machine__delete(machine);
        return err;
 }
+
+DEFINE_SUITE("Test dwarf unwind", dwarf_unwind);
index 04ce440..7606eb3 100644 (file)
@@ -216,7 +216,7 @@ out_err:
  * and checks that enabled and running times
  * match.
  */
-int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__event_times(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err, ret = 0;
 
@@ -239,3 +239,5 @@ int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unus
 #undef _T
        return ret;
 }
+
+DEFINE_SUITE("Event times", event_times);
index 44a5052..fbb68de 100644 (file)
@@ -83,7 +83,7 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__event_update(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct evsel *evsel;
        struct event_name tmp;
@@ -123,3 +123,5 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu
        evlist__delete(evlist);
        return 0;
 }
+
+DEFINE_SUITE("Synthesize attr update", event_update);
index 4e09f0a..fdbf176 100644 (file)
@@ -99,7 +99,8 @@ out_delete_evlist:
 #define perf_evsel__name_array_test(names, distance) \
        __perf_evsel__name_array_test(names, ARRAY_SIZE(names), distance)
 
-int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_evsel__roundtrip_name_test(struct test_suite *test __maybe_unused,
+                                                int subtest __maybe_unused)
 {
        int err = 0, ret = 0;
 
@@ -120,3 +121,5 @@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int
 
        return ret;
 }
+
+DEFINE_SUITE("Roundtrip evsel->name", perf_evsel__roundtrip_name_test);
index f9e34bd..cf4da3d 100644 (file)
@@ -32,7 +32,8 @@ static int evsel__test_field(struct evsel *evsel, const char *name, int size, bo
        return ret;
 }
 
-int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unused,
+                                          int subtest __maybe_unused)
 {
        struct evsel *evsel = evsel__newtp("sched", "sched_switch");
        int ret = 0;
@@ -87,3 +88,5 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
        evsel__delete(evsel);
        return ret;
 }
+
+DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test);
index 80cff8a..dfefe5b 100644 (file)
@@ -221,8 +221,8 @@ out:
        return ret;
 }
 
-int test__expand_cgroup_events(struct test *test __maybe_unused,
-                              int subtest __maybe_unused)
+static int test__expand_cgroup_events(struct test_suite *test __maybe_unused,
+                                     int subtest __maybe_unused)
 {
        int ret;
 
@@ -240,3 +240,5 @@ int test__expand_cgroup_events(struct test *test __maybe_unused,
 
        return ret;
 }
+
+DEFINE_SUITE("Event expansion for cgroups", expand_cgroup_events);
index 0777832..c895de4 100644 (file)
@@ -62,11 +62,11 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
        return 0;
 }
 
-int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
        struct expr_id_data *val_ptr;
        const char *p;
-       double val;
+       double val, num_cpus, num_cores, num_dies, num_packages;
        int ret;
        struct expr_parse_ctx *ctx;
 
@@ -134,6 +134,16 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
        TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "EVENT2,param=3@",
                                                    (void **)&val_ptr));
 
+       expr__ctx_clear(ctx);
+       TEST_ASSERT_VAL("find ids",
+                       expr__find_ids("dash\\-event1 - dash\\-event2",
+                                      NULL, ctx) == 0);
+       TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 2);
+       TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event1",
+                                                   (void **)&val_ptr));
+       TEST_ASSERT_VAL("find ids", hashmap__find(ctx->ids, "dash-event2",
+                                                   (void **)&val_ptr));
+
        /* Only EVENT1 or EVENT2 need be measured depending on the value of smt_on. */
        expr__ctx_clear(ctx);
        TEST_ASSERT_VAL("find ids",
@@ -151,7 +161,31 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
                        NULL, ctx) == 0);
        TEST_ASSERT_VAL("find ids", hashmap__size(ctx->ids) == 0);
 
+       /* Test toplogy constants appear well ordered. */
+       expr__ctx_clear(ctx);
+       TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0);
+       TEST_ASSERT_VAL("#num_cores", expr__parse(&num_cores, ctx, "#num_cores") == 0);
+       TEST_ASSERT_VAL("#num_cpus >= #num_cores", num_cpus >= num_cores);
+       TEST_ASSERT_VAL("#num_dies", expr__parse(&num_dies, ctx, "#num_dies") == 0);
+       TEST_ASSERT_VAL("#num_cores >= #num_dies", num_cores >= num_dies);
+       TEST_ASSERT_VAL("#num_packages", expr__parse(&num_packages, ctx, "#num_packages") == 0);
+       TEST_ASSERT_VAL("#num_dies >= #num_packages", num_dies >= num_packages);
+
+       /*
+        * Source count returns the number of events aggregating in a leader
+        * event including the leader. Check parsing yields an id.
+        */
+       expr__ctx_clear(ctx);
+       TEST_ASSERT_VAL("source count",
+                       expr__find_ids("source_count(EVENT1)",
+                       NULL, ctx) == 0);
+       TEST_ASSERT_VAL("source count", hashmap__size(ctx->ids) == 1);
+       TEST_ASSERT_VAL("source count", hashmap__find(ctx->ids, "EVENT1",
+                                                       (void **)&val_ptr));
+
        expr__ctx_free(ctx);
 
        return 0;
 }
+
+DEFINE_SUITE("Simple expression parser", expr);
index d9eca8e..40983c3 100644 (file)
@@ -28,7 +28,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
        return printed + fdarray__fprintf(fda, fp);
 }
 
-int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__fdarray__filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int nr_fds, err = TEST_FAIL;
        struct fdarray *fda = fdarray__new(5, 5);
@@ -89,7 +89,7 @@ out:
        return err;
 }
 
-int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__fdarray__add(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = TEST_FAIL;
        struct fdarray *fda = fdarray__new(2, 2);
@@ -158,3 +158,6 @@ out_delete:
 out:
        return err;
 }
+
+DEFINE_SUITE("Filter fds with revents mask in a fdarray", fdarray__filter);
+DEFINE_SUITE("Add fd to a fdarray, making it autogrow", fdarray__add);
index f797f98..95f3be1 100644 (file)
@@ -16,8 +16,8 @@
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
 
-int test__jit_write_elf(struct test *test __maybe_unused,
-                       int subtest __maybe_unused)
+static int test__jit_write_elf(struct test_suite *test __maybe_unused,
+                              int subtest __maybe_unused)
 {
 #ifdef HAVE_JITDUMP
        static unsigned char x86_code[] = {
@@ -49,3 +49,5 @@ int test__jit_write_elf(struct test *test __maybe_unused,
        return TEST_SKIP;
 #endif
 }
+
+DEFINE_SUITE("Test jit_write_elf", jit_write_elf);
index 890cb1f..17f4fcd 100644 (file)
@@ -689,7 +689,7 @@ out:
        return err;
 }
 
-int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_cumulate(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = TEST_FAIL;
        struct machines machines;
@@ -736,3 +736,5 @@ out:
 
        return err;
 }
+
+DEFINE_SUITE("Cumulate child hist entries", hists_cumulate);
index ca6120c..08cbeb9 100644 (file)
@@ -101,7 +101,7 @@ out:
        return TEST_FAIL;
 }
 
-int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_filter(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = TEST_FAIL;
        struct machines machines;
@@ -325,3 +325,5 @@ out:
 
        return err;
 }
+
+DEFINE_SUITE("Filter hist entries", hists_filter);
index a024d3f..c575e13 100644 (file)
@@ -264,7 +264,7 @@ static int validate_link(struct hists *leader, struct hists *other)
        return __validate_link(leader, 0) || __validate_link(other, 1);
 }
 
-int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_link(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = -1;
        struct hists *hists, *first_hists;
@@ -339,3 +339,5 @@ out:
 
        return err;
 }
+
+DEFINE_SUITE("Match and link multiple hists", hists_link);
index 8973f35..0bde4a7 100644 (file)
@@ -575,7 +575,7 @@ out:
        return err;
 }
 
-int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__hists_output(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = TEST_FAIL;
        struct machines machines;
@@ -623,3 +623,5 @@ out:
 
        return err;
 }
+
+DEFINE_SUITE("Sort output of hist entries", hists_output);
index 9c7b3ba..f72de24 100644 (file)
@@ -5,7 +5,7 @@
 #include "debug.h"
 #include "print_binary.h"
 
-int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__is_printable_array(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 };
        char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 };
@@ -36,3 +36,5 @@ int test__is_printable_array(struct test *test __maybe_unused, int subtest __may
 
        return TEST_OK;
 }
+
+DEFINE_SUITE("is_printable_array", is_printable_array);
index a0438b0..dd20673 100644 (file)
@@ -61,7 +61,7 @@ static int find_comm(struct evlist *evlist, const char *comm)
  * when an event is disabled but a dummy software event is not disabled.  If the
  * test passes %0 is returned, otherwise %-1 is returned.
  */
-int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__keep_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct record_opts opts = {
                .mmap_pages          = UINT_MAX,
@@ -160,3 +160,5 @@ out_err:
 
        return err;
 }
+
+DEFINE_SUITE("Use a dummy software event to keep tracking", keep_tracking);
index e483210..dfe1bd5 100644 (file)
@@ -47,7 +47,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect)
 #define M(path, c, e) \
        TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
 
-int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__kmod_path__parse(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
        /* path                alloc_name  kmod  comp   name   */
        T("/xxxx/xxxx/x-x.ko", true      , true, 0    , "[x_x]");
@@ -159,3 +159,5 @@ int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_un
 
        return 0;
 }
+
+DEFINE_SUITE("kmod_path__parse", kmod_path__parse);
index 33e43cc..8ac0a3a 100644 (file)
@@ -124,7 +124,7 @@ out:
        return ret;
 }
 
-int test__llvm(struct test *test __maybe_unused, int subtest)
+static int test__llvm(int subtest)
 {
        int ret;
        void *obj_buf = NULL;
@@ -148,32 +148,72 @@ int test__llvm(struct test *test __maybe_unused, int subtest)
 
        return ret;
 }
+#endif //HAVE_LIBBPF_SUPPORT
 
-int test__llvm_subtest_get_nr(void)
+static int test__llvm__bpf_base_prog(struct test_suite *test __maybe_unused,
+                                    int subtest __maybe_unused)
 {
-       return __LLVM_TESTCASE_MAX;
+#ifdef HAVE_LIBBPF_SUPPORT
+       return test__llvm(LLVM_TESTCASE_BASE);
+#else
+       pr_debug("Skip LLVM test because BPF support is not compiled\n");
+       return TEST_SKIP;
+#endif
 }
 
-const char *test__llvm_subtest_get_desc(int subtest)
-{
-       if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
-               return NULL;
-
-       return bpf_source_table[subtest].desc;
-}
-#else //HAVE_LIBBPF_SUPPORT
-int test__llvm(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__llvm__bpf_test_kbuild_prog(struct test_suite *test __maybe_unused,
+                                           int subtest __maybe_unused)
 {
+#ifdef HAVE_LIBBPF_SUPPORT
+       return test__llvm(LLVM_TESTCASE_KBUILD);
+#else
+       pr_debug("Skip LLVM test because BPF support is not compiled\n");
        return TEST_SKIP;
+#endif
 }
 
-int test__llvm_subtest_get_nr(void)
+static int test__llvm__bpf_test_prologue_prog(struct test_suite *test __maybe_unused,
+                                             int subtest __maybe_unused)
 {
-       return 0;
+#ifdef HAVE_LIBBPF_SUPPORT
+       return test__llvm(LLVM_TESTCASE_BPF_PROLOGUE);
+#else
+       pr_debug("Skip LLVM test because BPF support is not compiled\n");
+       return TEST_SKIP;
+#endif
 }
 
-const char *test__llvm_subtest_get_desc(int subtest __maybe_unused)
+static int test__llvm__bpf_test_relocation(struct test_suite *test __maybe_unused,
+                                          int subtest __maybe_unused)
 {
-       return NULL;
+#ifdef HAVE_LIBBPF_SUPPORT
+       return test__llvm(LLVM_TESTCASE_BPF_RELOCATION);
+#else
+       pr_debug("Skip LLVM test because BPF support is not compiled\n");
+       return TEST_SKIP;
+#endif
 }
-#endif // HAVE_LIBBPF_SUPPORT
+
+
+static struct test_case llvm_tests[] = {
+#ifdef HAVE_LIBBPF_SUPPORT
+       TEST_CASE("Basic BPF llvm compile", llvm__bpf_base_prog),
+       TEST_CASE("kbuild searching", llvm__bpf_test_kbuild_prog),
+       TEST_CASE("Compile source for BPF prologue generation",
+                 llvm__bpf_test_prologue_prog),
+       TEST_CASE("Compile source for BPF relocation", llvm__bpf_test_relocation),
+#else
+       TEST_CASE_REASON("Basic BPF llvm compile", llvm__bpf_base_prog, "not compiled in"),
+       TEST_CASE_REASON("kbuild searching", llvm__bpf_test_kbuild_prog, "not compiled in"),
+       TEST_CASE_REASON("Compile source for BPF prologue generation",
+                       llvm__bpf_test_prologue_prog, "not compiled in"),
+       TEST_CASE_REASON("Compile source for BPF relocation",
+                       llvm__bpf_test_relocation, "not compiled in"),
+#endif
+       { .name = NULL, }
+};
+
+struct test_suite suite__llvm = {
+       .desc = "LLVM search and compile",
+       .test_cases = llvm_tests,
+};
index 1ac7291..e308a32 100644 (file)
@@ -33,7 +33,7 @@ static int check_maps(struct map_def *merged, unsigned int size, struct maps *ma
        return TEST_OK;
 }
 
-int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__maps__merge_in(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
        struct maps maps;
        unsigned int i;
@@ -120,3 +120,5 @@ int test__maps__merge_in(struct test *t __maybe_unused, int subtest __maybe_unus
        maps__exit(&maps);
        return TEST_OK;
 }
+
+DEFINE_SUITE("maps__merge_in", maps__merge_in);
index 673a11a..56014ec 100644 (file)
@@ -23,7 +23,7 @@ static int check(union perf_mem_data_src data_src,
        return 0;
 }
 
-int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused)
+static int test__mem(struct test_suite *text __maybe_unused, int subtest __maybe_unused)
 {
        int ret = 0;
        union perf_mem_data_src src;
@@ -56,3 +56,5 @@ int test__mem(struct test *text __maybe_unused, int subtest __maybe_unused)
 
        return ret;
 }
+
+DEFINE_SUITE("Test data source output", mem);
index e4d0d58..b17b863 100644 (file)
@@ -43,7 +43,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
        return bm && map ? bm : NULL;
 }
 
-int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__mem2node(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
        struct mem2node map;
        struct memory_node nodes[3];
@@ -77,3 +77,5 @@ int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused)
        mem2node__exit(&map);
        return 0;
 }
+
+DEFINE_SUITE("mem2node", mem2node);
index d38757d..90b2fed 100644 (file)
@@ -29,7 +29,7 @@
  * Then it checks if the number of syscalls reported as perf events by
  * the kernel corresponds to the number of syscalls made.
  */
-int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = -1;
        union perf_event *event;
@@ -164,3 +164,5 @@ out_free_threads:
        perf_thread_map__put(threads);
        return err;
 }
+
+DEFINE_SUITE("Read samples using the mmap interface", basic_mmap);
index 6f2da7a..a4301fc 100644 (file)
@@ -224,7 +224,7 @@ static int mmap_events(synth_cb synth)
  *
  * by using all thread objects.
  */
-int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__mmap_thread_lookup(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        /* perf_event__synthesize_threads synthesize */
        TEST_ASSERT_VAL("failed with sythesizing all",
@@ -236,3 +236,5 @@ int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __may
 
        return 0;
 }
+
+DEFINE_SUITE("Lookup mmap thread", mmap_thread_lookup);
index f7dd6c4..cd3dd46 100644 (file)
@@ -19,7 +19,8 @@
 #include "stat.h"
 #include "util/counts.h"
 
-int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__openat_syscall_event_on_all_cpus(struct test_suite *test __maybe_unused,
+                                                 int subtest __maybe_unused)
 {
        int err = -1, fd, cpu;
        struct perf_cpu_map *cpus;
@@ -127,3 +128,5 @@ out_thread_map_delete:
        perf_thread_map__put(threads);
        return err;
 }
+
+DEFINE_SUITE("Detect openat syscall event on all cpus", openat_syscall_event_on_all_cpus);
index 5e4af2f..a7b2800 100644 (file)
@@ -22,7 +22,8 @@
 #define AT_FDCWD       -100
 #endif
 
-int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused,
+                                         int subtest __maybe_unused)
 {
        struct record_opts opts = {
                .target = {
@@ -142,3 +143,5 @@ out_delete_evlist:
 out:
        return err;
 }
+
+DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields);
index 85a8f0f..7f4c13c 100644 (file)
@@ -13,7 +13,8 @@
 #include "tests.h"
 #include "util/counts.h"
 
-int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__openat_syscall_event(struct test_suite *test __maybe_unused,
+                                     int subtest __maybe_unused)
 {
        int err = -1, fd;
        struct evsel *evsel;
@@ -66,3 +67,5 @@ out_thread_map_delete:
        perf_thread_map__put(threads);
        return err;
 }
+
+DEFINE_SUITE("Detect openat syscall event", openat_syscall_event);
index 6af9463..a508f1d 100644 (file)
@@ -2276,7 +2276,7 @@ static int test_pmu_events_alias(char *event, char *alias)
        return test_event(&e);
 }
 
-int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret1, ret2 = 0;
        char *event, *alias;
@@ -2319,3 +2319,5 @@ do {                                                      \
 
        return ret2;
 }
+
+DEFINE_SUITE("Parse event definition strings", parse_events);
index dfc797e..574b7e4 100644 (file)
@@ -369,7 +369,7 @@ static int test_metric_group(void)
        return 0;
 }
 
-int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__parse_metric(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
        TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);
@@ -383,3 +383,5 @@ int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unu
        }
        return 0;
 }
+
+DEFINE_SUITE("Parse and process metrics", parse_metric);
index 4712736..d62e315 100644 (file)
@@ -67,7 +67,8 @@ struct test_attr_event {
  *
  * Return: %0 on success, %-1 if the test fails.
  */
-int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__parse_no_sample_id_all(struct test_suite *test __maybe_unused,
+                                       int subtest __maybe_unused)
 {
        int err;
 
@@ -103,3 +104,5 @@ int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest _
 
        return 0;
 }
+
+DEFINE_SUITE("Parse with no sample_id_all bit set", parse_no_sample_id_all);
index 58b90c4..c09a9fa 100644 (file)
@@ -68,7 +68,7 @@ static int run_dir(const char *d)
        return TEST_OK;
 }
 
-int test__pe_file_parsing(struct test *test __maybe_unused,
+static int test__pe_file_parsing(struct test_suite *test __maybe_unused,
                          int subtest __maybe_unused)
 {
        struct stat st;
@@ -89,10 +89,12 @@ int test__pe_file_parsing(struct test *test __maybe_unused,
 
 #else
 
-int test__pe_file_parsing(struct test *test __maybe_unused,
+static int test__pe_file_parsing(struct test_suite *test __maybe_unused,
                          int subtest __maybe_unused)
 {
        return TEST_SKIP;
 }
 
 #endif
+
+DEFINE_SUITE("PE file support", pe_file_parsing);
index dd865e0..78cdeb8 100644 (file)
@@ -26,7 +26,7 @@ static void the_hook(void *_hook_flags)
        raise(SIGSEGV);
 }
 
-int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_hooks(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int hook_flags = 0;
 
@@ -45,3 +45,5 @@ int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unuse
                return TEST_FAIL;
        return TEST_OK;
 }
+
+DEFINE_SUITE("perf hooks", perf_hooks);
index 0df471b..6354465 100644 (file)
@@ -41,7 +41,7 @@ realloc:
        return cpu;
 }
 
-int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct record_opts opts = {
                .target = {
@@ -332,3 +332,5 @@ out_delete_evlist:
 out:
        return (err < 0 || errs > 0) ? -1 : 0;
 }
+
+DEFINE_SUITE("PERF_RECORD_* events & perf_sample fields", PERF_RECORD);
index 7c56bc1..d12d0ad 100644 (file)
 #include "pmu.h"
 #include "pmu-hybrid.h"
 
+/*
+ * Except x86_64/i386 and Arm64, other archs don't support TSC in perf.  Just
+ * enable the test for x86_64/i386 and Arm64 archs.
+ */
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
+#define TSC_IS_SUPPORTED 1
+#else
+#define TSC_IS_SUPPORTED 0
+#endif
+
 #define CHECK__(x) {                           \
        while ((x) < 0) {                       \
                pr_debug(#x " failed!\n");      \
@@ -45,7 +55,7 @@
  * %0 is returned, otherwise %-1 is returned.  If TSC conversion is not
  * supported then then the test passes but " (not supported)" is printed.
  */
-int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__perf_time_to_tsc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct record_opts opts = {
                .mmap_pages          = UINT_MAX,
@@ -69,6 +79,11 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
        u64 test_time, comm1_time = 0, comm2_time = 0;
        struct mmap *md;
 
+       if (!TSC_IS_SUPPORTED) {
+               pr_debug("Test not supported on this architecture");
+               return TEST_SKIP;
+       }
+
        threads = thread_map__new(-1, getpid(), UINT_MAX);
        CHECK_NOT_NULL__(threads);
 
@@ -185,15 +200,4 @@ out_err:
        return err;
 }
 
-bool test__tsc_is_supported(void)
-{
-       /*
-        * Except x86_64/i386 and Arm64, other archs don't support TSC in perf.
-        * Just enable the test for x86_64/i386 and Arm64 archs.
-        */
-#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
-       return true;
-#else
-       return false;
-#endif
-}
+DEFINE_SUITE("Convert perf time to TSC", perf_time_to_tsc);
index e8fd0da..71b76de 100644 (file)
 #include <linux/kernel.h>
 
 #ifdef HAVE_LIBPFM
-static int test__pfm_events(void);
-static int test__pfm_group(void);
-#endif
-
-static const struct {
-       int (*func)(void);
-       const char *desc;
-} pfm_testcase_table[] = {
-#ifdef HAVE_LIBPFM
-       {
-               .func = test__pfm_events,
-               .desc = "test of individual --pfm-events",
-       },
-       {
-               .func = test__pfm_group,
-               .desc = "test groups of --pfm-events",
-       },
-#endif
-};
-
-#ifdef HAVE_LIBPFM
 static int count_pfm_events(struct perf_evlist *evlist)
 {
        struct perf_evsel *evsel;
@@ -44,7 +23,8 @@ static int count_pfm_events(struct perf_evlist *evlist)
        return count;
 }
 
-static int test__pfm_events(void)
+static int test__pfm_events(struct test_suite *test __maybe_unused,
+                           int subtest __maybe_unused)
 {
        struct evlist *evlist;
        struct option opt;
@@ -104,7 +84,8 @@ static int test__pfm_events(void)
        return 0;
 }
 
-static int test__pfm_group(void)
+static int test__pfm_group(struct test_suite *test __maybe_unused,
+                          int subtest __maybe_unused)
 {
        struct evlist *evlist;
        struct option opt;
@@ -187,27 +168,27 @@ static int test__pfm_group(void)
        }
        return 0;
 }
-#endif
-
-const char *test__pfm_subtest_get_desc(int i)
-{
-       if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
-               return NULL;
-       return pfm_testcase_table[i].desc;
-}
-
-int test__pfm_subtest_get_nr(void)
+#else
+static int test__pfm_events(struct test_suite *test __maybe_unused,
+                           int subtest __maybe_unused)
 {
-       return (int)ARRAY_SIZE(pfm_testcase_table);
+       return TEST_SKIP;
 }
 
-int test__pfm(struct test *test __maybe_unused, int i __maybe_unused)
+static int test__pfm_group(struct test_suite *test __maybe_unused,
+                          int subtest __maybe_unused)
 {
-#ifdef HAVE_LIBPFM
-       if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
-               return TEST_FAIL;
-       return pfm_testcase_table[i].func();
-#else
        return TEST_SKIP;
-#endif
 }
+#endif
+
+static struct test_case pfm_tests[] = {
+       TEST_CASE_REASON("test of individual --pfm-events", pfm_events, "not compiled in"),
+       TEST_CASE_REASON("test groups of --pfm-events", pfm_group, "not compiled in"),
+       { .name = NULL, }
+};
+
+struct test_suite suite__pfm = {
+       .desc = "Test libpfm4 support",
+       .test_cases = pfm_tests,
+};
index 9ae894c..df1c9a3 100644 (file)
@@ -418,7 +418,8 @@ static int compare_alias_to_test_event(struct perf_pmu_alias *alias,
 }
 
 /* Verify generated events from pmu-events.c are as expected */
-static int test_pmu_event_table(void)
+static int test__pmu_event_table(struct test_suite *test __maybe_unused,
+                                int subtest __maybe_unused)
 {
        const struct pmu_event *sys_event_tables = __test_pmu_get_sys_events_table();
        const struct pmu_events_map *map = __test_pmu_get_events_map();
@@ -705,7 +706,8 @@ static struct perf_pmu_test_pmu test_pmus[] = {
 };
 
 /* Test that aliases generated are as expected */
-static int test_aliases(void)
+static int test__aliases(struct test_suite *test __maybe_unused,
+                       int subtest __maybe_unused)
 {
        struct perf_pmu *pmu = NULL;
        unsigned long i;
@@ -892,7 +894,8 @@ out_err:
 
 }
 
-static int test_parsing(void)
+static int test__parsing(struct test_suite *test __maybe_unused,
+                        int subtest __maybe_unused)
 {
        const struct pmu_events_map *cpus_map = pmu_events_map__find();
        const struct pmu_events_map *map;
@@ -1034,7 +1037,8 @@ out:
  * or all defined cpus via the 'fake_pmu'
  * in parse_events.
  */
-static int test_parsing_fake(void)
+static int test__parsing_fake(struct test_suite *test __maybe_unused,
+                             int subtest __maybe_unused)
 {
        const struct pmu_events_map *map;
        const struct pmu_event *pe;
@@ -1068,55 +1072,16 @@ static int test_parsing_fake(void)
        return 0;
 }
 
-static const struct {
-       int (*func)(void);
-       const char *desc;
-} pmu_events_testcase_table[] = {
-       {
-               .func = test_pmu_event_table,
-               .desc = "PMU event table sanity",
-       },
-       {
-               .func = test_aliases,
-               .desc = "PMU event map aliases",
-       },
-       {
-               .func = test_parsing,
-               .desc = "Parsing of PMU event table metrics",
-       },
-       {
-               .func = test_parsing_fake,
-               .desc = "Parsing of PMU event table metrics with fake PMUs",
-       },
+static struct test_case pmu_events_tests[] = {
+       TEST_CASE("PMU event table sanity", pmu_event_table),
+       TEST_CASE("PMU event map aliases", aliases),
+       TEST_CASE_REASON("Parsing of PMU event table metrics", parsing,
+                        "some metrics failed"),
+       TEST_CASE("Parsing of PMU event table metrics with fake PMUs", parsing_fake),
+       { .name = NULL, }
 };
 
-const char *test__pmu_events_subtest_get_desc(int subtest)
-{
-       if (subtest < 0 ||
-           subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
-               return NULL;
-       return pmu_events_testcase_table[subtest].desc;
-}
-
-const char *test__pmu_events_subtest_skip_reason(int subtest)
-{
-       if (subtest < 0 ||
-           subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
-               return NULL;
-       if (pmu_events_testcase_table[subtest].func != test_parsing)
-               return NULL;
-       return "some metrics failed";
-}
-
-int test__pmu_events_subtest_get_nr(void)
-{
-       return (int)ARRAY_SIZE(pmu_events_testcase_table);
-}
-
-int test__pmu_events(struct test *test __maybe_unused, int subtest)
-{
-       if (subtest < 0 ||
-           subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
-               return TEST_FAIL;
-       return pmu_events_testcase_table[subtest].func();
-}
+struct test_suite suite__pmu_events = {
+       .desc = "PMU events",
+       .test_cases = pmu_events_tests,
+};
index 714e683..8507bd6 100644 (file)
@@ -137,7 +137,7 @@ static struct list_head *test_terms_list(void)
        return &terms;
 }
 
-int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__pmu(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        char *format = test_format_dir_get();
        LIST_HEAD(formats);
@@ -177,3 +177,5 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
        test_format_dir_put(format);
        return ret;
 }
+
+DEFINE_SUITE("Parse perf pmu format", pmu);
index 98c6d47..6b990ee 100644 (file)
@@ -9,7 +9,7 @@
 #include "tests.h"
 #include "util/debug.h"
 
-int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__python_use(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        char *cmd;
        int ret;
@@ -23,3 +23,5 @@ int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unuse
        free(cmd);
        return ret;
 }
+
+DEFINE_SUITE("'import perf' in python", python_use);
index c83a115..b669d22 100644 (file)
@@ -368,7 +368,7 @@ out_free:
  * checks sample format bits separately and together.  If the test passes %0 is
  * returned, otherwise %-1 is returned.
  */
-int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
        u64 sample_type;
@@ -426,3 +426,5 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
 
        return 0;
 }
+
+DEFINE_SUITE("Sample parsing", sample_parsing);
index ed76c69..9197128 100644 (file)
@@ -76,7 +76,7 @@ static int search_cached_probe(const char *target,
        return ret;
 }
 
-int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused)
 {
        int ret = TEST_FAIL;
        char __tempdir[] = "./test-buildid-XXXXXX";
@@ -114,9 +114,11 @@ error:
        return ret;
 }
 #else
-int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+static int test__sdt_event(struct test_suite *test __maybe_unused, int subtests __maybe_unused)
 {
        pr_debug("Skip SDT event test because SDT support is not compiled\n");
        return TEST_SKIP;
 }
 #endif
+
+DEFINE_SUITE("Probe SDT events", sdt_event);
index 8a168cf..49bd875 100755 (executable)
@@ -12,7 +12,7 @@ skip_if_no_z_record() {
 
 collect_z_record() {
        echo "Collecting compressed record file:"
-       [[ "$(uname -m)" != s390x ]] && gflag='-g'
+       [ "$(uname -m)" != s390x ] && gflag='-g'
        $perf_tool record -o $trace_file $gflag -z -F 5000 -- \
                dd count=500 if=/dev/urandom of=/dev/null
 }
index 2de7fd0..b30dba4 100755 (executable)
@@ -7,11 +7,11 @@ set -e
 for p in $(perf list --raw-dump pmu); do
   echo "Testing $p"
   result=$(perf stat -e "$p" true 2>&1)
-  if [[ ! "$result" =~ "$p" ]] && [[ ! "$result" =~ "<not supported>" ]]; then
+  if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then
     # We failed to see the event and it is supported. Possibly the workload was
     # too small so retry with something longer.
     result=$(perf stat -e "$p" perf bench internals synthesize 2>&1)
-    if [[ ! "$result" =~ "$p" ]]; then
+    if ! echo "$result" | grep -q "$p" ; then
       echo "Event '$p' not printed in:"
       echo "$result"
       exit 1
index 2aed20d..13473ae 100755 (executable)
@@ -23,7 +23,7 @@ compare_number()
 
 # skip if --bpf-counters is not supported
 if ! perf stat --bpf-counters true > /dev/null 2>&1; then
-       if [ "$1" == "-v" ]; then
+       if [ "$1" = "-v" ]; then
                echo "Skipping: --bpf-counters not supported"
                perf --no-pager stat --bpf-counters true || true
        fi
diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh
new file mode 100755 (executable)
index 0000000..e59044e
--- /dev/null
@@ -0,0 +1,89 @@
+#!/bin/sh
+# Check Arm SPE trace data recording and synthesized samples
+
+# Uses the 'perf record' to record trace data of Arm SPE events;
+# then verify if any SPE event samples are generated by SPE with
+# 'perf script' and 'perf report' commands.
+
+# SPDX-License-Identifier: GPL-2.0
+# German Gomez <german.gomez@arm.com>, 2021
+
+skip_if_no_arm_spe_event() {
+       perf list | egrep -q 'arm_spe_[0-9]+//' && return 0
+
+       # arm_spe event doesn't exist
+       return 2
+}
+
+skip_if_no_arm_spe_event || exit 2
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+glb_err=0
+
+cleanup_files()
+{
+       rm -f ${perfdata}
+       exit $glb_err
+}
+
+trap cleanup_files exit term int
+
+arm_spe_report() {
+       if [ $2 != 0 ]; then
+               echo "$1: FAIL"
+               glb_err=$2
+       else
+               echo "$1: PASS"
+       fi
+}
+
+perf_script_samples() {
+       echo "Looking at perf.data file for dumping samples:"
+
+       # from arm-spe.c/arm_spe_synth_events()
+       events="(ld1-miss|ld1-access|llc-miss|lld-access|tlb-miss|tlb-access|branch-miss|remote-access|memory)"
+
+       # Below is an example of the samples dumping:
+       #       dd  3048 [002]          1    l1d-access:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
+       #       dd  3048 [002]          1    tlb-access:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
+       #       dd  3048 [002]          1        memory:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
+       perf script -F,-time -i ${perfdata} 2>&1 | \
+               egrep " +$1 +[0-9]+ .* +${events}:(.*:)? +" > /dev/null 2>&1
+}
+
+perf_report_samples() {
+       echo "Looking at perf.data file for reporting samples:"
+
+       # Below is an example of the samples reporting:
+       #   73.04%    73.04%  dd    libc-2.27.so      [.] _dl_addr
+       #    7.71%     7.71%  dd    libc-2.27.so      [.] getenv
+       #    2.59%     2.59%  dd    ld-2.27.so        [.] strcmp
+       perf report --stdio -i ${perfdata} 2>&1 | \
+               egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
+}
+
+arm_spe_snapshot_test() {
+       echo "Recording trace with snapshot mode $perfdata"
+       perf record -o ${perfdata} -e arm_spe// -S \
+               -- dd if=/dev/zero of=/dev/null > /dev/null 2>&1 &
+       PERFPID=$!
+
+       # Wait for perf program
+       sleep 1
+
+       # Send signal to snapshot trace data
+       kill -USR2 $PERFPID
+
+       # Stop perf program
+       kill $PERFPID
+       wait $PERFPID
+
+       perf_script_samples dd &&
+       perf_report_samples dd
+
+       err=$?
+       arm_spe_report "SPE snapshot testing" $err
+}
+
+arm_spe_snapshot_test
+exit $glb_err
index c191150..2eb096b 100644 (file)
@@ -47,7 +47,8 @@ static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__synthesize_stat_config(struct test_suite *test __maybe_unused,
+                                       int subtest __maybe_unused)
 {
        struct perf_stat_config stat_config = {
                .aggr_mode      = AGGR_CORE,
@@ -77,7 +78,7 @@ static int process_stat_event(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int test__synthesize_stat(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__synthesize_stat(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_counts_values count;
 
@@ -103,7 +104,7 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__synthesize_stat_round(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        TEST_ASSERT_VAL("failed to synthesize stat_config",
                !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
@@ -111,3 +112,7 @@ int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __
 
        return 0;
 }
+
+DEFINE_SUITE("Synthesize stat config", synthesize_stat_config);
+DEFINE_SUITE("Synthesize stat", synthesize_stat);
+DEFINE_SUITE("Synthesize stat round", synthesize_stat_round);
index 7498884..9cd6fec 100644 (file)
@@ -133,7 +133,7 @@ out_delete_evlist:
        return err;
 }
 
-int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__sw_clock_freq(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int ret;
 
@@ -143,3 +143,5 @@ int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_un
 
        return ret;
 }
+
+DEFINE_SUITE("Software clock events period values", sw_clock_freq);
index 62c0ec2..0c0c232 100644 (file)
@@ -321,7 +321,7 @@ out_free_nodes:
  * evsel->core.system_wide and evsel->tracking flags (respectively) with other events
  * sometimes enabled or disabled.
  */
-int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        const char *sched_switch = "sched:sched_switch";
        struct switch_tracking switch_tracking = { .tids = NULL, };
@@ -588,3 +588,5 @@ out_err:
        err = -1;
        goto out;
 }
+
+DEFINE_SUITE("Track with sched_switch", switch_tracking);
index 4c2969d..25f075f 100644 (file)
@@ -39,7 +39,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
  * if the number of exit event reported by the kernel is 1 or not
  * in order to check the kernel returns correct number of event.
  */
-int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__task_exit(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        int err = -1;
        union perf_event *event;
@@ -151,3 +151,5 @@ out_delete_evlist:
        evlist__delete(evlist);
        return err;
 }
+
+DEFINE_SUITE("Number of exit events of a simple workload", task_exit);
index fe1306f..8f65098 100644 (file)
@@ -27,112 +27,146 @@ enum {
        TEST_SKIP = -2,
 };
 
-struct test {
+struct test_suite;
+
+typedef int (*test_fnptr)(struct test_suite *, int);
+
+struct test_case {
+       const char *name;
+       const char *desc;
+       const char *skip_reason;
+       test_fnptr run_case;
+};
+
+struct test_suite {
        const char *desc;
-       int (*func)(struct test *test, int subtest);
-       struct {
-               bool skip_if_fail;
-               int (*get_nr)(void);
-               const char *(*get_desc)(int subtest);
-               const char *(*skip_reason)(int subtest);
-       } subtest;
-       bool (*is_supported)(void);
+       struct test_case *test_cases;
        void *priv;
 };
 
+#define DECLARE_SUITE(name) \
+       extern struct test_suite suite__##name;
+
+#define TEST_CASE(description, _name)                  \
+       {                                               \
+               .name = #_name,                         \
+               .desc = description,                    \
+               .run_case = test__##_name,              \
+       }
+
+#define TEST_CASE_REASON(description, _name, _reason)  \
+       {                                               \
+               .name = #_name,                         \
+               .desc = description,                    \
+               .run_case = test__##_name,              \
+               .skip_reason = _reason,                 \
+       }
+
+#define DEFINE_SUITE(description, _name)               \
+       struct test_case tests__##_name[] = {           \
+               TEST_CASE(description, _name),          \
+               {       .name = NULL, }                 \
+       };                                              \
+       struct test_suite suite__##_name = {            \
+               .desc = description,                    \
+               .test_cases = tests__##_name,           \
+       }
+
 /* Tests */
-int test__vmlinux_matches_kallsyms(struct test *test, int subtest);
-int test__openat_syscall_event(struct test *test, int subtest);
-int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest);
-int test__basic_mmap(struct test *test, int subtest);
-int test__PERF_RECORD(struct test *test, int subtest);
-int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
-int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
-int test__syscall_openat_tp_fields(struct test *test, int subtest);
-int test__pmu(struct test *test, int subtest);
-int test__pmu_events(struct test *test, int subtest);
-const char *test__pmu_events_subtest_get_desc(int subtest);
-const char *test__pmu_events_subtest_skip_reason(int subtest);
-int test__pmu_events_subtest_get_nr(void);
-int test__attr(struct test *test, int subtest);
-int test__dso_data(struct test *test, int subtest);
-int test__dso_data_cache(struct test *test, int subtest);
-int test__dso_data_reopen(struct test *test, int subtest);
-int test__parse_events(struct test *test, int subtest);
-int test__hists_link(struct test *test, int subtest);
-int test__python_use(struct test *test, int subtest);
-int test__bp_signal(struct test *test, int subtest);
-int test__bp_signal_overflow(struct test *test, int subtest);
-int test__bp_accounting(struct test *test, int subtest);
-int test__wp(struct test *test, int subtest);
-const char *test__wp_subtest_get_desc(int subtest);
-const char *test__wp_subtest_skip_reason(int subtest);
-int test__wp_subtest_get_nr(void);
-int test__task_exit(struct test *test, int subtest);
-int test__mem(struct test *test, int subtest);
-int test__sw_clock_freq(struct test *test, int subtest);
-int test__code_reading(struct test *test, int subtest);
-int test__sample_parsing(struct test *test, int subtest);
-int test__keep_tracking(struct test *test, int subtest);
-int test__parse_no_sample_id_all(struct test *test, int subtest);
-int test__dwarf_unwind(struct test *test, int subtest);
-int test__expr(struct test *test, int subtest);
-int test__hists_filter(struct test *test, int subtest);
-int test__mmap_thread_lookup(struct test *test, int subtest);
-int test__thread_maps_share(struct test *test, int subtest);
-int test__hists_output(struct test *test, int subtest);
-int test__hists_cumulate(struct test *test, int subtest);
-int test__switch_tracking(struct test *test, int subtest);
-int test__fdarray__filter(struct test *test, int subtest);
-int test__fdarray__add(struct test *test, int subtest);
-int test__kmod_path__parse(struct test *test, int subtest);
-int test__thread_map(struct test *test, int subtest);
-int test__llvm(struct test *test, int subtest);
-const char *test__llvm_subtest_get_desc(int subtest);
-int test__llvm_subtest_get_nr(void);
-int test__bpf(struct test *test, int subtest);
-const char *test__bpf_subtest_get_desc(int subtest);
-int test__bpf_subtest_get_nr(void);
-int test__session_topology(struct test *test, int subtest);
-int test__thread_map_synthesize(struct test *test, int subtest);
-int test__thread_map_remove(struct test *test, int subtest);
-int test__cpu_map_synthesize(struct test *test, int subtest);
-int test__synthesize_stat_config(struct test *test, int subtest);
-int test__synthesize_stat(struct test *test, int subtest);
-int test__synthesize_stat_round(struct test *test, int subtest);
-int test__event_update(struct test *test, int subtest);
-int test__event_times(struct test *test, int subtest);
-int test__backward_ring_buffer(struct test *test, int subtest);
-int test__cpu_map_print(struct test *test, int subtest);
-int test__cpu_map_merge(struct test *test, int subtest);
-int test__sdt_event(struct test *test, int subtest);
-int test__is_printable_array(struct test *test, int subtest);
-int test__bitmap_print(struct test *test, int subtest);
-int test__perf_hooks(struct test *test, int subtest);
-int test__clang(struct test *test, int subtest);
-const char *test__clang_subtest_get_desc(int subtest);
-int test__clang_subtest_get_nr(void);
-int test__unit_number__scnprint(struct test *test, int subtest);
-int test__mem2node(struct test *t, int subtest);
-int test__maps__merge_in(struct test *t, int subtest);
-int test__time_utils(struct test *t, int subtest);
-int test__jit_write_elf(struct test *test, int subtest);
-int test__api_io(struct test *test, int subtest);
-int test__demangle_java(struct test *test, int subtest);
-int test__demangle_ocaml(struct test *test, int subtest);
-int test__pfm(struct test *test, int subtest);
-const char *test__pfm_subtest_get_desc(int subtest);
-int test__pfm_subtest_get_nr(void);
-int test__parse_metric(struct test *test, int subtest);
-int test__pe_file_parsing(struct test *test, int subtest);
-int test__expand_cgroup_events(struct test *test, int subtest);
-int test__perf_time_to_tsc(struct test *test, int subtest);
-int test__dlfilter(struct test *test, int subtest);
-
-bool test__bp_signal_is_supported(void);
-bool test__bp_account_is_supported(void);
-bool test__wp_is_supported(void);
-bool test__tsc_is_supported(void);
+DECLARE_SUITE(vmlinux_matches_kallsyms);
+DECLARE_SUITE(openat_syscall_event);
+DECLARE_SUITE(openat_syscall_event_on_all_cpus);
+DECLARE_SUITE(basic_mmap);
+DECLARE_SUITE(PERF_RECORD);
+DECLARE_SUITE(perf_evsel__roundtrip_name_test);
+DECLARE_SUITE(perf_evsel__tp_sched_test);
+DECLARE_SUITE(syscall_openat_tp_fields);
+DECLARE_SUITE(pmu);
+DECLARE_SUITE(pmu_events);
+DECLARE_SUITE(attr);
+DECLARE_SUITE(dso_data);
+DECLARE_SUITE(dso_data_cache);
+DECLARE_SUITE(dso_data_reopen);
+DECLARE_SUITE(parse_events);
+DECLARE_SUITE(hists_link);
+DECLARE_SUITE(python_use);
+DECLARE_SUITE(bp_signal);
+DECLARE_SUITE(bp_signal_overflow);
+DECLARE_SUITE(bp_accounting);
+DECLARE_SUITE(wp);
+DECLARE_SUITE(task_exit);
+DECLARE_SUITE(mem);
+DECLARE_SUITE(sw_clock_freq);
+DECLARE_SUITE(code_reading);
+DECLARE_SUITE(sample_parsing);
+DECLARE_SUITE(keep_tracking);
+DECLARE_SUITE(parse_no_sample_id_all);
+DECLARE_SUITE(dwarf_unwind);
+DECLARE_SUITE(expr);
+DECLARE_SUITE(hists_filter);
+DECLARE_SUITE(mmap_thread_lookup);
+DECLARE_SUITE(thread_maps_share);
+DECLARE_SUITE(hists_output);
+DECLARE_SUITE(hists_cumulate);
+DECLARE_SUITE(switch_tracking);
+DECLARE_SUITE(fdarray__filter);
+DECLARE_SUITE(fdarray__add);
+DECLARE_SUITE(kmod_path__parse);
+DECLARE_SUITE(thread_map);
+DECLARE_SUITE(llvm);
+DECLARE_SUITE(bpf);
+DECLARE_SUITE(session_topology);
+DECLARE_SUITE(thread_map_synthesize);
+DECLARE_SUITE(thread_map_remove);
+DECLARE_SUITE(cpu_map_synthesize);
+DECLARE_SUITE(synthesize_stat_config);
+DECLARE_SUITE(synthesize_stat);
+DECLARE_SUITE(synthesize_stat_round);
+DECLARE_SUITE(event_update);
+DECLARE_SUITE(event_times);
+DECLARE_SUITE(backward_ring_buffer);
+DECLARE_SUITE(cpu_map_print);
+DECLARE_SUITE(cpu_map_merge);
+DECLARE_SUITE(sdt_event);
+DECLARE_SUITE(is_printable_array);
+DECLARE_SUITE(bitmap_print);
+DECLARE_SUITE(perf_hooks);
+DECLARE_SUITE(clang);
+DECLARE_SUITE(unit_number__scnprint);
+DECLARE_SUITE(mem2node);
+DECLARE_SUITE(maps__merge_in);
+DECLARE_SUITE(time_utils);
+DECLARE_SUITE(jit_write_elf);
+DECLARE_SUITE(api_io);
+DECLARE_SUITE(demangle_java);
+DECLARE_SUITE(demangle_ocaml);
+DECLARE_SUITE(pfm);
+DECLARE_SUITE(parse_metric);
+DECLARE_SUITE(pe_file_parsing);
+DECLARE_SUITE(expand_cgroup_events);
+DECLARE_SUITE(perf_time_to_tsc);
+DECLARE_SUITE(dlfilter);
+
+/*
+ * PowerPC and S390 do not support creation of instruction breakpoints using the
+ * perf_event interface.
+ *
+ * ARM requires explicit rounding down of the instruction pointer in Thumb mode,
+ * and then requires the single-step to be handled explicitly in the overflow
+ * handler to avoid stepping into the SIGIO handler and getting stuck on the
+ * breakpointed instruction.
+ *
+ * Since arm64 has the same issue with arm for the single-step handling, this
+ * case also gets stuck on the breakpointed instruction.
+ *
+ * Just disable the test for these architectures until these issues are
+ * resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || defined(__aarch64__)
+#define BP_SIGNAL_IS_SUPPORTED 0
+#else
+#define BP_SIGNAL_IS_SUPPORTED 1
+#endif
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
@@ -142,7 +176,7 @@ int test__arch_unwind_sample(struct perf_sample *sample,
 #endif
 
 #if defined(__arm__)
-int test__vectors_page(struct test *test, int subtest);
+DECLARE_SUITE(vectors_page);
 #endif
 
 #endif /* TESTS_H */
index d1e208b..e413c13 100644 (file)
@@ -19,7 +19,7 @@ struct machine;
 #define NAME   (const char *) "perf"
 #define NAMEUL (unsigned long) NAME
 
-int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_map(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_thread_map *map;
 
@@ -86,7 +86,7 @@ static int process_event(struct perf_tool *tool __maybe_unused,
        return 0;
 }
 
-int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_map_synthesize(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_thread_map *threads;
 
@@ -106,7 +106,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __
        return 0;
 }
 
-int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_map_remove(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct perf_thread_map *threads;
        char *str;
@@ -145,3 +145,7 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
        perf_thread_map__put(threads);
        return 0;
 }
+
+DEFINE_SUITE("Thread map", thread_map);
+DEFINE_SUITE("Synthesize thread map", thread_map_synthesize);
+DEFINE_SUITE("Remove thread map", thread_map_remove);
index 9371484..84edd82 100644 (file)
@@ -4,7 +4,7 @@
 #include "thread.h"
 #include "debug.h"
 
-int test__thread_maps_share(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__thread_maps_share(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        struct machines machines;
        struct machine *machine;
@@ -96,3 +96,5 @@ int test__thread_maps_share(struct test *test __maybe_unused, int subtest __mayb
        machines__exit(&machines);
        return 0;
 }
+
+DEFINE_SUITE("Share thread maps", thread_maps_share);
index fe57ca3..38df103 100644 (file)
@@ -131,7 +131,7 @@ out:
        return pass;
 }
 
-int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__time_utils(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
        bool pass = true;
 
@@ -249,3 +249,5 @@ int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused)
 
        return pass ? 0 : TEST_FAIL;
 }
+
+DEFINE_SUITE("time utils", time_utils);
index 4574c46..8699861 100644 (file)
@@ -175,7 +175,7 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
        return 0;
 }
 
-int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__session_topology(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
 {
        char path[PATH_MAX];
        struct perf_cpu_map *map;
@@ -201,3 +201,5 @@ free_path:
        unlink(path);
        return ret;
 }
+
+DEFINE_SUITE("Session topology", session_topology);
index 3721757..88bcada 100644 (file)
@@ -7,7 +7,7 @@
 #include "units.h"
 #include "debug.h"
 
-int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused)
+static int test__unit_number__scnprint(struct test_suite *t __maybe_unused, int subtest __maybe_unused)
 {
        struct {
                u64              n;
@@ -38,3 +38,5 @@ int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __may
 
        return TEST_OK;
 }
+
+DEFINE_SUITE("unit_number__scnprintf", unit_number__scnprint);
index 4f884aa..e80df13 100644 (file)
@@ -111,7 +111,8 @@ static bool is_ignored_symbol(const char *name, char type)
        return false;
 }
 
-int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused)
+static int test__vmlinux_matches_kallsyms(struct test_suite *test __maybe_unused,
+                                       int subtest __maybe_unused)
 {
        int err = -1;
        struct rb_node *nd;
@@ -352,3 +353,5 @@ out:
        machine__exit(&vmlinux);
        return err;
 }
+
+DEFINE_SUITE("vmlinux symtab matches kallsyms", vmlinux_matches_kallsyms);
index 9387fa7..820d942 100644 (file)
@@ -48,6 +48,7 @@ static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type,
        attr->exclude_hv     = 1;
 }
 
+#ifndef __s390x__
 static int __event(int wp_type, void *wp_addr, unsigned long wp_len)
 {
        int fd;
@@ -61,9 +62,14 @@ static int __event(int wp_type, void *wp_addr, unsigned long wp_len)
 
        return fd;
 }
+#endif
 
-static int wp_ro_test(void)
+static int test__wp_ro(struct test_suite *test __maybe_unused,
+                      int subtest __maybe_unused)
 {
+#if defined(__s390x__) || defined(__x86_64__) || defined(__i386__)
+       return TEST_SKIP;
+#else
        int fd;
        unsigned long tmp, tmp1 = rand();
 
@@ -79,10 +85,15 @@ static int wp_ro_test(void)
 
        close(fd);
        return 0;
+#endif
 }
 
-static int wp_wo_test(void)
+static int test__wp_wo(struct test_suite *test __maybe_unused,
+                      int subtest __maybe_unused)
 {
+#if defined(__s390x__)
+       return TEST_SKIP;
+#else
        int fd;
        unsigned long tmp, tmp1 = rand();
 
@@ -98,10 +109,15 @@ static int wp_wo_test(void)
 
        close(fd);
        return 0;
+#endif
 }
 
-static int wp_rw_test(void)
+static int test__wp_rw(struct test_suite *test __maybe_unused,
+                      int subtest __maybe_unused)
 {
+#if defined(__s390x__)
+       return TEST_SKIP;
+#else
        int fd;
        unsigned long tmp, tmp1 = rand();
 
@@ -118,10 +134,15 @@ static int wp_rw_test(void)
 
        close(fd);
        return 0;
+#endif
 }
 
-static int wp_modify_test(void)
+static int test__wp_modify(struct test_suite *test __maybe_unused,
+                          int subtest __maybe_unused)
 {
+#if defined(__s390x__)
+       return TEST_SKIP;
+#else
        int fd, ret;
        unsigned long tmp = rand();
        struct perf_event_attr new_attr;
@@ -163,93 +184,18 @@ static int wp_modify_test(void)
 
        close(fd);
        return 0;
-}
-
-static bool wp_ro_supported(void)
-{
-#if defined (__x86_64__) || defined (__i386__)
-       return false;
-#else
-       return true;
-#endif
-}
-
-static const char *wp_ro_skip_msg(void)
-{
-#if defined (__x86_64__) || defined (__i386__)
-       return "missing hardware support";
-#else
-       return NULL;
 #endif
 }
 
-static struct {
-       const char *desc;
-       int (*target_func)(void);
-       bool (*is_supported)(void);
-       const char *(*skip_msg)(void);
-} wp_testcase_table[] = {
-       {
-               .desc = "Read Only Watchpoint",
-               .target_func = &wp_ro_test,
-               .is_supported = &wp_ro_supported,
-               .skip_msg = &wp_ro_skip_msg,
-       },
-       {
-               .desc = "Write Only Watchpoint",
-               .target_func = &wp_wo_test,
-       },
-       {
-               .desc = "Read / Write Watchpoint",
-               .target_func = &wp_rw_test,
-       },
-       {
-               .desc = "Modify Watchpoint",
-               .target_func = &wp_modify_test,
-       },
+static struct test_case wp_tests[] = {
+       TEST_CASE_REASON("Read Only Watchpoint", wp_ro, "missing hardware support"),
+       TEST_CASE_REASON("Write Only Watchpoint", wp_wo, "missing hardware support"),
+       TEST_CASE_REASON("Read / Write Watchpoint", wp_rw, "missing hardware support"),
+       TEST_CASE_REASON("Modify Watchpoint", wp_modify, "missing hardware support"),
+       { .name = NULL, }
 };
 
-int test__wp_subtest_get_nr(void)
-{
-       return (int)ARRAY_SIZE(wp_testcase_table);
-}
-
-const char *test__wp_subtest_get_desc(int i)
-{
-       if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
-               return NULL;
-       return wp_testcase_table[i].desc;
-}
-
-const char *test__wp_subtest_skip_reason(int i)
-{
-       if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
-               return NULL;
-       if (!wp_testcase_table[i].skip_msg)
-               return NULL;
-       return wp_testcase_table[i].skip_msg();
-}
-
-int test__wp(struct test *test __maybe_unused, int i)
-{
-       if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table))
-               return TEST_FAIL;
-
-       if (wp_testcase_table[i].is_supported &&
-           !wp_testcase_table[i].is_supported())
-               return TEST_SKIP;
-
-       return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL;
-}
-
-/* The s390 so far does not have support for
- * instruction breakpoint using the perf_event_open() system call.
- */
-bool test__wp_is_supported(void)
-{
-#if defined(__s390x__)
-       return false;
-#else
-       return true;
-#endif
-}
+struct test_suite suite__wp = {
+       .desc = "Watchpoint",
+       .test_cases = wp_tests,
+};
index d6dfe68..f527a46 100644 (file)
@@ -62,6 +62,8 @@ size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_
 
 extern struct strarray strarray__socket_families;
 
+extern struct strarray strarray__socket_level;
+
 /**
  * augmented_arg: extra payload for syscall pointer arguments
  
@@ -230,6 +232,9 @@ size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg
 size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol
 
+size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SK_LEVEL syscall_arg__scnprintf_socket_level
+
 size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
 
index cd11063..2e0e867 100644 (file)
@@ -7,7 +7,7 @@
 #include <sys/un.h>
 #include <arpa/inet.h>
 
-#include "trace/beauty/generated/socket_arrays.c"
+#include "trace/beauty/generated/sockaddr.c"
 DEFINE_STRARRAY(socket_families, "PF_");
 
 static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
diff --git a/tools/perf/trace/beauty/sockaddr.sh b/tools/perf/trace/beauty/sockaddr.sh
new file mode 100755 (executable)
index 0000000..3820e5c
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# This one uses a copy from the kernel sources headers that is in a
+# place used just for these tools/perf/beauty/ usage, we shouldn't not
+# put it in tools/include/linux otherwise they would be used in the
+# normal compiler building process and would drag needless stuff from the
+# kernel.
+
+# When what these scripts need is already in tools/include/ then use it,
+# otherwise grab and check the copy from the kernel sources just for these
+# string table building scripts.
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+
+printf "static const char *socket_families[] = {\n"
+# #define AF_LOCAL     1       /* POSIX name for AF_UNIX       */
+regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+
+egrep $regex ${header_dir}/socket.h | \
+       sed -r "s/$regex/\2 \1/g"       | \
+       xargs printf "\t[%s] = \"%s\",\n" | \
+       egrep -v "\"(UNIX|MAX)\""
+printf "};\n"
index f23a3dd..b0870c7 100644 (file)
@@ -9,9 +9,10 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 
+#include "trace/beauty/generated/socket.c"
+
 static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size, bool show_prefix)
 {
-#include "trace/beauty/generated/socket_ipproto_array.c"
        static DEFINE_STRARRAY(socket_ipproto, "IPPROTO_");
 
        return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", show_prefix, protocol);
@@ -26,3 +27,21 @@ size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct sysc
 
        return syscall_arg__scnprintf_int(bf, size, arg);
 }
+
+static size_t socket__scnprintf_level(int level, char *bf, size_t size, bool show_prefix)
+{
+#if defined(__alpha__) || defined(__hppa__) || defined(__mips__) || defined(__sparc__)
+       const int sol_socket = 0xffff;
+#else
+       const int sol_socket = 1;
+#endif
+       if (level == sol_socket)
+               return scnprintf(bf, size, "%sSOCKET", show_prefix ? "SOL_" : "");
+
+       return strarray__scnprintf(&strarray__socket_level, bf, size, "%d", show_prefix, level);
+}
+
+size_t syscall_arg__scnprintf_socket_level(char *bf, size_t size, struct syscall_arg *arg)
+{
+       return socket__scnprintf_level(arg->val, bf, size, arg->show_string_prefix);
+}
index 3820e5c..76330ac 100755 (executable)
@@ -1,24 +1,28 @@
 #!/bin/sh
 # SPDX-License-Identifier: LGPL-2.1
 
-# This one uses a copy from the kernel sources headers that is in a
-# place used just for these tools/perf/beauty/ usage, we shouldn't not
-# put it in tools/include/linux otherwise they would be used in the
-# normal compiler building process and would drag needless stuff from the
-# kernel.
+if [ $# -gt 0 ] ; then
+       uapi_header_dir=$1
+       beauty_header_dir=$2
+else
+       uapi_header_dir=tools/include/uapi/linux/
+       beauty_header_dir=tools/perf/trace/beauty/include/linux/
+fi
 
-# When what these scripts need is already in tools/include/ then use it,
-# otherwise grab and check the copy from the kernel sources just for these
-# string table building scripts.
+printf "static const char *socket_ipproto[] = {\n"
+ipproto_regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
 
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+egrep $ipproto_regex ${uapi_header_dir}/in.h | \
+       sed -r "s/$ipproto_regex/\2 \1/g"       | \
+       sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n\n"
 
-printf "static const char *socket_families[] = {\n"
-# #define AF_LOCAL     1       /* POSIX name for AF_UNIX       */
-regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+printf "static const char *socket_level[] = {\n"
+socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?'
 
-egrep $regex ${header_dir}/socket.h | \
-       sed -r "s/$regex/\2 \1/g"       | \
-       xargs printf "\t[%s] = \"%s\",\n" | \
-       egrep -v "\"(UNIX|MAX)\""
-printf "};\n"
+egrep $socket_level_regex ${beauty_header_dir}/socket.h | \
+       sed -r "s/$socket_level_regex/\2 \1/g"  | \
+       sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n\n"
+
+printf 'DEFINE_STRARRAY(socket_level, "SOL_");\n'
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
deleted file mode 100755 (executable)
index de0f2f2..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: LGPL-2.1
-
-[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
-
-printf "static const char *socket_ipproto[] = {\n"
-regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
-
-egrep $regex ${header_dir}/in.h | \
-       sed -r "s/$regex/\2 \1/g"       | \
-       sort | xargs printf "\t[%s] = \"%s\",\n"
-printf "};\n"
index 8511af5..0190068 100644 (file)
@@ -1255,6 +1255,17 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r
        return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name);
 }
 
+void annotation__init(struct annotation *notes)
+{
+       pthread_mutex_init(&notes->lock, NULL);
+}
+
+void annotation__exit(struct annotation *notes)
+{
+       annotated_source__delete(notes->src);
+       pthread_mutex_destroy(&notes->lock);
+}
+
 static void annotation_line__add(struct annotation_line *al, struct list_head *head)
 {
        list_add_tail(&al->node, head);
@@ -3132,7 +3143,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
        notes->nr_events = nr_pcnt;
 
        annotation__update_column_widths(notes);
-       sym->annotate2 = true;
+       sym->annotate2 = 1;
 
        return 0;
 
index 3757416..986f2bb 100644 (file)
@@ -299,6 +299,9 @@ struct annotation {
        struct annotated_source *src;
 };
 
+void annotation__init(struct annotation *notes);
+void annotation__exit(struct annotation *notes);
+
 static inline int annotation__cycles_width(struct annotation *notes)
 {
        if (notes->have_cycles && notes->options->show_minmax_cycle)
index 32fe418..3fc528c 100644 (file)
@@ -151,6 +151,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
        u64 payload, ip;
 
        memset(&decoder->record, 0x0, sizeof(decoder->record));
+       decoder->record.context_id = (u64)-1;
 
        while (1) {
                err = arm_spe_get_next_packet(decoder);
@@ -180,6 +181,7 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
                case ARM_SPE_COUNTER:
                        break;
                case ARM_SPE_CONTEXT:
+                       decoder->record.context_id = payload;
                        break;
                case ARM_SPE_OP_TYPE:
                        if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
index 59bdb73..46a8556 100644 (file)
@@ -38,6 +38,7 @@ struct arm_spe_record {
        u64 timestamp;
        u64 virt_addr;
        u64 phys_addr;
+       u64 context_id;
 };
 
 struct arm_spe_insn;
index 58b7069..4748bcf 100644 (file)
@@ -71,6 +71,7 @@ struct arm_spe {
        u64                             kernel_start;
 
        unsigned long                   num_events;
+       u8                              use_ctx_pkt_for_pid;
 };
 
 struct arm_spe_queue {
@@ -100,7 +101,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
        const char *color = PERF_COLOR_BLUE;
 
        color_fprintf(stdout, color,
-                     ". ... ARM SPE data: size %zu bytes\n",
+                     ". ... ARM SPE data: size %#zx bytes\n",
                      len);
 
        while (len) {
@@ -226,6 +227,44 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
                PERF_RECORD_MISC_USER;
 }
 
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+                                   struct auxtrace_queue *queue)
+{
+       struct arm_spe_queue *speq = queue->priv;
+       pid_t tid;
+
+       tid = machine__get_current_tid(spe->machine, speq->cpu);
+       if (tid != -1) {
+               speq->tid = tid;
+               thread__zput(speq->thread);
+       } else
+               speq->tid = queue->tid;
+
+       if ((!speq->thread) && (speq->tid != -1)) {
+               speq->thread = machine__find_thread(spe->machine, -1,
+                                                   speq->tid);
+       }
+
+       if (speq->thread) {
+               speq->pid = speq->thread->pid_;
+               if (queue->cpu == -1)
+                       speq->cpu = speq->thread->cpu;
+       }
+}
+
+static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
+{
+       struct arm_spe *spe = speq->spe;
+       int err = machine__set_current_tid(spe->machine, speq->cpu, -1, tid);
+
+       if (err)
+               return err;
+
+       arm_spe_set_pid_tid_cpu(spe, &spe->queues.queue_array[speq->queue_nr]);
+
+       return 0;
+}
+
 static void arm_spe_prep_sample(struct arm_spe *spe,
                                struct arm_spe_queue *speq,
                                union perf_event *event,
@@ -460,6 +499,19 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
                 * can correlate samples between Arm SPE trace data and other
                 * perf events with correct time ordering.
                 */
+
+               /*
+                * Update pid/tid info.
+                */
+               record = &speq->decoder->record;
+               if (!spe->timeless_decoding && record->context_id != (u64)-1) {
+                       ret = arm_spe_set_tid(speq, record->context_id);
+                       if (ret)
+                               return ret;
+
+                       spe->use_ctx_pkt_for_pid = true;
+               }
+
                ret = arm_spe_sample(speq);
                if (ret)
                        return ret;
@@ -586,31 +638,6 @@ static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
        return timeless_decoding;
 }
 
-static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
-                                   struct auxtrace_queue *queue)
-{
-       struct arm_spe_queue *speq = queue->priv;
-       pid_t tid;
-
-       tid = machine__get_current_tid(spe->machine, speq->cpu);
-       if (tid != -1) {
-               speq->tid = tid;
-               thread__zput(speq->thread);
-       } else
-               speq->tid = queue->tid;
-
-       if ((!speq->thread) && (speq->tid != -1)) {
-               speq->thread = machine__find_thread(spe->machine, -1,
-                                                   speq->tid);
-       }
-
-       if (speq->thread) {
-               speq->pid = speq->thread->pid_;
-               if (queue->cpu == -1)
-                       speq->cpu = speq->thread->cpu;
-       }
-}
-
 static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
 {
        unsigned int queue_nr;
@@ -641,7 +668,12 @@ static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
                        ts = timestamp;
                }
 
-               arm_spe_set_pid_tid_cpu(spe, queue);
+               /*
+                * A previous context-switch event has set pid/tid in the machine's context, so
+                * here we need to update the pid/tid in the thread and SPE queue.
+                */
+               if (!spe->use_ctx_pkt_for_pid)
+                       arm_spe_set_pid_tid_cpu(spe, queue);
 
                ret = arm_spe_run_decoder(speq, &ts);
                if (ret < 0) {
@@ -681,6 +713,25 @@ static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
        return 0;
 }
 
+static int arm_spe_context_switch(struct arm_spe *spe, union perf_event *event,
+                                 struct perf_sample *sample)
+{
+       pid_t pid, tid;
+       int cpu;
+
+       if (!(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT))
+               return 0;
+
+       pid = event->context_switch.next_prev_pid;
+       tid = event->context_switch.next_prev_tid;
+       cpu = sample->cpu;
+
+       if (tid == -1)
+               pr_warning("context_switch event has no tid\n");
+
+       return machine__set_current_tid(spe->machine, cpu, pid, tid);
+}
+
 static int arm_spe_process_event(struct perf_session *session,
                                 union perf_event *event,
                                 struct perf_sample *sample,
@@ -718,6 +769,13 @@ static int arm_spe_process_event(struct perf_session *session,
                }
        } else if (timestamp) {
                err = arm_spe_process_queues(spe, timestamp);
+               if (err)
+                       return err;
+
+               if (!spe->use_ctx_pkt_for_pid &&
+                   (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE ||
+                   event->header.type == PERF_RECORD_SWITCH))
+                       err = arm_spe_context_switch(spe, event, sample);
        }
 
        return err;
@@ -783,7 +841,15 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused,
                return arm_spe_process_timeless_queues(spe, -1,
                                MAX_TIMESTAMP - 1);
 
-       return arm_spe_process_queues(spe, MAX_TIMESTAMP);
+       ret = arm_spe_process_queues(spe, MAX_TIMESTAMP);
+       if (ret)
+               return ret;
+
+       if (!spe->use_ctx_pkt_for_pid)
+               ui__warning("Arm SPE CONTEXT packets not found in the traces.\n"
+                           "Matching of TIDs to SPE events could be inaccurate.\n");
+
+       return 0;
 }
 
 static void arm_spe_free_queue(void *priv)
index 4d3b4cd..a517eaa 100644 (file)
@@ -33,6 +33,33 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
        return err ? ERR_PTR(err) : btf;
 }
 
+struct bpf_program * __weak
+bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+       return bpf_program__next(prev, obj);
+#pragma GCC diagnostic pop
+}
+
+struct bpf_map * __weak
+bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+       return bpf_map__next(prev, obj);
+#pragma GCC diagnostic pop
+}
+
+const void * __weak
+btf__raw_data(const struct btf *btf_ro, __u32 *size)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+       return btf__get_raw_data(btf_ro, size);
+#pragma GCC diagnostic pop
+}
+
 static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
 {
        int ret = 0;
@@ -119,7 +146,11 @@ static int perf_env__fetch_btf(struct perf_env *env,
        node->data_size = data_size;
        memcpy(node->data, data, data_size);
 
-       perf_env__insert_btf(env, node);
+       if (!perf_env__insert_btf(env, node)) {
+               /* Insertion failed because of a duplicate. */
+               free(node);
+               return -1;
+       }
        return 0;
 }
 
index 2df8a45..d3731a8 100644 (file)
@@ -12,8 +12,9 @@ extern "C" {
 extern void perf_clang__init(void);
 extern void perf_clang__cleanup(void);
 
-extern int test__clang_to_IR(void);
-extern int test__clang_to_obj(void);
+struct test_suite;
+extern int test__clang_to_IR(struct test_suite *test, int subtest);
+extern int test__clang_to_obj(struct test_suite *test, int subtest);
 
 extern int perf_clang__compile_bpf(const char *filename,
                                   void **p_obj_buf,
@@ -26,9 +27,6 @@ extern int perf_clang__compile_bpf(const char *filename,
 static inline void perf_clang__init(void) { }
 static inline void perf_clang__cleanup(void) { }
 
-static inline int test__clang_to_IR(void) { return -1; }
-static inline int test__clang_to_obj(void) { return -1;}
-
 static inline int
 perf_clang__compile_bpf(const char *filename __maybe_unused,
                        void **p_obj_buf __maybe_unused,
index 21b2360..a4683ca 100644 (file)
@@ -35,7 +35,8 @@ __test__clang_to_IR(void)
 }
 
 extern "C" {
-int test__clang_to_IR(void)
+int test__clang_to_IR(struct test_suite *test __maybe_unused,
+                      int subtest __maybe_unused)
 {
        perf_clang_scope _scope;
 
@@ -48,7 +49,8 @@ int test__clang_to_IR(void)
        return -1;
 }
 
-int test__clang_to_obj(void)
+int test__clang_to_obj(struct test_suite *test __maybe_unused,
+                       int subtest __maybe_unused)
 {
        perf_clang_scope _scope;
 
index ec77e2a..51b429c 100644 (file)
 #include "env.h"
 #include "pmu-hybrid.h"
 
-#define CORE_SIB_FMT \
+#define PACKAGE_CPUS_FMT \
+       "%s/devices/system/cpu/cpu%d/topology/package_cpus_list"
+#define PACKAGE_CPUS_FMT_OLD \
        "%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
-#define DIE_SIB_FMT \
+#define DIE_CPUS_FMT \
        "%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
-#define THRD_SIB_FMT \
-       "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
-#define THRD_SIB_FMT_NEW \
+#define CORE_CPUS_FMT \
        "%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
+#define CORE_CPUS_FMT_OLD \
+       "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
 #define NODE_ONLINE_FMT \
        "%s/devices/system/node/online"
 #define NODE_MEMINFO_FMT \
@@ -39,8 +41,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
        u32 i = 0;
        int ret = -1;
 
-       scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT,
+       scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT,
                  sysfs__mountpoint(), cpu);
+       if (access(filename, F_OK) == -1) {
+               scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD,
+                       sysfs__mountpoint(), cpu);
+       }
        fp = fopen(filename, "r");
        if (!fp)
                goto try_dies;
@@ -54,23 +60,23 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
        if (p)
                *p = '\0';
 
-       for (i = 0; i < tp->core_sib; i++) {
-               if (!strcmp(buf, tp->core_siblings[i]))
+       for (i = 0; i < tp->package_cpus_lists; i++) {
+               if (!strcmp(buf, tp->package_cpus_list[i]))
                        break;
        }
-       if (i == tp->core_sib) {
-               tp->core_siblings[i] = buf;
-               tp->core_sib++;
+       if (i == tp->package_cpus_lists) {
+               tp->package_cpus_list[i] = buf;
+               tp->package_cpus_lists++;
                buf = NULL;
                len = 0;
        }
        ret = 0;
 
 try_dies:
-       if (!tp->die_siblings)
+       if (!tp->die_cpus_list)
                goto try_threads;
 
-       scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+       scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
                  sysfs__mountpoint(), cpu);
        fp = fopen(filename, "r");
        if (!fp)
@@ -85,23 +91,23 @@ try_dies:
        if (p)
                *p = '\0';
 
-       for (i = 0; i < tp->die_sib; i++) {
-               if (!strcmp(buf, tp->die_siblings[i]))
+       for (i = 0; i < tp->die_cpus_lists; i++) {
+               if (!strcmp(buf, tp->die_cpus_list[i]))
                        break;
        }
-       if (i == tp->die_sib) {
-               tp->die_siblings[i] = buf;
-               tp->die_sib++;
+       if (i == tp->die_cpus_lists) {
+               tp->die_cpus_list[i] = buf;
+               tp->die_cpus_lists++;
                buf = NULL;
                len = 0;
        }
        ret = 0;
 
 try_threads:
-       scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW,
+       scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT,
                  sysfs__mountpoint(), cpu);
        if (access(filename, F_OK) == -1) {
-               scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+               scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD,
                          sysfs__mountpoint(), cpu);
        }
        fp = fopen(filename, "r");
@@ -115,13 +121,13 @@ try_threads:
        if (p)
                *p = '\0';
 
-       for (i = 0; i < tp->thread_sib; i++) {
-               if (!strcmp(buf, tp->thread_siblings[i]))
+       for (i = 0; i < tp->core_cpus_lists; i++) {
+               if (!strcmp(buf, tp->core_cpus_list[i]))
                        break;
        }
-       if (i == tp->thread_sib) {
-               tp->thread_siblings[i] = buf;
-               tp->thread_sib++;
+       if (i == tp->core_cpus_lists) {
+               tp->core_cpus_list[i] = buf;
+               tp->core_cpus_lists++;
                buf = NULL;
        }
        ret = 0;
@@ -139,16 +145,14 @@ void cpu_topology__delete(struct cpu_topology *tp)
        if (!tp)
                return;
 
-       for (i = 0 ; i < tp->core_sib; i++)
-               zfree(&tp->core_siblings[i]);
+       for (i = 0 ; i < tp->package_cpus_lists; i++)
+               zfree(&tp->package_cpus_list[i]);
 
-       if (tp->die_sib) {
-               for (i = 0 ; i < tp->die_sib; i++)
-                       zfree(&tp->die_siblings[i]);
-       }
+       for (i = 0 ; i < tp->die_cpus_lists; i++)
+               zfree(&tp->die_cpus_list[i]);
 
-       for (i = 0 ; i < tp->thread_sib; i++)
-               zfree(&tp->thread_siblings[i]);
+       for (i = 0 ; i < tp->core_cpus_lists; i++)
+               zfree(&tp->core_cpus_list[i]);
 
        free(tp);
 }
@@ -164,7 +168,7 @@ static bool has_die_topology(void)
        if (strncmp(uts.machine, "x86_64", 6))
                return false;
 
-       scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
+       scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
                  sysfs__mountpoint(), 0);
        if (access(filename, F_OK) == -1)
                return false;
@@ -205,13 +209,13 @@ struct cpu_topology *cpu_topology__new(void)
 
        tp = addr;
        addr += sizeof(*tp);
-       tp->core_siblings = addr;
+       tp->package_cpus_list = addr;
        addr += sz;
        if (has_die) {
-               tp->die_siblings = addr;
+               tp->die_cpus_list = addr;
                addr += sz;
        }
-       tp->thread_siblings = addr;
+       tp->core_cpus_list = addr;
 
        for (i = 0; i < nr; i++) {
                if (!cpu_map__has(map, i))
index d9af971..854e18f 100644 (file)
@@ -5,12 +5,33 @@
 #include <linux/types.h>
 
 struct cpu_topology {
-       u32       core_sib;
-       u32       die_sib;
-       u32       thread_sib;
-       char    **core_siblings;
-       char    **die_siblings;
-       char    **thread_siblings;
+       /* The number of unique package_cpus_lists below. */
+       u32       package_cpus_lists;
+       /* The number of unique die_cpu_lists below. */
+       u32       die_cpus_lists;
+       /* The number of unique core_cpu_lists below. */
+       u32       core_cpus_lists;
+       /*
+        * An array of strings where each string is unique and read from
+        * /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI
+        * each of these is a human-readable list of CPUs sharing the same
+        * physical_package_id. The format is like 0-3, 8-11, 14,17.
+        */
+       const char **package_cpus_list;
+       /*
+        * An array of string where each string is unique and from
+        * /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI
+        * each of these is a human-readable list of CPUs within the same die.
+        * The format is like 0-3, 8-11, 14,17.
+        */
+       const char **die_cpus_list;
+       /*
+        * An array of string where each string is unique and from
+        * /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI
+        * each of these is a human-readable list of CPUs within the same
+        * core. The format is like 0-3, 8-11, 14,17.
+        */
+       const char **core_cpus_list;
 };
 
 struct numa_topology_node {
index f323adb..4f672f7 100644 (file)
@@ -537,7 +537,7 @@ static void cs_etm__dump_event(struct cs_etm_queue *etmq,
 
        fprintf(stdout, "\n");
        color_fprintf(stdout, color,
-                    ". ... CoreSight %s Trace data: size %zu bytes\n",
+                    ". ... CoreSight %s Trace data: size %#zx bytes\n",
                     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
 
        do {
index 17f1dd0..b990489 100644 (file)
@@ -75,12 +75,13 @@ out:
        return node;
 }
 
-void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
 {
        struct rb_node *parent = NULL;
        __u32 btf_id = btf_node->id;
        struct btf_node *node;
        struct rb_node **p;
+       bool ret = true;
 
        down_write(&env->bpf_progs.lock);
        p = &env->bpf_progs.btfs.rb_node;
@@ -94,6 +95,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
                        p = &(*p)->rb_right;
                } else {
                        pr_debug("duplicated btf %u\n", btf_id);
+                       ret = false;
                        goto out;
                }
        }
@@ -103,6 +105,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node)
        env->bpf_progs.btfs_cnt++;
 out:
        up_write(&env->bpf_progs.lock);
+       return ret;
 }
 
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id)
index 1383876..163e5ec 100644 (file)
@@ -167,7 +167,7 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env,
                                    struct bpf_prog_info_node *info_node);
 struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
                                                        __u32 prog_id);
-void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
+bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
 struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
 
 int perf_env__numa_node(struct perf_env *env, int cpu);
index ec967fb..a59fb2e 100644 (file)
@@ -3037,3 +3037,15 @@ void evsel__set_leader(struct evsel *evsel, struct evsel *leader)
 {
        evsel->core.leader = &leader->core;
 }
+
+int evsel__source_count(const struct evsel *evsel)
+{
+       struct evsel *pos;
+       int count = 0;
+
+       evlist__for_each_entry(evsel->evlist, pos) {
+               if (pos->metric_leader == evsel)
+                       count++;
+       }
+       return count;
+}
index 3ea6871..29d49a8 100644 (file)
@@ -489,6 +489,7 @@ struct evsel *evsel__leader(struct evsel *evsel);
 bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
 bool evsel__is_leader(struct evsel *evsel);
 void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
+int evsel__source_count(const struct evsel *evsel);
 
 /*
  * Macro to swap the bit-field postition and size.
index 77c6ad8..1d532b9 100644 (file)
@@ -5,13 +5,17 @@
 #include <stdlib.h>
 #include <string.h>
 #include "metricgroup.h"
+#include "cpumap.h"
+#include "cputopo.h"
 #include "debug.h"
 #include "expr.h"
 #include "expr-bison.h"
 #include "expr-flex.h"
+#include "smt.h"
 #include <linux/kernel.h>
 #include <linux/zalloc.h>
 #include <ctype.h>
+#include <math.h>
 
 #ifdef PARSER_DEBUG
 extern int expr_debug;
@@ -19,7 +23,10 @@ extern int expr_debug;
 
 struct expr_id_data {
        union {
-               double val;
+               struct {
+                       double val;
+                       int source_count;
+               } val;
                struct {
                        double val;
                        const char *metric_name;
@@ -137,6 +144,13 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
 /* Caller must make sure id is allocated */
 int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
 {
+       return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+                                 double val, int source_count)
+{
        struct expr_id_data *data_ptr = NULL, *old_data = NULL;
        char *old_key = NULL;
        int ret;
@@ -144,7 +158,8 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
        data_ptr = malloc(sizeof(*data_ptr));
        if (!data_ptr)
                return -ENOMEM;
-       data_ptr->val = val;
+       data_ptr->val.val = val;
+       data_ptr->val.source_count = source_count;
        data_ptr->kind = EXPR_ID_DATA__VALUE;
 
        ret = hashmap__set(ctx->ids, id, data_ptr,
@@ -240,7 +255,7 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
 
        switch (data->kind) {
        case EXPR_ID_DATA__VALUE:
-               pr_debug2("lookup(%s): val %f\n", id, data->val);
+               pr_debug2("lookup(%s): val %f\n", id, data->val.val);
                break;
        case EXPR_ID_DATA__REF:
                pr_debug2("lookup(%s): ref metric name %s\n", id,
@@ -251,7 +266,7 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
                        pr_debug("%s failed to count\n", id);
                        return -1;
                }
-               pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+               pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val);
                break;
        case EXPR_ID_DATA__REF_VALUE:
                pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
@@ -366,7 +381,47 @@ int expr__find_ids(const char *expr, const char *one,
 double expr_id_data__value(const struct expr_id_data *data)
 {
        if (data->kind == EXPR_ID_DATA__VALUE)
-               return data->val;
+               return data->val.val;
        assert(data->kind == EXPR_ID_DATA__REF_VALUE);
        return data->ref.val;
 }
+
+double expr_id_data__source_count(const struct expr_id_data *data)
+{
+       assert(data->kind == EXPR_ID_DATA__VALUE);
+       return data->val.source_count;
+}
+
+double expr__get_literal(const char *literal)
+{
+       static struct cpu_topology *topology;
+
+       if (!strcmp("#smt_on", literal))
+               return smt_on() > 0 ? 1.0 : 0.0;
+
+       if (!strcmp("#num_cpus", literal))
+               return cpu__max_present_cpu();
+
+       /*
+        * Assume that topology strings are consistent, such as CPUs "0-1"
+        * wouldn't be listed as "0,1", and so after deduplication the number of
+        * these strings gives an indication of the number of packages, dies,
+        * etc.
+        */
+       if (!topology) {
+               topology = cpu_topology__new();
+               if (!topology) {
+                       pr_err("Error creating CPU topology");
+                       return NAN;
+               }
+       }
+       if (!strcmp("#num_packages", literal))
+               return topology->package_cpus_lists;
+       if (!strcmp("#num_dies", literal))
+               return topology->die_cpus_lists;
+       if (!strcmp("#num_cores", literal))
+               return topology->core_cpus_lists;
+
+       pr_err("Unrecognized literal '%s'", literal);
+       return NAN;
+}
index cf81f91..bd21169 100644 (file)
@@ -40,6 +40,8 @@ void expr__ctx_free(struct expr_parse_ctx *ctx);
 void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
 int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
 int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
+                               double val, int source_count);
 int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
 int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
                 struct expr_id_data **data);
@@ -55,5 +57,7 @@ int expr__find_ids(const char *expr, const char *one,
                   struct expr_parse_ctx *ids);
 
 double expr_id_data__value(const struct expr_id_data *data);
+double expr_id_data__source_count(const struct expr_id_data *data);
+double expr__get_literal(const char *literal);
 
 #endif
index bd20f33..0a13eb2 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/compiler.h>
 #include "expr.h"
 #include "expr-bison.h"
+#include <math.h>
 
 char *expr_get_text(yyscan_t yyscanner);
 YYSTYPE *expr_get_lval(yyscan_t yyscanner);
@@ -77,6 +78,17 @@ static int str(yyscan_t scanner, int token, int runtime)
        yylval->str = normalize(yylval->str, runtime);
        return token;
 }
+
+static int literal(yyscan_t scanner)
+{
+       YYSTYPE *yylval = expr_get_lval(scanner);
+
+       yylval->num = expr__get_literal(expr_get_text(scanner));
+       if (isnan(yylval->num))
+               return EXPR_ERROR;
+
+       return LITERAL;
+}
 %}
 
 number         ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
@@ -85,6 +97,7 @@ sch           [-,=]
 spec           \\{sch}
 sym            [0-9a-zA-Z_\.:@?]+
 symbol         ({spec}|{sym})+
+literal                #[0-9a-zA-Z_\.\-]+
 
 %%
        struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
@@ -94,7 +107,8 @@ max          { return MAX; }
 min            { return MIN; }
 if             { return IF; }
 else           { return ELSE; }
-#smt_on                { return SMT_ON; }
+source_count   { return SOURCE_COUNT; }
+{literal}      { return literal(yyscanner); }
 {number}       { return value(yyscanner); }
 {symbol}       { return str(yyscanner, ID, sctx->runtime); }
 "|"            { return '|'; }
index f969dfa..a30b825 100644 (file)
@@ -3,8 +3,8 @@
 #define YYDEBUG 1
 #include <assert.h>
 #include <math.h>
+#include <stdlib.h>
 #include "util/debug.h"
-#include "smt.h"
 #define IN_EXPR_Y 1
 #include "expr.h"
 %}
@@ -37,7 +37,7 @@
        } ids;
 }
 
-%token ID NUMBER MIN MAX IF ELSE SMT_ON D_RATIO EXPR_ERROR
+%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR
 %left MIN MAX IF
 %left '|'
 %left '^'
@@ -46,7 +46,7 @@
 %left '-' '+'
 %left '*' '/' '%'
 %left NEG NOT
-%type <num> NUMBER
+%type <num> NUMBER LITERAL
 %type <str> ID
 %destructor { free ($$); } <str>
 %type <ids> expr if_expr
@@ -83,6 +83,41 @@ static struct ids union_expr(struct ids ids1, struct ids ids2)
        return result;
 }
 
+static struct ids handle_id(struct expr_parse_ctx *ctx, char *id,
+                           bool compute_ids, bool source_count)
+{
+       struct ids result;
+
+       if (!compute_ids) {
+               /*
+                * Compute the event's value from ID. If the ID isn't known then
+                * it isn't used to compute the formula so set to NAN.
+                */
+               struct expr_id_data *data;
+
+               result.val = NAN;
+               if (expr__resolve_id(ctx, id, &data) == 0) {
+                       result.val = source_count
+                               ? expr_id_data__source_count(data)
+                               : expr_id_data__value(data);
+               }
+               result.ids = NULL;
+               free(id);
+       } else {
+               /*
+                * Set the value to BOTTOM to show that any value is possible
+                * when the event is computed. Create a set of just the ID.
+                */
+               result.val = BOTTOM;
+               result.ids = ids__new();
+               if (!result.ids || ids__insert(result.ids, id)) {
+                       pr_err("Error creating IDs for '%s'", id);
+                       free(id);
+               }
+       }
+       return result;
+}
+
 /*
  * If we're not computing ids or $1 and $3 are constants, compute the new
  * constant value using OP. Its invariant that there are no ids.  If computing
@@ -168,32 +203,8 @@ expr: NUMBER
        $$.val = $1;
        $$.ids = NULL;
 }
-| ID
-{
-       if (!compute_ids) {
-               /*
-                * Compute the event's value from ID. If the ID isn't known then
-                * it isn't used to compute the formula so set to NAN.
-                */
-               struct expr_id_data *data;
-
-               $$.val = NAN;
-               if (expr__resolve_id(ctx, $1, &data) == 0)
-                       $$.val = expr_id_data__value(data);
-
-               $$.ids = NULL;
-               free($1);
-       } else {
-               /*
-                * Set the value to BOTTOM to show that any value is possible
-                * when the event is computed. Create a set of just the ID.
-                */
-               $$.val = BOTTOM;
-               $$.ids = ids__new();
-               if (!$$.ids || ids__insert($$.ids, $1))
-                       YYABORT;
-       }
-}
+| ID                           { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); }
+| SOURCE_COUNT '(' ID ')'      { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); }
 | expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); }
 | expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); }
 | expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); }
@@ -280,9 +291,9 @@ expr: NUMBER
                $$ = union_expr($3, $5);
        }
 }
-| SMT_ON
+| LITERAL
 {
-       $$.val = smt_on() > 0 ? 1.0 : 0.0;
+       $$.val = $1;
        $$.ids = NULL;
 }
 ;
index 56511db..fda8d14 100644 (file)
@@ -583,21 +583,21 @@ static int write_cpu_topology(struct feat_fd *ff,
        if (!tp)
                return -1;
 
-       ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib));
+       ret = do_write(ff, &tp->package_cpus_lists, sizeof(tp->package_cpus_lists));
        if (ret < 0)
                goto done;
 
-       for (i = 0; i < tp->core_sib; i++) {
-               ret = do_write_string(ff, tp->core_siblings[i]);
+       for (i = 0; i < tp->package_cpus_lists; i++) {
+               ret = do_write_string(ff, tp->package_cpus_list[i]);
                if (ret < 0)
                        goto done;
        }
-       ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib));
+       ret = do_write(ff, &tp->core_cpus_lists, sizeof(tp->core_cpus_lists));
        if (ret < 0)
                goto done;
 
-       for (i = 0; i < tp->thread_sib; i++) {
-               ret = do_write_string(ff, tp->thread_siblings[i]);
+       for (i = 0; i < tp->core_cpus_lists; i++) {
+               ret = do_write_string(ff, tp->core_cpus_list[i]);
                if (ret < 0)
                        break;
        }
@@ -617,15 +617,15 @@ static int write_cpu_topology(struct feat_fd *ff,
                        return ret;
        }
 
-       if (!tp->die_sib)
+       if (!tp->die_cpus_lists)
                goto done;
 
-       ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib));
+       ret = do_write(ff, &tp->die_cpus_lists, sizeof(tp->die_cpus_lists));
        if (ret < 0)
                goto done;
 
-       for (i = 0; i < tp->die_sib; i++) {
-               ret = do_write_string(ff, tp->die_siblings[i]);
+       for (i = 0; i < tp->die_cpus_lists; i++) {
+               ret = do_write_string(ff, tp->die_cpus_list[i]);
                if (ret < 0)
                        goto done;
        }
index e4fb02b..5c7308e 100644 (file)
@@ -829,10 +829,12 @@ static int prepare_metric(struct evsel **metric_events,
                struct saved_value *v;
                struct stats *stats;
                u64 metric_total = 0;
+               int source_count;
 
                if (!strcmp(metric_events[i]->name, "duration_time")) {
                        stats = &walltime_nsecs_stats;
                        scale = 1e-9;
+                       source_count = 1;
                } else {
                        v = saved_value_lookup(metric_events[i], cpu, false,
                                               STAT_NONE, 0, st,
@@ -841,6 +843,7 @@ static int prepare_metric(struct evsel **metric_events,
                                break;
                        stats = &v->stats;
                        scale = 1.0;
+                       source_count = evsel__source_count(metric_events[i]);
 
                        if (v->metric_other)
                                metric_total = v->metric_total;
@@ -849,7 +852,9 @@ static int prepare_metric(struct evsel **metric_events,
                if (!n)
                        return -ENOMEM;
 
-               expr__add_id_val(pctx, n, metric_total ? : avg_stats(stats) * scale);
+               expr__add_id_val_source_count(pctx, n,
+                                       metric_total ? : avg_stats(stats) * scale,
+                                       source_count);
        }
 
        for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
index aa1b7c1..b2ed314 100644 (file)
@@ -274,7 +274,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *
        if (symbol_conf.priv_size) {
                if (symbol_conf.init_annotation) {
                        struct annotation *notes = (void *)sym;
-                       pthread_mutex_init(&notes->lock, NULL);
+                       annotation__init(notes);
                }
                sym = ((void *)sym) + symbol_conf.priv_size;
        }
@@ -294,6 +294,13 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *
 
 void symbol__delete(struct symbol *sym)
 {
+       if (symbol_conf.priv_size) {
+               if (symbol_conf.init_annotation) {
+                       struct annotation *notes = symbol__annotation(sym);
+
+                       annotation__exit(notes);
+               }
+       }
        free(((void *)sym) - symbol_conf.priv_size);
 }
 
index 1661966..fbf866d 100644 (file)
@@ -40,22 +40,33 @@ Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
                             GElf_Shdr *shp, const char *name, size_t *idx);
 #endif
 
-/** struct symbol - symtab entry
- *
- * @ignore - resolvable but tools ignore it (e.g. idle routines)
+/**
+ * A symtab entry. When allocated this may be preceded by an annotation (see
+ * symbol__annotation), a browser_index (see symbol__browser_index) and rb_node
+ * to sort by name (see struct symbol_name_rb_node).
  */
 struct symbol {
        struct rb_node  rb_node;
+       /** Range of symbol [start, end). */
        u64             start;
        u64             end;
+       /** Length of the string name. */
        u16             namelen;
+       /** ELF symbol type as defined for st_info. E.g STT_OBJECT or STT_FUNC. */
        u8              type:4;
+       /** ELF binding type as defined for st_info. E.g. STB_WEAK or STB_GLOBAL. */
        u8              binding:4;
+       /** Set true for kernel symbols of idle routines. */
        u8              idle:1;
+       /** Resolvable but tools ignore it (e.g. idle routines). */
        u8              ignore:1;
+       /** Symbol for an inlined function. */
        u8              inlined:1;
+       /** Has symbol__annotate2 been performed. */
+       u8              annotate2:1;
+       /** Architecture specific. Unused except on PPC where it holds st_other. */
        u8              arch_sym;
-       bool            annotate2;
+       /** The name of length namelen associated with the symbol. */
        char            name[];
 };
 
index 54b0a41..62fafbe 100644 (file)
@@ -187,7 +187,7 @@ DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
 $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
        $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower            \
                    OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF)     \
-                   BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/                       \
+                   BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/                  \
                    BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf                          \
                    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&             \
                    cp $(RUNQSLOWER_OUTPUT)runqslower $@
diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
new file mode 100644 (file)
index 0000000..e1de5f8
--- /dev/null
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_helper_restricted.skel.h"
+
+void test_helper_restricted(void)
+{
+       int prog_i = 0, prog_cnt;
+       int duration = 0;
+
+       do {
+               struct test_helper_restricted *test;
+               int maybeOK;
+
+               test = test_helper_restricted__open();
+               if (!ASSERT_OK_PTR(test, "open"))
+                       return;
+
+               prog_cnt = test->skeleton->prog_cnt;
+
+               for (int j = 0; j < prog_cnt; ++j) {
+                       struct bpf_program *prog = *test->skeleton->progs[j].prog;
+
+                       maybeOK = bpf_program__set_autoload(prog, prog_i == j);
+                       ASSERT_OK(maybeOK, "set autoload");
+               }
+
+               maybeOK = test_helper_restricted__load(test);
+               CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted");
+
+               test_helper_restricted__destroy(test);
+       } while (++prog_i < prog_cnt);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
new file mode 100644 (file)
index 0000000..68d64c3
--- /dev/null
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <time.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct timer {
+       struct bpf_timer t;
+};
+
+struct lock {
+       struct bpf_spin_lock l;
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, struct timer);
+} timers SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, __u32);
+       __type(value, struct lock);
+} locks SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct timer *timer)
+{
+       return 0;
+}
+
+static void timer_work(void)
+{
+       struct timer *timer;
+       const int key = 0;
+
+       timer  = bpf_map_lookup_elem(&timers, &key);
+       if (timer) {
+               bpf_timer_init(&timer->t, &timers, CLOCK_MONOTONIC);
+               bpf_timer_set_callback(&timer->t, timer_cb);
+               bpf_timer_start(&timer->t, 10E9, 0);
+               bpf_timer_cancel(&timer->t);
+       }
+}
+
+static void spin_lock_work(void)
+{
+       const int key = 0;
+       struct lock *lock;
+
+       lock = bpf_map_lookup_elem(&locks, &key);
+       if (lock) {
+               bpf_spin_lock(&lock->l);
+               bpf_spin_unlock(&lock->l);
+       }
+}
+
+SEC("raw_tp/sys_enter")
+int raw_tp_timer(void *ctx)
+{
+       timer_work();
+
+       return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int tp_timer(void *ctx)
+{
+       timer_work();
+
+       return 0;
+}
+
+SEC("kprobe/sys_nanosleep")
+int kprobe_timer(void *ctx)
+{
+       timer_work();
+
+       return 0;
+}
+
+SEC("perf_event")
+int perf_event_timer(void *ctx)
+{
+       timer_work();
+
+       return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int raw_tp_spin_lock(void *ctx)
+{
+       spin_lock_work();
+
+       return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int tp_spin_lock(void *ctx)
+{
+       spin_lock_work();
+
+       return 0;
+}
+
+SEC("kprobe/sys_nanosleep")
+int kprobe_spin_lock(void *ctx)
+{
+       spin_lock_work();
+
+       return 0;
+}
+
+SEC("perf_event")
+int perf_event_spin_lock(void *ctx)
+{
+       spin_lock_work();
+
+       return 0;
+}
+
+const char LICENSE[] SEC("license") = "GPL";
index 25afe42..465ef3f 100644 (file)
@@ -92,6 +92,7 @@ struct bpf_test {
        int fixup_map_event_output[MAX_FIXUPS];
        int fixup_map_reuseport_array[MAX_FIXUPS];
        int fixup_map_ringbuf[MAX_FIXUPS];
+       int fixup_map_timer[MAX_FIXUPS];
        /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
         * Can be a tab-separated sequence of expected strings. An empty string
         * means no log verification.
@@ -604,8 +605,15 @@ static int create_cgroup_storage(bool percpu)
  *   int cnt;
  *   struct bpf_spin_lock l;
  * };
+ * struct bpf_timer {
+ *   __u64 :64;
+ *   __u64 :64;
+ * } __attribute__((aligned(8)));
+ * struct timer {
+ *   struct bpf_timer t;
+ * };
  */
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t";
 static __u32 btf_raw_types[] = {
        /* int */
        BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
@@ -616,6 +624,11 @@ static __u32 btf_raw_types[] = {
        BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
        BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
        BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+       /* struct bpf_timer */                          /* [4] */
+       BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16),
+       /* struct timer */                              /* [5] */
+       BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
+       BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
 };
 
 static int load_btf(void)
@@ -696,6 +709,29 @@ static int create_sk_storage_map(void)
        return fd;
 }
 
+static int create_map_timer(void)
+{
+       struct bpf_create_map_attr attr = {
+               .name = "test_map",
+               .map_type = BPF_MAP_TYPE_ARRAY,
+               .key_size = 4,
+               .value_size = 16,
+               .max_entries = 1,
+               .btf_key_type_id = 1,
+               .btf_value_type_id = 5,
+       };
+       int fd, btf_fd;
+
+       btf_fd = load_btf();
+       if (btf_fd < 0)
+               return -1;
+       attr.btf_fd = btf_fd;
+       fd = bpf_create_map_xattr(&attr);
+       if (fd < 0)
+               printf("Failed to create map with timer\n");
+       return fd;
+}
+
 static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
@@ -722,6 +758,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
        int *fixup_map_event_output = test->fixup_map_event_output;
        int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
        int *fixup_map_ringbuf = test->fixup_map_ringbuf;
+       int *fixup_map_timer = test->fixup_map_timer;
 
        if (test->fill_helper) {
                test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -907,6 +944,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
                        fixup_map_ringbuf++;
                } while (*fixup_map_ringbuf);
        }
+       if (*fixup_map_timer) {
+               map_fds[21] = create_map_timer();
+               do {
+                       prog[*fixup_map_timer].imm = map_fds[21];
+                       fixup_map_timer++;
+               } while (*fixup_map_timer);
+       }
 }
 
 struct libcap {
diff --git a/tools/testing/selftests/bpf/verifier/helper_restricted.c b/tools/testing/selftests/bpf/verifier/helper_restricted.c
new file mode 100644 (file)
index 0000000..a067b70
--- /dev/null
@@ -0,0 +1,196 @@
+{
+       "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE",
+       .insns = {
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+               BPF_MOV64_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "unknown func bpf_ktime_get_coarse_ns",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_KPROBE,
+},
+{
+       "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT",
+       .insns = {
+               BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+               BPF_MOV64_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .errstr = "unknown func bpf_ktime_get_coarse_ns",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+       "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT",
+       .insns = {
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .errstr = "unknown func bpf_ktime_get_coarse_ns",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+},
+{
+       "bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
+       .insns = {
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ktime_get_coarse_ns),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .errstr = "unknown func bpf_ktime_get_coarse_ns",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
+},
+{
+       "bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_LD_MAP_FD(BPF_REG_2, 0),
+       BPF_MOV64_IMM(BPF_REG_3, 1),
+       BPF_EMIT_CALL(BPF_FUNC_timer_init),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_timer = { 3, 8 },
+       .errstr = "tracing progs cannot use bpf_timer yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_KPROBE,
+},
+{
+       "bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_LD_MAP_FD(BPF_REG_2, 0),
+       BPF_MOV64_IMM(BPF_REG_3, 1),
+       BPF_EMIT_CALL(BPF_FUNC_timer_init),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_timer = { 3, 8 },
+       .errstr = "tracing progs cannot use bpf_timer yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+},
+{
+       "bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_LD_MAP_FD(BPF_REG_2, 0),
+       BPF_MOV64_IMM(BPF_REG_3, 1),
+       BPF_EMIT_CALL(BPF_FUNC_timer_init),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_timer = { 3, 8 },
+       .errstr = "tracing progs cannot use bpf_timer yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+       "bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_LD_MAP_FD(BPF_REG_2, 0),
+       BPF_MOV64_IMM(BPF_REG_3, 1),
+       BPF_EMIT_CALL(BPF_FUNC_timer_init),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_timer = { 3, 8 },
+       .errstr = "tracing progs cannot use bpf_timer yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
+},
+{
+       "bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_spin_lock = { 3 },
+       .errstr = "tracing progs cannot use bpf_spin_lock yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_KPROBE,
+},
+{
+       "bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_spin_lock = { 3 },
+       .errstr = "tracing progs cannot use bpf_spin_lock yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+       "bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_spin_lock = { 3 },
+       .errstr = "tracing progs cannot use bpf_spin_lock yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+},
+{
+       "bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT",
+       .insns = {
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_EMIT_CALL(BPF_FUNC_spin_lock),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_spin_lock = { 3 },
+       .errstr = "tracing progs cannot use bpf_spin_lock yet",
+       .result = REJECT,
+       .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT,
+},
index 2798927..128a348 100644 (file)
        .result = ACCEPT,
 },
 {
+       "map in map state pruning",
+       .insns = {
+       BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 11),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_in_map = { 4, 14 },
+       .flags = BPF_F_TEST_STATE_FREQ,
+       .result = VERBOSE_ACCEPT,
+       .errstr = "processed 25 insns",
+       .prog_type = BPF_PROG_TYPE_XDP,
+},
+{
        "invalid inner map pointer",
        .insns = {
        BPF_ST_MEM(0, BPF_REG_10, -4, 0),
index d4a8301..3763105 100644 (file)
@@ -23,6 +23,7 @@
 /x86_64/platform_info_test
 /x86_64/set_boot_cpu_id
 /x86_64/set_sregs_test
+/x86_64/sev_migrate_tests
 /x86_64/smm_test
 /x86_64/state_test
 /x86_64/svm_vmcall_test
index c23e89d..c4e3471 100644 (file)
@@ -73,7 +73,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
 TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
-TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test
+TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
index 5d95113..d890903 100644 (file)
@@ -47,7 +47,7 @@
 #include "guest_modes.h"
 
 /* Global variable used to synchronize all of the vCPU threads. */
-static int iteration = -1;
+static int iteration;
 
 /* Defines what vCPU threads should do during a given iteration. */
 static enum {
@@ -215,12 +215,11 @@ static bool spin_wait_for_next_iteration(int *current_iteration)
        return true;
 }
 
-static void *vcpu_thread_main(void *arg)
+static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
 {
-       struct perf_test_vcpu_args *vcpu_args = arg;
        struct kvm_vm *vm = perf_test_args.vm;
        int vcpu_id = vcpu_args->vcpu_id;
-       int current_iteration = -1;
+       int current_iteration = 0;
 
        while (spin_wait_for_next_iteration(&current_iteration)) {
                switch (READ_ONCE(iteration_work)) {
@@ -235,8 +234,6 @@ static void *vcpu_thread_main(void *arg)
 
                vcpu_last_completed_iteration[vcpu_id] = current_iteration;
        }
-
-       return NULL;
 }
 
 static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
@@ -277,8 +274,7 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
 static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
                          const char *description)
 {
-       perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
-       sync_global_to_guest(vm, perf_test_args);
+       perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
        iteration_work = ITERATION_ACCESS_MEMORY;
        run_iteration(vm, vcpus, description);
 }
@@ -296,48 +292,16 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
        run_iteration(vm, vcpus, "Mark memory idle");
 }
 
-static pthread_t *create_vcpu_threads(int vcpus)
-{
-       pthread_t *vcpu_threads;
-       int i;
-
-       vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
-       TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
-
-       for (i = 0; i < vcpus; i++) {
-               vcpu_last_completed_iteration[i] = iteration;
-               pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
-                              &perf_test_args.vcpu_args[i]);
-       }
-
-       return vcpu_threads;
-}
-
-static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
-{
-       int i;
-
-       /* Set done to signal the vCPU threads to exit */
-       done = true;
-
-       for (i = 0; i < vcpus; i++)
-               pthread_join(vcpu_threads[i], NULL);
-}
-
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
        struct test_params *params = arg;
        struct kvm_vm *vm;
-       pthread_t *vcpu_threads;
        int vcpus = params->vcpus;
 
        vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
-                                params->backing_src);
+                                params->backing_src, !overlap_memory_access);
 
-       perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
-                             !overlap_memory_access);
-
-       vcpu_threads = create_vcpu_threads(vcpus);
+       perf_test_start_vcpu_threads(vcpus, vcpu_thread_main);
 
        pr_info("\n");
        access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
@@ -352,8 +316,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        mark_memory_idle(vm, vcpus);
        access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
 
-       terminate_vcpu_threads(vcpu_threads, vcpus);
-       free(vcpu_threads);
+       /* Set done to signal the vCPU threads to exit */
+       done = true;
+
+       perf_test_join_vcpu_threads(vcpus);
        perf_test_destroy_vm(vm);
 }
 
index 1510b21..6a719d0 100644 (file)
@@ -42,10 +42,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 static size_t demand_paging_size;
 static char *guest_data_prototype;
 
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
        int ret;
-       struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
        int vcpu_id = vcpu_args->vcpu_id;
        struct kvm_vm *vm = perf_test_args.vm;
        struct kvm_run *run;
@@ -68,8 +67,6 @@ static void *vcpu_worker(void *data)
        ts_diff = timespec_elapsed(start);
        PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
                       ts_diff.tv_sec, ts_diff.tv_nsec);
-
-       return NULL;
 }
 
 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
@@ -282,7 +279,6 @@ struct test_params {
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
        struct test_params *p = arg;
-       pthread_t *vcpu_threads;
        pthread_t *uffd_handler_threads = NULL;
        struct uffd_handler_args *uffd_args = NULL;
        struct timespec start;
@@ -293,9 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        int r;
 
        vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
-                                p->src_type);
-
-       perf_test_args.wr_fract = 1;
+                                p->src_type, p->partition_vcpu_memory_access);
 
        demand_paging_size = get_backing_src_pagesz(p->src_type);
 
@@ -304,12 +298,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                    "Failed to allocate buffer for guest data pattern");
        memset(guest_data_prototype, 0xAB, demand_paging_size);
 
-       vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
-       TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
-       perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
-                             p->partition_vcpu_memory_access);
-
        if (p->uffd_mode) {
                uffd_handler_threads =
                        malloc(nr_vcpus * sizeof(*uffd_handler_threads));
@@ -322,26 +310,15 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
 
                for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-                       vm_paddr_t vcpu_gpa;
+                       struct perf_test_vcpu_args *vcpu_args;
                        void *vcpu_hva;
                        void *vcpu_alias;
-                       uint64_t vcpu_mem_size;
-
 
-                       if (p->partition_vcpu_memory_access) {
-                               vcpu_gpa = guest_test_phys_mem +
-                                          (vcpu_id * guest_percpu_mem_size);
-                               vcpu_mem_size = guest_percpu_mem_size;
-                       } else {
-                               vcpu_gpa = guest_test_phys_mem;
-                               vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
-                       }
-                       PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-                                      vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
+                       vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
 
                        /* Cache the host addresses of the region */
-                       vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
-                       vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
+                       vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
+                       vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
 
                        /*
                         * Set up user fault fd to handle demand paging
@@ -355,32 +332,18 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                                            pipefds[vcpu_id * 2], p->uffd_mode,
                                            p->uffd_delay, &uffd_args[vcpu_id],
                                            vcpu_hva, vcpu_alias,
-                                           vcpu_mem_size);
+                                           vcpu_args->pages * perf_test_args.guest_page_size);
                }
        }
 
-       /* Export the shared variables to the guest */
-       sync_global_to_guest(vm, perf_test_args);
-
        pr_info("Finished creating vCPUs and starting uffd threads\n");
 
        clock_gettime(CLOCK_MONOTONIC, &start);
-
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-               pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-                              &perf_test_args.vcpu_args[vcpu_id]);
-       }
-
+       perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
        pr_info("Started all vCPUs\n");
 
-       /* Wait for the vcpu threads to quit */
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
-               pthread_join(vcpu_threads[vcpu_id], NULL);
-               PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
-       }
-
+       perf_test_join_vcpu_threads(nr_vcpus);
        ts_diff = timespec_elapsed(start);
-
        pr_info("All vCPU threads joined\n");
 
        if (p->uffd_mode) {
@@ -404,7 +367,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        perf_test_destroy_vm(vm);
 
        free(guest_data_prototype);
-       free(vcpu_threads);
        if (p->uffd_mode) {
                free(uffd_handler_threads);
                free(uffd_args);
index 7ffab5b..1954b96 100644 (file)
@@ -31,7 +31,7 @@ static bool host_quit;
 static int iteration;
 static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
        int ret;
        struct kvm_vm *vm = perf_test_args.vm;
@@ -41,7 +41,6 @@ static void *vcpu_worker(void *data)
        struct timespec ts_diff;
        struct timespec total = (struct timespec){0};
        struct timespec avg;
-       struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
        int vcpu_id = vcpu_args->vcpu_id;
 
        run = vcpu_state(vm, vcpu_id);
@@ -83,8 +82,6 @@ static void *vcpu_worker(void *data)
        pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
                vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
                total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
-
-       return NULL;
 }
 
 struct test_params {
@@ -170,7 +167,6 @@ static void free_bitmaps(unsigned long *bitmaps[], int slots)
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
        struct test_params *p = arg;
-       pthread_t *vcpu_threads;
        struct kvm_vm *vm;
        unsigned long **bitmaps;
        uint64_t guest_num_pages;
@@ -186,9 +182,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        struct timespec clear_dirty_log_total = (struct timespec){0};
 
        vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
-                                p->slots, p->backing_src);
+                                p->slots, p->backing_src,
+                                p->partition_vcpu_memory_access);
 
-       perf_test_args.wr_fract = p->wr_fract;
+       perf_test_set_wr_fract(vm, p->wr_fract);
 
        guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
        guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -203,25 +200,15 @@ static void run_test(enum vm_guest_mode mode, void *arg)
                vm_enable_cap(vm, &cap);
        }
 
-       vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
-       TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
-       perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
-                             p->partition_vcpu_memory_access);
-
-       sync_global_to_guest(vm, perf_test_args);
-
        /* Start the iterations */
        iteration = 0;
        host_quit = false;
 
        clock_gettime(CLOCK_MONOTONIC, &start);
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
                vcpu_last_completed_iteration[vcpu_id] = -1;
 
-               pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-                              &perf_test_args.vcpu_args[vcpu_id]);
-       }
+       perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
        /* Allow the vCPUs to populate memory */
        pr_debug("Starting iteration %d - Populating\n", iteration);
@@ -290,8 +277,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        /* Tell the vcpu thread to quit */
        host_quit = true;
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-               pthread_join(vcpu_threads[vcpu_id], NULL);
+       perf_test_join_vcpu_threads(nr_vcpus);
 
        avg = timespec_div(get_dirty_log_total, p->iterations);
        pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
@@ -306,7 +292,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        }
 
        free_bitmaps(bitmaps, p->slots);
-       free(vcpu_threads);
        perf_test_destroy_vm(vm);
 }
 
index 792c60e..3fcd89e 100644 (file)
@@ -115,7 +115,7 @@ static void guest_code(void)
                        addr = guest_test_virt_mem;
                        addr += (READ_ONCE(random_array[i]) % guest_num_pages)
                                * guest_page_size;
-                       addr &= ~(host_page_size - 1);
+                       addr = align_down(addr, host_page_size);
                        *(uint64_t *)addr = READ_ONCE(iteration);
                }
 
@@ -737,14 +737,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
        if (!p->phys_offset) {
                guest_test_phys_mem = (vm_get_max_gfn(vm) -
                                       guest_num_pages) * guest_page_size;
-               guest_test_phys_mem &= ~(host_page_size - 1);
+               guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
        } else {
                guest_test_phys_mem = p->phys_offset;
        }
 
 #ifdef __s390x__
        /* Align to 1M (segment size) */
-       guest_test_phys_mem &= ~((1 << 20) - 1);
+       guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
 #endif
 
        pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
index f6b3794..6a1a37f 100644 (file)
@@ -82,6 +82,7 @@ struct vm_guest_mode_params {
 };
 extern const struct vm_guest_mode_params vm_guest_mode_params[];
 
+int open_path_or_exit(const char *path, int flags);
 int open_kvm_dev_path_or_exit(void);
 int kvm_check_cap(long cap);
 int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
index df9f1a3..a86f953 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
 #define SELFTEST_KVM_PERF_TEST_UTIL_H
 
+#include <pthread.h>
+
 #include "kvm_util.h"
 
 /* Default guest test virtual memory offset */
@@ -18,6 +20,7 @@
 #define PERF_TEST_MEM_SLOT_INDEX       1
 
 struct perf_test_vcpu_args {
+       uint64_t gpa;
        uint64_t gva;
        uint64_t pages;
 
@@ -27,7 +30,7 @@ struct perf_test_vcpu_args {
 
 struct perf_test_args {
        struct kvm_vm *vm;
-       uint64_t host_page_size;
+       uint64_t gpa;
        uint64_t guest_page_size;
        int wr_fract;
 
@@ -36,19 +39,15 @@ struct perf_test_args {
 
 extern struct perf_test_args perf_test_args;
 
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-extern uint64_t guest_test_phys_mem;
-
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
                                   uint64_t vcpu_memory_bytes, int slots,
-                                  enum vm_mem_backing_src_type backing_src);
+                                  enum vm_mem_backing_src_type backing_src,
+                                  bool partition_vcpu_memory_access);
 void perf_test_destroy_vm(struct kvm_vm *vm);
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
-                          uint64_t vcpu_memory_bytes,
-                          bool partition_vcpu_memory_access);
+
+void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+
+void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
+void perf_test_join_vcpu_threads(int vcpus);
 
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
index f8fddc8..99e0dcd 100644 (file)
@@ -104,6 +104,7 @@ size_t get_trans_hugepagesz(void);
 size_t get_def_hugetlb_pagesz(void);
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
 size_t get_backing_src_pagesz(uint32_t i);
+bool is_backing_src_hugetlb(uint32_t i);
 void backing_src_help(const char *flag);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 long get_run_delay(void);
@@ -117,4 +118,29 @@ static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
        return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
 }
 
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static inline uint64_t align_up(uint64_t x, uint64_t size)
+{
+       uint64_t mask = size - 1;
+
+       TEST_ASSERT(size != 0 && !(size & (size - 1)),
+                   "size not a power of 2: %lu", size);
+       return ((x + mask) & ~mask);
+}
+
+static inline uint64_t align_down(uint64_t x, uint64_t size)
+{
+       uint64_t x_aligned_up = align_up(x, size);
+
+       if (x == x_aligned_up)
+               return x;
+       else
+               return x_aligned_up - size;
+}
+
+static inline void *align_ptr_up(void *x, size_t size)
+{
+       return (void *)align_up((unsigned long)x, size);
+}
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
index b7531c8..587fbe4 100644 (file)
@@ -46,4 +46,6 @@ static inline bool cpu_has_svm(void)
        return ecx & CPUID_SVM;
 }
 
+int open_sev_dev_path_or_exit(void);
+
 #endif /* SELFTEST_KVM_SVM_UTILS_H */
index 36407cb..3836322 100644 (file)
@@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 #ifdef __s390x__
        alignment = max(0x100000, alignment);
 #endif
-       guest_test_phys_mem &= ~(alignment - 1);
+       guest_test_phys_mem = align_down(guest_test_virt_mem, alignment);
 
        /* Set up the shared data structure test_args */
        test_args.vm = vm;
index eac44f5..13e8e3d 100644 (file)
@@ -157,8 +157,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
                        "memsize of 0,\n"
                        "  phdr index: %u p_memsz: 0x%" PRIx64,
                        n1, (uint64_t) phdr.p_memsz);
-               vm_vaddr_t seg_vstart = phdr.p_vaddr;
-               seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+               vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
                vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
                seg_vend |= vm->page_size - 1;
                size_t seg_size = seg_vend - seg_vstart + 1;
index 041004c..8f2e0bb 100644 (file)
 
 static int vcpu_mmap_sz(void);
 
-/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static void *align(void *x, size_t size)
+int open_path_or_exit(const char *path, int flags)
 {
-       size_t mask = size - 1;
-       TEST_ASSERT(size != 0 && !(size & (size - 1)),
-                   "size not a power of 2: %lu", size);
-       return (void *) (((size_t) x + mask) & ~mask);
+       int fd;
+
+       fd = open(path, flags);
+       if (fd < 0) {
+               print_skip("%s not available (errno: %d)", path, errno);
+               exit(KSFT_SKIP);
+       }
+
+       return fd;
 }
 
 /*
@@ -42,16 +46,7 @@ static void *align(void *x, size_t size)
  */
 static int _open_kvm_dev_path_or_exit(int flags)
 {
-       int fd;
-
-       fd = open(KVM_DEV_PATH, flags);
-       if (fd < 0) {
-               print_skip("%s not available, is KVM loaded? (errno: %d)",
-                          KVM_DEV_PATH, errno);
-               exit(KSFT_SKIP);
-       }
-
-       return fd;
+       return open_path_or_exit(KVM_DEV_PATH, flags);
 }
 
 int open_kvm_dev_path_or_exit(void)
@@ -187,15 +182,15 @@ const char *vm_guest_mode_string(uint32_t i)
 }
 
 const struct vm_guest_mode_params vm_guest_mode_params[] = {
-       { 52, 48,  0x1000, 12 },
-       { 52, 48, 0x10000, 16 },
-       { 48, 48,  0x1000, 12 },
-       { 48, 48, 0x10000, 16 },
-       { 40, 48,  0x1000, 12 },
-       { 40, 48, 0x10000, 16 },
-       {  0,  0,  0x1000, 12 },
-       { 47, 64,  0x1000, 12 },
-       { 44, 64,  0x1000, 12 },
+       [VM_MODE_P52V48_4K]     = { 52, 48,  0x1000, 12 },
+       [VM_MODE_P52V48_64K]    = { 52, 48, 0x10000, 16 },
+       [VM_MODE_P48V48_4K]     = { 48, 48,  0x1000, 12 },
+       [VM_MODE_P48V48_64K]    = { 48, 48, 0x10000, 16 },
+       [VM_MODE_P40V48_4K]     = { 40, 48,  0x1000, 12 },
+       [VM_MODE_P40V48_64K]    = { 40, 48, 0x10000, 16 },
+       [VM_MODE_PXXV48_4K]     = {  0,  0,  0x1000, 12 },
+       [VM_MODE_P47V64_4K]     = { 47, 64,  0x1000, 12 },
+       [VM_MODE_P44V64_4K]     = { 44, 64,  0x1000, 12 },
 };
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
               "Missing new mode params?");
@@ -875,9 +870,17 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
        alignment = 1;
 #endif
 
+       /*
+        * When using THP mmap is not guaranteed to returned a hugepage aligned
+        * address so we have to pad the mmap. Padding is not needed for HugeTLB
+        * because mmap will always return an address aligned to the HugeTLB
+        * page size.
+        */
        if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
                alignment = max(backing_src_pagesz, alignment);
 
+       ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+
        /* Add enough memory to align up if necessary */
        if (alignment > 1)
                region->mmap_size += alignment;
@@ -910,8 +913,13 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
                    "test_malloc failed, mmap_start: %p errno: %i",
                    region->mmap_start, errno);
 
+       TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
+                   region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
+                   "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
+                   region->mmap_start, backing_src_pagesz);
+
        /* Align host address */
-       region->host_mem = align(region->mmap_start, alignment);
+       region->host_mem = align_ptr_up(region->mmap_start, alignment);
 
        /* As needed perform madvise */
        if ((src_type == VM_MEM_SRC_ANONYMOUS ||
@@ -954,7 +962,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
                            "mmap of alias failed, errno: %i", errno);
 
                /* Align host alias address */
-               region->host_alias = align(region->mmap_alias, alignment);
+               region->host_alias = align_ptr_up(region->mmap_alias, alignment);
        }
 }
 
index 0ef80db..722df3a 100644 (file)
 
 struct perf_test_args perf_test_args;
 
-uint64_t guest_test_phys_mem;
-
 /*
  * Guest virtual memory offset of the testing memory slot.
  * Must not conflict with identity mapped test code.
  */
 static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
 
+struct vcpu_thread {
+       /* The id of the vCPU. */
+       int vcpu_id;
+
+       /* The pthread backing the vCPU. */
+       pthread_t thread;
+
+       /* Set to true once the vCPU thread is up and running. */
+       bool running;
+};
+
+/* The vCPU threads involved in this test. */
+static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
+
+/* The function run by each vCPU thread, as provided by the test. */
+static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *);
+
+/* Set to true once all vCPU threads are up and running. */
+static bool all_vcpu_threads_running;
+
 /*
  * Continuously write to the first 8 bytes of each page in the
  * specified region.
  */
 static void guest_code(uint32_t vcpu_id)
 {
-       struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+       struct perf_test_args *pta = &perf_test_args;
+       struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
        uint64_t gva;
        uint64_t pages;
        int i;
@@ -37,9 +56,9 @@ static void guest_code(uint32_t vcpu_id)
 
        while (true) {
                for (i = 0; i < pages; i++) {
-                       uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+                       uint64_t addr = gva + (i * pta->guest_page_size);
 
-                       if (i % perf_test_args.wr_fract == 0)
+                       if (i % pta->wr_fract == 0)
                                *(uint64_t *)addr = 0x0123456789ABCDEF;
                        else
                                READ_ONCE(*(uint64_t *)addr);
@@ -49,35 +68,81 @@ static void guest_code(uint32_t vcpu_id)
        }
 }
 
+void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
+                          uint64_t vcpu_memory_bytes,
+                          bool partition_vcpu_memory_access)
+{
+       struct perf_test_args *pta = &perf_test_args;
+       struct perf_test_vcpu_args *vcpu_args;
+       int vcpu_id;
+
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+               vcpu_args = &pta->vcpu_args[vcpu_id];
+
+               vcpu_args->vcpu_id = vcpu_id;
+               if (partition_vcpu_memory_access) {
+                       vcpu_args->gva = guest_test_virt_mem +
+                                        (vcpu_id * vcpu_memory_bytes);
+                       vcpu_args->pages = vcpu_memory_bytes /
+                                          pta->guest_page_size;
+                       vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes);
+               } else {
+                       vcpu_args->gva = guest_test_virt_mem;
+                       vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
+                                          pta->guest_page_size;
+                       vcpu_args->gpa = pta->gpa;
+               }
+
+               vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
+               pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+                        vcpu_id, vcpu_args->gpa, vcpu_args->gpa +
+                        (vcpu_args->pages * pta->guest_page_size));
+       }
+}
+
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
                                   uint64_t vcpu_memory_bytes, int slots,
-                                  enum vm_mem_backing_src_type backing_src)
+                                  enum vm_mem_backing_src_type backing_src,
+                                  bool partition_vcpu_memory_access)
 {
+       struct perf_test_args *pta = &perf_test_args;
        struct kvm_vm *vm;
        uint64_t guest_num_pages;
+       uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
        int i;
 
        pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
-       perf_test_args.host_page_size = getpagesize();
-       perf_test_args.guest_page_size = vm_guest_mode_params[mode].page_size;
+       /* By default vCPUs will write to memory. */
+       pta->wr_fract = 1;
+
+       /*
+        * Snapshot the non-huge page size.  This is used by the guest code to
+        * access/dirty pages at the logging granularity.
+        */
+       pta->guest_page_size = vm_guest_mode_params[mode].page_size;
 
        guest_num_pages = vm_adjust_num_guest_pages(mode,
-                               (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size);
+                               (vcpus * vcpu_memory_bytes) / pta->guest_page_size);
 
-       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+       TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
                    "Guest memory size is not host page size aligned.");
-       TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+       TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0,
                    "Guest memory size is not guest page size aligned.");
        TEST_ASSERT(guest_num_pages % slots == 0,
                    "Guest memory cannot be evenly divided into %d slots.",
                    slots);
 
+       /*
+        * Pass guest_num_pages to populate the page tables for test memory.
+        * The memory is also added to memslot 0, but that's a benign side
+        * effect as KVM allows aliasing HVAs in meslots.
+        */
        vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
-                                 (vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
-                                 0, guest_code, NULL);
+                                 guest_num_pages, 0, guest_code, NULL);
 
-       perf_test_args.vm = vm;
+       pta->vm = vm;
 
        /*
         * If there should be more memory in the guest test region than there
@@ -90,20 +155,18 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
                    guest_num_pages, vm_get_max_gfn(vm), vcpus,
                    vcpu_memory_bytes);
 
-       guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
-                             perf_test_args.guest_page_size;
-       guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+       pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+       pta->gpa = align_down(pta->gpa, backing_src_pagesz);
 #ifdef __s390x__
        /* Align to 1M (segment size) */
-       guest_test_phys_mem &= ~((1 << 20) - 1);
+       pta->gpa = align_down(pta->gpa, 1 << 20);
 #endif
-       pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+       pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
 
        /* Add extra memory slots for testing */
        for (i = 0; i < slots; i++) {
                uint64_t region_pages = guest_num_pages / slots;
-               vm_paddr_t region_start = guest_test_phys_mem +
-                       region_pages * perf_test_args.guest_page_size * i;
+               vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i;
 
                vm_userspace_mem_region_add(vm, backing_src, region_start,
                                            PERF_TEST_MEM_SLOT_INDEX + i,
@@ -111,10 +174,15 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
        }
 
        /* Do mapping for the demand paging memory slot */
-       virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
+       virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages);
+
+       perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
 
        ucall_init(vm, NULL);
 
+       /* Export the shared variables to the guest. */
+       sync_global_to_guest(vm, perf_test_args);
+
        return vm;
 }
 
@@ -124,36 +192,60 @@ void perf_test_destroy_vm(struct kvm_vm *vm)
        kvm_vm_free(vm);
 }
 
-void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
-                          uint64_t vcpu_memory_bytes,
-                          bool partition_vcpu_memory_access)
+void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
+{
+       perf_test_args.wr_fract = wr_fract;
+       sync_global_to_guest(vm, perf_test_args);
+}
+
+static void *vcpu_thread_main(void *data)
+{
+       struct vcpu_thread *vcpu = data;
+
+       WRITE_ONCE(vcpu->running, true);
+
+       /*
+        * Wait for all vCPU threads to be up and running before calling the test-
+        * provided vCPU thread function. This prevents thread creation (which
+        * requires taking the mmap_sem in write mode) from interfering with the
+        * guest faulting in its memory.
+        */
+       while (!READ_ONCE(all_vcpu_threads_running))
+               ;
+
+       vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]);
+
+       return NULL;
+}
+
+void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *))
 {
-       vm_paddr_t vcpu_gpa;
-       struct perf_test_vcpu_args *vcpu_args;
        int vcpu_id;
 
+       vcpu_thread_fn = vcpu_fn;
+       WRITE_ONCE(all_vcpu_threads_running, false);
+
        for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
-               vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+               struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id];
 
-               vcpu_args->vcpu_id = vcpu_id;
-               if (partition_vcpu_memory_access) {
-                       vcpu_args->gva = guest_test_virt_mem +
-                                        (vcpu_id * vcpu_memory_bytes);
-                       vcpu_args->pages = vcpu_memory_bytes /
-                                          perf_test_args.guest_page_size;
-                       vcpu_gpa = guest_test_phys_mem +
-                                  (vcpu_id * vcpu_memory_bytes);
-               } else {
-                       vcpu_args->gva = guest_test_virt_mem;
-                       vcpu_args->pages = (vcpus * vcpu_memory_bytes) /
-                                          perf_test_args.guest_page_size;
-                       vcpu_gpa = guest_test_phys_mem;
-               }
+               vcpu->vcpu_id = vcpu_id;
+               WRITE_ONCE(vcpu->running, false);
 
-               vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+               pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+       }
 
-               pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
-                        vcpu_id, vcpu_gpa, vcpu_gpa +
-                        (vcpu_args->pages * perf_test_args.guest_page_size));
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+               while (!READ_ONCE(vcpu_threads[vcpu_id].running))
+                       ;
        }
+
+       WRITE_ONCE(all_vcpu_threads_running, true);
+}
+
+void perf_test_join_vcpu_threads(int vcpus)
+{
+       int vcpu_id;
+
+       for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+               pthread_join(vcpu_threads[vcpu_id].thread, NULL);
 }
index b724291..6d23878 100644 (file)
@@ -283,6 +283,11 @@ size_t get_backing_src_pagesz(uint32_t i)
        }
 }
 
+bool is_backing_src_hugetlb(uint32_t i)
+{
+       return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
+}
+
 static void print_available_backing_src_types(const char *prefix)
 {
        int i;
index 161eba7..0ebc03c 100644 (file)
@@ -13,6 +13,8 @@
 #include "processor.h"
 #include "svm_util.h"
 
+#define SEV_DEV_PATH "/dev/sev"
+
 struct gpr64_regs guest_regs;
 u64 rflags;
 
@@ -172,3 +174,14 @@ void nested_svm_check_supported(void)
                exit(KSFT_SKIP);
        }
 }
+
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ *   The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
+{
+       return open_path_or_exit(SEV_DEV_PATH, 0);
+}
index 4cfcafe..1410d0a 100644 (file)
@@ -36,11 +36,9 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 
 static bool run_vcpus = true;
 
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
        int ret;
-       struct perf_test_vcpu_args *vcpu_args =
-               (struct perf_test_vcpu_args *)data;
        int vcpu_id = vcpu_args->vcpu_id;
        struct kvm_vm *vm = perf_test_args.vm;
        struct kvm_run *run;
@@ -59,8 +57,6 @@ static void *vcpu_worker(void *data)
                            "Invalid guest sync status: exit_reason=%s\n",
                            exit_reason_str(run->exit_reason));
        }
-
-       return NULL;
 }
 
 struct memslot_antagonist_args {
@@ -80,7 +76,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
         * Add the dummy memslot just below the perf_test_util memslot, which is
         * at the top of the guest physical address space.
         */
-       gpa = guest_test_phys_mem - pages * vm_get_page_size(vm);
+       gpa = perf_test_args.gpa - pages * vm_get_page_size(vm);
 
        for (i = 0; i < nr_modifications; i++) {
                usleep(delay);
@@ -100,29 +96,15 @@ struct test_params {
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
        struct test_params *p = arg;
-       pthread_t *vcpu_threads;
        struct kvm_vm *vm;
-       int vcpu_id;
 
        vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
-                                VM_MEM_SRC_ANONYMOUS);
-
-       perf_test_args.wr_fract = 1;
-
-       vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
-       TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
-       perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
-                             p->partition_vcpu_memory_access);
-
-       /* Export the shared variables to the guest */
-       sync_global_to_guest(vm, perf_test_args);
+                                VM_MEM_SRC_ANONYMOUS,
+                                p->partition_vcpu_memory_access);
 
        pr_info("Finished creating vCPUs\n");
 
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-               pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
-                              &perf_test_args.vcpu_args[vcpu_id]);
+       perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
        pr_info("Started all vCPUs\n");
 
@@ -131,16 +113,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
        run_vcpus = false;
 
-       /* Wait for the vcpu threads to quit */
-       for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
-               pthread_join(vcpu_threads[vcpu_id], NULL);
-
+       perf_test_join_vcpu_threads(nr_vcpus);
        pr_info("All vCPU threads joined\n");
 
-       ucall_uninit(vm);
-       kvm_vm_free(vm);
-
-       free(vcpu_threads);
+       perf_test_destroy_vm(vm);
 }
 
 static void help(char *name)
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
new file mode 100644 (file)
index 0000000..5ba325c
--- /dev/null
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+#include "../lib/kvm_util_internal.h"
+
+#define SEV_POLICY_ES 0b100
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+static void sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+       struct kvm_sev_cmd cmd = {
+               .id = cmd_id,
+               .data = (uint64_t)data,
+               .sev_fd = open_sev_dev_path_or_exit(),
+       };
+       int ret;
+
+       ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+       TEST_ASSERT((ret == 0 || cmd.error == SEV_RET_SUCCESS),
+                   "%d failed: return code: %d, errno: %d, fw error: %d",
+                   cmd_id, ret, errno, cmd.error);
+}
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+       struct kvm_vm *vm;
+       struct kvm_sev_launch_start start = { 0 };
+       int i;
+
+       vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+       sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL);
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               vm_vcpu_add(vm, i);
+       if (es)
+               start.policy |= SEV_POLICY_ES;
+       sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start);
+       if (es)
+               sev_ioctl(vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+       return vm;
+}
+
+static struct kvm_vm *__vm_create(void)
+{
+       struct kvm_vm *vm;
+       int i;
+
+       vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+       for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+               vm_vcpu_add(vm, i);
+
+       return vm;
+}
+
+static int __sev_migrate_from(int dst_fd, int src_fd)
+{
+       struct kvm_enable_cap cap = {
+               .cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM,
+               .args = { src_fd }
+       };
+
+       return ioctl(dst_fd, KVM_ENABLE_CAP, &cap);
+}
+
+
+static void sev_migrate_from(int dst_fd, int src_fd)
+{
+       int ret;
+
+       ret = __sev_migrate_from(dst_fd, src_fd);
+       TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+       struct kvm_vm *src_vm;
+       struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+       int i;
+
+       src_vm = sev_vm_create(es);
+       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+               dst_vms[i] = __vm_create();
+
+       /* Initial migration from the src to the first dst. */
+       sev_migrate_from(dst_vms[0]->fd, src_vm->fd);
+
+       for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+               sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd);
+
+       /* Migrate the guest back to the original VM. */
+       sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd);
+
+       kvm_vm_free(src_vm);
+       for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+               kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+       struct kvm_vm *vm;
+       int source_fds[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+       int i, j;
+       struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+       for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+               j = i % NR_LOCK_TESTING_THREADS;
+               __sev_migrate_from(input->vm->fd, input->source_fds[j]);
+       }
+
+       return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+       struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+       pthread_t pt[NR_LOCK_TESTING_THREADS];
+       int i;
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+               input[i].vm = sev_vm_create(/* es= */ false);
+               input[0].source_fds[i] = input[i].vm->fd;
+       }
+       for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+               memcpy(input[i].source_fds, input[0].source_fds,
+                      sizeof(input[i].source_fds));
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+       for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+               pthread_join(pt[i], NULL);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+       struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+               *sev_es_vm_no_vmsa;
+       int ret;
+
+       sev_vm = sev_vm_create(/* es= */ false);
+       sev_es_vm = sev_vm_create(/* es= */ true);
+       vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+       vm_no_sev = __vm_create();
+       sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+       sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL);
+       vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+
+       ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd);
+       TEST_ASSERT(
+               ret == -1 && errno == EINVAL,
+               "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n",
+               ret, errno);
+
+       ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd);
+       TEST_ASSERT(ret == -1 && errno == EINVAL,
+                   "Migrations require SEV enabled. ret %d, errno: %d\n", ret,
+                   errno);
+}
+
+int main(int argc, char *argv[])
+{
+       test_sev_migrate_from(/* es= */ false);
+       test_sev_migrate_from(/* es= */ true);
+       test_sev_migrate_locking();
+       test_sev_migrate_parameters();
+       return 0;
+}
index eda0d2a..a0699f0 100644 (file)
 
 #define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
 #define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
 
 #define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
+#define VCPU_INFO_VADDR        (SHINFO_REGION_GVA + 0x40)
+
+#define EVTCHN_VECTOR  0x10
 
 static struct kvm_vm *vm;
 
@@ -56,15 +60,44 @@ struct vcpu_runstate_info {
     uint64_t time[4];
 };
 
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+        uint8_t evtchn_upcall_pending;
+        uint8_t evtchn_upcall_mask;
+        unsigned long evtchn_pending_sel;
+        struct arch_vcpu_info arch;
+        struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
 #define RUNSTATE_running  0
 #define RUNSTATE_runnable 1
 #define RUNSTATE_blocked  2
 #define RUNSTATE_offline  3
 
+static void evtchn_handler(struct ex_regs *regs)
+{
+       struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+       vi->evtchn_upcall_pending = 0;
+
+       GUEST_SYNC(0x20);
+}
+
 static void guest_code(void)
 {
        struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
 
+       __asm__ __volatile__(
+               "sti\n"
+               "nop\n"
+       );
+
+       /* Trigger an interrupt injection */
+       GUEST_SYNC(0);
+
        /* Test having the host set runstates manually */
        GUEST_SYNC(RUNSTATE_runnable);
        GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
@@ -153,7 +186,7 @@ int main(int argc, char *argv[])
 
        struct kvm_xen_vcpu_attr vi = {
                .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
-               .u.gpa = SHINFO_REGION_GPA + 0x40,
+               .u.gpa = VCPU_INFO_ADDR,
        };
        vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi);
 
@@ -163,6 +196,16 @@ int main(int argc, char *argv[])
        };
        vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
 
+       struct kvm_xen_hvm_attr vec = {
+               .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+               .u.vector = EVTCHN_VECTOR,
+       };
+       vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+       vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
        if (do_runstate_tests) {
                struct kvm_xen_vcpu_attr st = {
                        .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
@@ -171,9 +214,14 @@ int main(int argc, char *argv[])
                vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
        }
 
+       struct vcpu_info *vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+       vinfo->evtchn_upcall_pending = 0;
+
        struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
        rs->state = 0x5a;
 
+       bool evtchn_irq_expected = false;
+
        for (;;) {
                volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
                struct ucall uc;
@@ -193,16 +241,21 @@ int main(int argc, char *argv[])
                        struct kvm_xen_vcpu_attr rst;
                        long rundelay;
 
-                       /* If no runstate support, bail out early */
-                       if (!do_runstate_tests)
-                               goto done;
-
-                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-                                   rs->time[1] + rs->time[2] + rs->time[3],
-                                   "runstate times don't add up");
+                       if (do_runstate_tests)
+                               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                           rs->time[1] + rs->time[2] + rs->time[3],
+                                           "runstate times don't add up");
 
                        switch (uc.args[1]) {
-                       case RUNSTATE_running...RUNSTATE_offline:
+                       case 0:
+                               evtchn_irq_expected = true;
+                               vinfo->evtchn_upcall_pending = 1;
+                               break;
+
+                       case RUNSTATE_runnable...RUNSTATE_offline:
+                               TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+                               if (!do_runstate_tests)
+                                       goto done;
                                rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
                                rst.u.runstate.state = uc.args[1];
                                vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
@@ -236,6 +289,10 @@ int main(int argc, char *argv[])
                                        sched_yield();
                                } while (get_run_delay() < rundelay);
                                break;
+                       case 0x20:
+                               TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+                               evtchn_irq_expected = false;
+                               break;
                        }
                        break;
                }
index 7615f29..9897fa9 100644 (file)
@@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
+TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
index b5a69ad..d444ee6 100755 (executable)
@@ -629,6 +629,66 @@ ipv6_fcnal()
        log_test $? 0 "Nexthops removed on admin down"
 }
 
+ipv6_grp_refs()
+{
+       if [ ! -x "$(command -v mausezahn)" ]; then
+               echo "SKIP: Could not run test; need mausezahn tool"
+               return
+       fi
+
+       run_cmd "$IP link set dev veth1 up"
+       run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+       run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+       run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+       run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+       run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+       run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+       run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+       run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+       run_cmd "$IP nexthop add id 102 group 100"
+       run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+       # create per-cpu dsts through nh 100
+       run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+       # remove nh 100 from the group to delete the route potentially leaving
+       # a stale per-cpu dst which holds a reference to the nexthop's net
+       # device and to the IPv6 route
+       run_cmd "$IP nexthop replace id 102 group 101"
+       run_cmd "$IP route del 2001:db8:101::1/128"
+
+       # add both nexthops to the group so a reference is taken on them
+       run_cmd "$IP nexthop replace id 102 group 100/101"
+
+       # if the bug described in commit "net: nexthop: release IPv6 per-cpu
+       # dsts when replacing a nexthop group" exists at this point we have
+       # an unlinked IPv6 route (but not freed due to stale dst) with a
+       # reference over the group so we delete the group which will again
+       # only unlink it due to the route reference
+       run_cmd "$IP nexthop del id 102"
+
+       # delete the nexthop with stale dst, since we have an unlinked
+       # group with a ref to it and an unlinked IPv6 route with ref to the
+       # group, the nh will only be unlinked and not freed so the stale dst
+       # remains forever and we get a net device refcount imbalance
+       run_cmd "$IP nexthop del id 100"
+
+       # if a reference was lost this command will hang because the net device
+       # cannot be removed
+       timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
+
+       # we can't cleanup if the command is hung trying to delete the netdev
+       if [ $? -eq 137 ]; then
+               return 1
+       fi
+
+       # cleanup
+       run_cmd "$IP link del veth1.20"
+       run_cmd "$IP nexthop flush"
+
+       return 0
+}
+
 ipv6_grp_fcnal()
 {
        local rc
@@ -734,6 +794,9 @@ ipv6_grp_fcnal()
 
        run_cmd "$IP nexthop add id 108 group 31/24"
        log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+       ipv6_grp_refs
+       log_test $? 0 "Nexthop group replace refcounts"
 }
 
 ipv6_res_grp_fcnal()
index a4bd1b0..697994a 100644 (file)
@@ -6,6 +6,7 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_NET_VRF=m
 CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
+CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_MIRRED=m
 CONFIG_NET_ACT_MPLS=m
 CONFIG_NET_ACT_VLAN=m
index d9eca22..de19eb6 100755 (executable)
@@ -3,7 +3,7 @@
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
        mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
-       gact_trap_test"
+       gact_trap_test mirred_egress_to_ingress_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -13,10 +13,12 @@ tcflags="skip_hw"
 h1_create()
 {
        simple_if_init $h1 192.0.2.1/24
+       tc qdisc add dev $h1 clsact
 }
 
 h1_destroy()
 {
+       tc qdisc del dev $h1 clsact
        simple_if_fini $h1 192.0.2.1/24
 }
 
@@ -153,6 +155,49 @@ gact_trap_test()
        log_test "trap ($tcflags)"
 }
 
+mirred_egress_to_ingress_test()
+{
+       RET=0
+
+       tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+               ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \
+                       ct commit nat src addr 192.0.2.2 pipe \
+                       ct clear pipe \
+                       ct commit nat dst addr 192.0.2.1 pipe \
+                       mirred ingress redirect dev $h1
+
+       tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \
+               ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop
+       tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \
+               ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass
+
+       $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+               -t icmp "ping,id=42,seq=10" -q
+
+       tc_check_packets "dev $h1 egress" 100 1
+       check_err $? "didn't mirror first packet"
+
+       tc_check_packets "dev $swp1 ingress" 111 1
+       check_fail $? "didn't redirect first packet"
+       tc_check_packets "dev $swp1 ingress" 112 1
+       check_err $? "didn't receive reply to first packet"
+
+       ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1
+
+       tc_check_packets "dev $h1 egress" 100 2
+       check_err $? "didn't mirror second packet"
+       tc_check_packets "dev $swp1 ingress" 111 1
+       check_fail $? "didn't redirect second packet"
+       tc_check_packets "dev $swp1 ingress" 112 2
+       check_err $? "didn't receive reply to second packet"
+
+       tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+       tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower
+       tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower
+
+       log_test "mirred_egress_to_ingress ($tcflags)"
+}
+
 setup_prepare()
 {
        h1=${NETIFS[p1]}
index 8748199..ffca314 100644 (file)
@@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
        conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
        nft_concat_range.sh nft_conntrack_helper.sh \
        nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
-       ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
+       ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+       conntrack_vrf.sh
 
 LDLIBS = -lmnl
 TEST_GEN_FILES =  nf-queue
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755 (executable)
index 0000000..91f3ef0
--- /dev/null
@@ -0,0 +1,219 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device.  In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+       ip netns pids $ns0 | xargs kill 2>/dev/null
+       ip netns pids $ns1 | xargs kill 2>/dev/null
+
+       ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without nft tool"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace $ns0"
+       exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not add veth device"
+       exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not add vrf device"
+       exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not start iperf3"
+       exit $ksft_skip
+fi
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+       chain rawpre {
+               type filter hook prerouting priority raw;
+
+               iif { veth0, tvrf } counter meta nftrace set 1
+               iif veth0 counter ct zone set 1 counter return
+               iif tvrf counter ct zone set 2 counter return
+               ip protocol icmp counter
+               notrack counter
+       }
+
+       chain rawout {
+               type filter hook output priority raw;
+
+               oif veth0 counter ct zone set 1 counter return
+               oif tvrf counter ct zone set 2 counter return
+               notrack counter
+       }
+}
+EOF
+       ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+       # should be in zone 1, not zone 2
+       count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+       if [ $count -eq 1 ]; then
+               echo "PASS: entry found in conntrack zone 1"
+       else
+               echo "FAIL: entry not found in conntrack zone 1"
+               count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+               if [ $count -eq 1 ]; then
+                       echo "FAIL: entry found in zone 2 instead"
+               else
+                       echo "FAIL: entry not in zone 1 or 2, dumping table"
+                       ip netns exec $ns0 conntrack -L
+                       ip netns exec $ns0 nft list ruleset
+               fi
+       fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+       ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0;
+               # NB: masquerade should always be combined with 'oif(name) bla',
+               # lack of this is intentional here, we want to exercise double-snat.
+               ip saddr 172.30.30.0/30 counter masquerade random
+       }
+}
+EOF
+       ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+       if [ $? -ne 0 ]; then
+               echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+               ret=1
+               return
+       fi
+
+       # must also check that nat table was evaluated on second (lower device) iteration.
+       ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+       if [ $? -eq 0 ]; then
+               echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device"
+       else
+               echo "FAIL: vrf masq rule has unexpected counter value"
+               ret=1
+       fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+       ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+       chain postrouting {
+               type nat hook postrouting priority 0;
+               meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+       }
+}
+EOF
+       ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+       if [ $? -ne 0 ]; then
+               echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+               ret=1
+               return
+       fi
+
+       # must also check that nat table was evaluated on second (lower device) iteration.
+       ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+       if [ $? -eq 0 ]; then
+               echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+       else
+               echo "FAIL: vrf masq rule has unexpected counter value"
+               ret=1
+       fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf
+test_masquerade_veth
+
+exit $ret
index da1c1e4..d88867d 100755 (executable)
@@ -759,19 +759,21 @@ test_port_shadow()
        local result=""
        local logmsg=""
 
-       echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
-       nc_r=$!
+       # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+       echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
 
-       echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
-       nc_c=$!
+       echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+       sc_r=$!
 
-       # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
-       echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+       echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+       sc_c=$!
+
+       sleep 0.3
 
        # ns1 tries to connect to ns0:1405.  With default settings this should connect
        # to client, it matches the conntrack entry created above.
 
-       result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+       result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
 
        if [ "$result" = "$expect" ] ;then
                echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
@@ -780,7 +782,7 @@ test_port_shadow()
                ret=1
        fi
 
-       kill $nc_r $nc_c 2>/dev/null
+       kill $sc_r $sc_c 2>/dev/null
 
        # flush udp entries for next test round, if any
        ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
@@ -816,11 +818,10 @@ table $family raw {
        chain prerouting {
                type filter hook prerouting priority -300; policy accept;
                meta iif veth0 udp dport 1405 notrack
-               udp dport 1405 notrack
        }
        chain output {
                type filter hook output priority -300; policy accept;
-               udp sport 1405 notrack
+               meta oif veth0 udp sport 1405 notrack
        }
 }
 EOF
@@ -851,6 +852,18 @@ test_port_shadowing()
 {
        local family="ip"
 
+       conntrack -h >/dev/null 2>&1
+       if [ $? -ne 0 ];then
+               echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+               return
+       fi
+
+       socat -h > /dev/null 2>&1
+       if [ $? -ne 0 ];then
+               echo "SKIP: Could not run nat port shadowing test without socat tool"
+               return
+       fi
+
        ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
        ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
 
index 3d202b9..7d27f1f 100755 (executable)
@@ -16,6 +16,10 @@ timeout=4
 
 cleanup()
 {
+       ip netns pids ${ns1} | xargs kill 2>/dev/null
+       ip netns pids ${ns2} | xargs kill 2>/dev/null
+       ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
        ip netns del ${ns1}
        ip netns del ${ns2}
        ip netns del ${nsrouter}
@@ -332,6 +336,55 @@ EOF
        echo "PASS: tcp via loopback and re-queueing"
 }
 
+test_icmp_vrf() {
+       ip -net $ns1 link add tvrf type vrf table 9876
+       if [ $? -ne 0 ];then
+               echo "SKIP: Could not add vrf device"
+               return
+       fi
+
+       ip -net $ns1 li set eth0 master tvrf
+       ip -net $ns1 li set tvrf up
+
+       ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+       chain output {
+               type filter hook output priority 0; policy accept;
+               meta oifname "tvrf" icmp type echo-request counter queue num 1
+               meta oifname "eth0" icmp type echo-request counter queue num 1
+       }
+       chain post {
+               type filter hook postrouting priority 0; policy accept;
+               meta oifname "tvrf" icmp type echo-request counter queue num 1
+               meta oifname "eth0" icmp type echo-request counter queue num 1
+       }
+}
+EOF
+       ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+       local nfqpid=$!
+
+       sleep 1
+       ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+       for n in output post; do
+               for d in tvrf eth0; do
+                       ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+                       if [ $? -ne 0 ] ; then
+                               echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+                               ip netns exec ${ns1} nft list ruleset
+                               ret=1
+                               return
+                       fi
+               done
+       done
+
+       wait $nfqpid
+       [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+       wait 2>/dev/null
+}
+
 ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
 ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
@@ -372,5 +425,6 @@ test_queue 20
 test_tcp_forward
 test_tcp_localhost
 test_tcp_localhost_requeue
+test_icmp_vrf
 
 exit $ret
index 503982b..9183240 100644 (file)
@@ -68,7 +68,7 @@
         "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
         "expExitCode": "0",
         "verifyCmd": "$TC action get action bpf index 667",
-        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
         "matchCount": "1",
         "teardown": [
             "$TC action flush action bpf"
index 88a20c7..c604609 100644 (file)
@@ -15,7 +15,7 @@
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "0",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "4",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -37,7 +37,7 @@
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "0",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}",
            "matchCount": "256",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -60,7 +60,7 @@
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "4",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
@@ -82,7 +82,7 @@
            "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "0",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
            "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "0",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
            "expExitCode": "2",
            "verifyCmd": "$TC qdisc show dev $ETH",
-           "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+           "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
            "matchCount": "0",
            "teardown": [
                    "echo \"1\" > /sys/bus/netdevsim/del_device"
index 3f6d450..9646bb9 100644 (file)
@@ -2548,72 +2548,36 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
-void kvm_release_pfn(kvm_pfn_t pfn, bool dirty, struct gfn_to_pfn_cache *cache)
+void kvm_release_pfn(kvm_pfn_t pfn, bool dirty)
 {
        if (pfn == 0)
                return;
 
-       if (cache)
-               cache->pfn = cache->gfn = 0;
-
        if (dirty)
                kvm_release_pfn_dirty(pfn);
        else
                kvm_release_pfn_clean(pfn);
 }
 
-static void kvm_cache_gfn_to_pfn(struct kvm_memory_slot *slot, gfn_t gfn,
-                                struct gfn_to_pfn_cache *cache, u64 gen)
-{
-       kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
-       cache->pfn = gfn_to_pfn_memslot(slot, gfn);
-       cache->gfn = gfn;
-       cache->dirty = false;
-       cache->generation = gen;
-}
-
-static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
-                        struct kvm_host_map *map,
-                        struct gfn_to_pfn_cache *cache,
-                        bool atomic)
+int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
 {
        kvm_pfn_t pfn;
        void *hva = NULL;
        struct page *page = KVM_UNMAPPED_PAGE;
-       struct kvm_memory_slot *slot = __gfn_to_memslot(slots, gfn);
-       u64 gen = slots->generation;
 
        if (!map)
                return -EINVAL;
 
-       if (cache) {
-               if (!cache->pfn || cache->gfn != gfn ||
-                       cache->generation != gen) {
-                       if (atomic)
-                               return -EAGAIN;
-                       kvm_cache_gfn_to_pfn(slot, gfn, cache, gen);
-               }
-               pfn = cache->pfn;
-       } else {
-               if (atomic)
-                       return -EAGAIN;
-               pfn = gfn_to_pfn_memslot(slot, gfn);
-       }
+       pfn = gfn_to_pfn(vcpu->kvm, gfn);
        if (is_error_noslot_pfn(pfn))
                return -EINVAL;
 
        if (pfn_valid(pfn)) {
                page = pfn_to_page(pfn);
-               if (atomic)
-                       hva = kmap_atomic(page);
-               else
-                       hva = kmap(page);
+               hva = kmap(page);
 #ifdef CONFIG_HAS_IOMEM
-       } else if (!atomic) {
-               hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
        } else {
-               return -EINVAL;
+               hva = memremap(pfn_to_hpa(pfn), PAGE_SIZE, MEMREMAP_WB);
 #endif
        }
 
@@ -2627,27 +2591,9 @@ static int __kvm_map_gfn(struct kvm_memslots *slots, gfn_t gfn,
 
        return 0;
 }
-
-int kvm_map_gfn(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map,
-               struct gfn_to_pfn_cache *cache, bool atomic)
-{
-       return __kvm_map_gfn(kvm_memslots(vcpu->kvm), gfn, map,
-                       cache, atomic);
-}
-EXPORT_SYMBOL_GPL(kvm_map_gfn);
-
-int kvm_vcpu_map(struct kvm_vcpu *vcpu, gfn_t gfn, struct kvm_host_map *map)
-{
-       return __kvm_map_gfn(kvm_vcpu_memslots(vcpu), gfn, map,
-               NULL, false);
-}
 EXPORT_SYMBOL_GPL(kvm_vcpu_map);
 
-static void __kvm_unmap_gfn(struct kvm *kvm,
-                       struct kvm_memory_slot *memslot,
-                       struct kvm_host_map *map,
-                       struct gfn_to_pfn_cache *cache,
-                       bool dirty, bool atomic)
+void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
 {
        if (!map)
                return;
@@ -2655,45 +2601,21 @@ static void __kvm_unmap_gfn(struct kvm *kvm,
        if (!map->hva)
                return;
 
-       if (map->page != KVM_UNMAPPED_PAGE) {
-               if (atomic)
-                       kunmap_atomic(map->hva);
-               else
-                       kunmap(map->page);
-       }
+       if (map->page != KVM_UNMAPPED_PAGE)
+               kunmap(map->page);
 #ifdef CONFIG_HAS_IOMEM
-       else if (!atomic)
-               memunmap(map->hva);
        else
-               WARN_ONCE(1, "Unexpected unmapping in atomic context");
+               memunmap(map->hva);
 #endif
 
        if (dirty)
-               mark_page_dirty_in_slot(kvm, memslot, map->gfn);
+               kvm_vcpu_mark_page_dirty(vcpu, map->gfn);
 
-       if (cache)
-               cache->dirty |= dirty;
-       else
-               kvm_release_pfn(map->pfn, dirty, NULL);
+       kvm_release_pfn(map->pfn, dirty);
 
        map->hva = NULL;
        map->page = NULL;
 }
-
-int kvm_unmap_gfn(struct kvm_vcpu *vcpu, struct kvm_host_map *map, 
-                 struct gfn_to_pfn_cache *cache, bool dirty, bool atomic)
-{
-       __kvm_unmap_gfn(vcpu->kvm, gfn_to_memslot(vcpu->kvm, map->gfn), map,
-                       cache, dirty, atomic);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_unmap_gfn);
-
-void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty)
-{
-       __kvm_unmap_gfn(vcpu->kvm, kvm_vcpu_gfn_to_memslot(vcpu, map->gfn),
-                       map, NULL, dirty, false);
-}
 EXPORT_SYMBOL_GPL(kvm_vcpu_unmap);
 
 struct page *kvm_vcpu_gfn_to_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -3747,7 +3669,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
        struct kvm_fpu *fpu = NULL;
        struct kvm_sregs *kvm_sregs = NULL;
 
-       if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+       if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
                return -EIO;
 
        if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
@@ -3957,7 +3879,7 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
        void __user *argp = compat_ptr(arg);
        int r;
 
-       if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_bugged)
+       if (vcpu->kvm->mm != current->mm || vcpu->kvm->vm_dead)
                return -EIO;
 
        switch (ioctl) {
@@ -4023,7 +3945,7 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
 {
        struct kvm_device *dev = filp->private_data;
 
-       if (dev->kvm->mm != current->mm || dev->kvm->vm_bugged)
+       if (dev->kvm->mm != current->mm || dev->kvm->vm_dead)
                return -EIO;
 
        switch (ioctl) {
@@ -4345,7 +4267,7 @@ static long kvm_vm_ioctl(struct file *filp,
        void __user *argp = (void __user *)arg;
        int r;
 
-       if (kvm->mm != current->mm || kvm->vm_bugged)
+       if (kvm->mm != current->mm || kvm->vm_dead)
                return -EIO;
        switch (ioctl) {
        case KVM_CREATE_VCPU:
@@ -4556,7 +4478,7 @@ static long kvm_vm_compat_ioctl(struct file *filp,
        struct kvm *kvm = filp->private_data;
        int r;
 
-       if (kvm->mm != current->mm || kvm->vm_bugged)
+       if (kvm->mm != current->mm || kvm->vm_dead)
                return -EIO;
        switch (ioctl) {
 #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT