[Library] Update zlibng (#1255)

* Update zlibng * Set cmake path more directly in zlibng to hopefully fix an issue with the build on drone * I'm dumb, missing / in path * Mackal helps with a dumb gitignore issue * Adding all the files, not sure what's ignoring them and im tired of looking * Some tweaks to zlibng build to hopefully get it to build properly. works on msvc now
2025-12-11 16:51:29 +00:00 · 2021-02-23 17:00:26 -08:00 · 2021-02-23 17:00:26 -08:00 · 2957f5084d
commit 2957f5084d
parent e6dee96266
184 changed files with 22029 additions and 11703 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -252,6 +252,7 @@ IF(ZLIB_FOUND)
 		SET(ZLIB_LIBRARY_TYPE "zlib-ng")
 		SET(ZLIB_LIBRARY_LIBS "zlibstatic")
 		SET(ZLIB_LIBRARY_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/libs/zlibng")
+		INCLUDE_DIRECTORIES(SYSTEM "${CMAKE_CURRENT_BINARY_DIR}/libs/zlibng")
 	ELSE()
 		SET(ZLIB_LIBRARY_TYPE "   zlib")
 		SET(ZLIB_LIBRARY_LIBS ${ZLIB_LIBRARY})
--- a/libs/zlibng/.github/workflows/analyze.yml
+++ b/libs/zlibng/.github/workflows/analyze.yml
@ -0,0 +1,39 @@
+name: CI Static Analysis
+on: [push, pull_request]
+jobs:
+  GCC-10:  # static analysis with GCC 10's -fanalyzer; -Werror in CFLAGS makes any finding fail the build
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Install packages (Ubuntu)  # gcc-10 comes from the ubuntu-toolchain-r/test PPA added below
+      run: |
+        sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
+        sudo apt-get update
+        sudo apt-get install -y gcc-10
+    - name: Generate project files  # in-source configure with fuzzers, coverage and maintainer warnings off
+      run: |
+        cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=OFF -DWITH_CODE_COVERAGE=OFF -DWITH_MAINTAINER_WARNINGS=OFF
+      env:
+        CC: gcc-10
+        CFLAGS: "-fanalyzer -Werror -Wanalyzer-double-fclose -Wanalyzer-double-free -Wanalyzer-exposure-through-output-file -Wanalyzer-file-leak -Wanalyzer-free-of-non-heap -Wanalyzer-malloc-leak -Wanalyzer-null-argument -Wanalyzer-null-dereference -Wanalyzer-possible-null-argument -Wanalyzer-possible-null-dereference -Wanalyzer-stale-setjmp-buffer -Wanalyzer-tainted-array-index -Wanalyzer-unsafe-call-within-signal-handler -Wanalyzer-use-after-free -Wanalyzer-use-of-pointer-in-stale-stack-frame"
+        CI: true
+    - name: Compile source code  # stdout is sent to /dev/null, so only stderr output reaches the job log
+      run: |
+        cmake --build . --config Release > /dev/null
+  Clang-12:  # static analysis with scan-build-12 wrapped around both the configure and the build
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v1
+    - name: Install packages (Ubuntu)  # clang-tools-12 comes from the apt.llvm.org bionic repository added below
+      run: |
+        wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
+        sudo apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" -y
+        sudo apt-get install -y clang-tools-12
+    - name: Generate project files  # same configure options as the GCC-10 job, run under scan-build
+      run: |
+        scan-build-12 --status-bugs cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=OFF -DWITH_CODE_COVERAGE=OFF -DWITH_MAINTAINER_WARNINGS=OFF
+      env:
+        CI: true
+    - name: Compile source code  # stdout is sent to /dev/null, so only stderr output reaches the job log
+      run: |
+        scan-build-12 --status-bugs cmake --build . --config Release > /dev/null
--- a/libs/zlibng/.github/workflows/cmake.yml
+++ b/libs/zlibng/.github/workflows/cmake.yml
@ -0,0 +1,381 @@
+name: CI CMake
+on: [push, pull_request]
+jobs:
+  ci-cmake:  # a single job definition, instantiated once per entry of matrix.include
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: Ubuntu GCC
+            os: ubuntu-latest
+            compiler: gcc  # exported as CC for the configure step
+            cmake-args: -DWITH_SANITIZER=Address  # inserted verbatim into the cmake configure command
+            codecov: ubuntu_gcc  # enables the codecov steps and is forwarded as CODECOV_FLAGS
+
+          - name: Ubuntu GCC OSB -O1 No Unaligned64
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DWITH_UNALIGNED=ON -DUNALIGNED64_OK=OFF -DWITH_SANITIZER=Undefined
+            build-dir: ../build  # configure, build and test steps cd here; they stay in '.' when unset
+            build-src-dir: ../zlib-ng  # source path given to cmake from inside build-dir; '.' when unset
+            codecov: ubuntu_gcc_osb
+            cflags: -O1 -g3  # exported as CFLAGS for the configure step
+
+          - name: Ubuntu GCC -O3 No Unaligned
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DWITH_UNALIGNED=OFF
+            codecov: ubuntu_gcc_o3
+            cflags: -O3
+
+          - name: Ubuntu GCC Link Zlib  # no codecov key, so the coverage steps are skipped for this entry
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DZLIB_DUAL_LINK=ON
+
+          - name: Ubuntu GCC No AVX2
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DWITH_AVX2=OFF -DWITH_SANITIZER=Undefined
+            codecov: ubuntu_gcc_no_avx2
+
+          - name: Ubuntu GCC No SSE2
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DWITH_SSE2=OFF -DWITH_SANITIZER=Undefined
+            codecov: ubuntu_gcc_no_sse2
+
+          - name: Ubuntu GCC No SSE4
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DWITH_SSE4=OFF -DWITH_SANITIZER=Undefined
+            codecov: ubuntu_gcc_no_sse4
+
+          - name: Ubuntu GCC No PCLMULQDQ
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DWITH_PCLMULQDQ=OFF -DWITH_SANITIZER=Undefined
+            codecov: ubuntu_gcc_no_pclmulqdq
+
+          - name: Ubuntu GCC Compat No Opt
+            os: ubuntu-latest
+            compiler: gcc
+            cmake-args: -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Address
+            codecov: ubuntu_gcc_compat_no_opt
+            cflags: -DNOT_TWEAK_COMPILER
+
+          - name: Ubuntu GCC ARM SF
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabi-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi -DWITH_SANITIZER=Address
+            packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross  # installed by the "Install packages" step for the runner OS
+            codecov: ubuntu_gcc_armsf
+
+          - name: Ubuntu GCC ARM SF Compat No Opt
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabi-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
+            packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
+            codecov: ubuntu_gcc_armsf_compat_no_opt
+
+          - name: Ubuntu GCC ARM HF
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_SANITIZER=Address
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross  # hard-float libc, matching the gnueabihf compiler
+            codecov: ubuntu_gcc_armhf
+
+          - name: Ubuntu GCC ARM HF No ACLE
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_ACLE=OFF -DWITH_SANITIZER=Address
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+            codecov: ubuntu_gcc_armhf_no_acle
+
+          - name: Ubuntu GCC ARM HF No NEON
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_NEON=OFF -DWITH_SANITIZER=Address
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+            codecov: ubuntu_gcc_armhf_no_neon
+
+          - name: Ubuntu GCC ARM HF Compat No Opt
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+            codecov: ubuntu_gcc_armhf_compat_no_opt
+
+          - name: Ubuntu GCC AARCH64
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_SANITIZER=Address
+            asan-options: detect_leaks=0  # replaces the default 'verbosity=0' prefix of ASAN_OPTIONS in the test step
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+            codecov: ubuntu_gcc_aarch64
+
+          - name: Ubuntu GCC AARCH64 No ACLE
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_ACLE=OFF -DWITH_SANITIZER=Undefined
+            asan-options: detect_leaks=0
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+            codecov: ubuntu_gcc_aarch64_no_acle
+
+          - name: Ubuntu GCC AARCH64 No NEON
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_NEON=OFF -DWITH_SANITIZER=Undefined
+            asan-options: detect_leaks=0
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+            codecov: ubuntu_gcc_aarch64_no_neon
+
+          - name: Ubuntu GCC AARCH64 Compat No Opt
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
+            asan-options: detect_leaks=0
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+            codecov: ubuntu_gcc_aarch64_compat_no_opt
+
+          - name: Ubuntu GCC PPC
+            os: ubuntu-latest
+            compiler: powerpc-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc.cmake
+            packages: qemu gcc-powerpc-linux-gnu libc-dev-powerpc-cross
+            ldflags: -static  # exported as LDFLAGS for the configure step
+            codecov: ubuntu_gcc_ppc
+
+          - name: Ubuntu GCC PPC64
+            os: ubuntu-latest
+            compiler: powerpc64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64.cmake
+            packages: qemu gcc-powerpc64-linux-gnu libc-dev-ppc64-cross
+            ldflags: -static
+            codecov: ubuntu_gcc_ppc64
+
+          - name: Ubuntu GCC PPC64LE  # unlike the other PowerPC entries, no static ldflags here
+            os: ubuntu-latest
+            compiler: powerpc64le-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64le.cmake
+            packages: qemu gcc-powerpc64le-linux-gnu libc-dev-ppc64el-cross
+            codecov: ubuntu_gcc_ppc64le
+
+          - name: Ubuntu GCC SPARC64
+            os: ubuntu-latest
+            compiler: sparc64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-sparc64.cmake
+            packages: qemu gcc-sparc64-linux-gnu libc-dev-sparc64-cross
+            ldflags: -static
+            codecov: ubuntu_gcc_sparc64
+
+          - name: Ubuntu GCC S390X
+            os: ubuntu-latest
+            compiler: s390x-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DWITH_SANITIZER=Address
+            packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
+            ldflags: -static
+            codecov: ubuntu_gcc_s390x
+
+          - name: Ubuntu GCC S390X DFLTCC
+            os: ubuntu-latest
+            compiler: s390x-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DWITH_DFLTCC_DEFLATE=ON -DWITH_DFLTCC_INFLATE=ON -DWITH_SANITIZER=Address
+            packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
+            ldflags: -static
+            codecov: ubuntu_gcc_s390x_dfltcc  # distinct flag so this report is not filed under the plain S390X entry
+
+          - name: Ubuntu GCC S390X DFLTCC Compat
+            os: ubuntu-latest
+            compiler: s390x-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DZLIB_COMPAT=ON -DWITH_DFLTCC_DEFLATE=ON -DWITH_DFLTCC_INFLATE=ON -DWITH_SANITIZER=Undefined
+            packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
+            ldflags: -static
+            codecov: ubuntu_gcc_s390x_dfltcc_compat  # distinct flag, following the per-entry naming used by the other entries
+
+          - name: Ubuntu MinGW i686
+            os: ubuntu-latest
+            compiler: i686-w64-mingw32-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-i686.cmake
+            packages: wine32 gcc-mingw-w64
+            # Codecov disabled due to gcov locking issue error (hence no codecov key on this entry)
+
+          - name: Ubuntu MinGW x86_64
+            os: ubuntu-latest
+            compiler: x86_64-w64-mingw32-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-x86_64.cmake
+            packages: wine-stable gcc-mingw-w64
+            codecov: ubuntu_gcc_mingw_x86_64
+
+          - name: Ubuntu Clang
+            os: ubuntu-latest
+            compiler: clang
+            packages: llvm-6.0
+            gcov-exec: llvm-cov-6.0 gcov  # forwarded to the coverage upload as CODECOV_EXEC; plain 'gcov' when unset
+            codecov: ubuntu_clang
+
+          - name: Ubuntu Clang Inflate Strict
+            os: ubuntu-latest
+            compiler: clang
+            cmake-args: -DWITH_INFLATE_STRICT=ON
+            packages: llvm-6.0
+            gcov-exec: llvm-cov-6.0 gcov
+            codecov: ubuntu_clang_inflate_strict
+
+          - name: Ubuntu Clang Inflate Allow Invalid Dist
+            os: ubuntu-latest
+            compiler: clang
+            cmake-args: -DWITH_INFLATE_ALLOW_INVALID_DIST=ON
+            packages: llvm-6.0
+            gcov-exec: llvm-cov-6.0 gcov
+            codecov: ubuntu_clang_inflate_allow_invalid_dist
+
+          - name: Ubuntu Clang Memory Map
+            os: ubuntu-latest
+            compiler: clang
+            cflags: -DUSE_MMAP
+            packages: llvm-6.0
+            gcov-exec: llvm-cov-6.0 gcov
+            codecov: ubuntu_clang_mmap
+
+          - name: Ubuntu Clang Debug
+            os: ubuntu-latest
+            compiler: clang
+            packages: llvm-6.0
+            gcov-exec: llvm-cov-6.0 gcov
+            codecov: ubuntu_clang_debug
+            build-config: Debug  # used for CMAKE_BUILD_TYPE and 'cmake --build --config'; 'Release' when unset
+
+          - name: Ubuntu Clang MSAN
+            os: ubuntu-latest
+            compiler: clang
+            cmake-args: -GNinja -DWITH_SANITIZER=Memory
+            packages:  ninja-build llvm-6.0
+            gcov-exec: llvm-cov-6.0 gcov
+            cflags: -g3 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize-memory-track-origins
+            codecov: ubuntu_clang_msan
+
+          - name: Windows MSVC Win32
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A Win32
+
+          - name: Windows MSVC Win64
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A x64
+
+          - name: Windows MSVC ARM No Test  # 'ARM' in the name makes the "Run test cases" step skip this entry
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A ARM
+
+          - name: Windows MSVC ARM64 No Test  # skipped by the "Run test cases" step for the same reason
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A ARM64
+
+          - name: Windows GCC
+            os: windows-latest
+            compiler: gcc
+            cmake-args: -G Ninja  # ninja itself is installed by the Windows package step
+            codecov: win64_gcc
+
+          - name: Windows GCC Compat No Opt
+            os: windows-latest
+            compiler: gcc
+            cmake-args: -G Ninja -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF
+            codecov: win64_gcc_compat_no_opt
+
+          - name: macOS Clang
+            os: macos-latest
+            compiler: clang
+            cmake-args: -DWITH_SANITIZER=Address
+            codecov: macos_clang
+
+          - name: macOS GCC
+            os: macos-latest
+            compiler: gcc-10
+            cmake-args: -DWITH_SANITIZER=Undefined
+            packages: gcc@10  # installed with brew by the macOS package step
+            gcov-exec: gcov-10
+            codecov: macos_gcc
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v2
+
+    - name: Checkout test corpora  # second checkout places nmoinvaz/corpora under test/data/corpora
+      uses: actions/checkout@v2
+      with:
+        repository: nmoinvaz/corpora
+        path: test/data/corpora
+
+    - name: Install packages (Ubuntu)
+      if: runner.os == 'Linux' && matrix.packages  # skipped when the matrix entry lists no packages
+      run: |
+        sudo dpkg --add-architecture i386 # Required for wine32
+        sudo apt-get update
+        sudo apt-get install -y ${{ matrix.packages }}
+
+    - name: Install packages (Windows)
+      if: runner.os == 'Windows'  # unconditional on Windows: ninja is always installed, plus any matrix packages
+      run: |
+        choco install ninja ${{ matrix.packages }} --no-progress
+
+    - name: Install packages (macOS)
+      if: runner.os == 'macOS'  # unconditional on macOS: ninja is always installed, plus any matrix packages
+      run: |
+        brew install ninja ${{ matrix.packages }}
+      env:
+        HOMEBREW_NO_INSTALL_CLEANUP: 1
+
+    - name: Install codecov.io tools
+      if: matrix.codecov  # only for matrix entries that define a codecov flag
+      run: |
+        python -u -m pip install codecov
+
+    - name: Generate project files  # when build-dir is unset, mkdir makes a throwaway '.not-used' dir and the build stays in '.'
+      # Shared libraries turned off for qemu ppc* and sparc & reduce code coverage sources
+      run: |
+        mkdir ${{ matrix.build-dir || '.not-used' }}
+        cd ${{ matrix.build-dir || '.' }}
+        cmake ${{ matrix.build-src-dir || '.' }} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=ON -DWITH_CODE_COVERAGE=ON -DWITH_MAINTAINER_WARNINGS=ON
+      env:
+        CC: ${{ matrix.compiler }}
+        CFLAGS: ${{ matrix.cflags }}
+        LDFLAGS: ${{ matrix.ldflags }}
+        CI: true
+
+    - name: Compile source code  # builds in the same directory and configuration chosen at configure time
+      run: |
+        cd ${{ matrix.build-dir || '.' }}
+        cmake --build . --config ${{ matrix.build-config || 'Release' }}
+
+    - name: Run test cases
+      # Don't run tests on Windows ARM; elsewhere test the same configuration that was built
+      if: runner.os != 'Windows' || contains(matrix.name, 'ARM') == false
+      run: |
+        cd ${{ matrix.build-dir || '.' }}
+        ctest --verbose -C ${{ matrix.build-config || 'Release' }} --output-on-failure --max-width 120 -j 6
+      env:
+        ASAN_OPTIONS: ${{ matrix.asan-options || 'verbosity=0' }}:abort_on_error=1
+        MSAN_OPTIONS: ${{ matrix.msan-options || 'verbosity=0' }}:abort_on_error=1
+        TSAN_OPTIONS: ${{ matrix.tsan-options || 'verbosity=0' }}:abort_on_error=1
+        LSAN_OPTIONS: ${{ matrix.lsan-options || 'verbosity=0' }}:abort_on_error=1
+
+    - name: Upload coverage report
+      if: matrix.codecov && ( env.CODECOV_TOKEN_SECRET != '' || github.repository == 'zlib-ng/zlib-ng' )  # needs a codecov flag plus either a token secret or the upstream repository
+      shell: bash
+      run: |
+        bash tools/codecov-upload.sh
+      env:
+        # Codecov does not yet support GitHub Actions
+        CODECOV_TOKEN_SECRET: "${{secrets.CODECOV_TOKEN}}"
+        CODECOV_TOKEN: "${{ secrets.CODECOV_TOKEN || 'e4fdf847-f541-4ab1-9d50-3d27e5913906' }}"  # NOTE(review): literal fallback token is committed here - confirm this is intentional
+        CODECOV_FLAGS: "${{ matrix.codecov }}"
+        CODECOV_NAME:  "${{ matrix.name }}"
+        CODECOV_EXEC:  "${{ matrix.gcov-exec || 'gcov' }}"
+        CODECOV_DIR:   "${{ matrix.build-dir || '.' }}"
--- a/libs/zlibng/.github/workflows/configure.yml
+++ b/libs/zlibng/.github/workflows/configure.yml
@ -0,0 +1,185 @@
+name: CI Configure
+on: [push, pull_request]
+jobs:
+  ci-configure:  # a single job definition, instantiated once per entry of matrix.include
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: Ubuntu GCC
+            os: ubuntu-latest
+            compiler: gcc  # exported as CC for the configure step
+            configure-args: --warn  # appended to the ./configure invocation
+
+          - name: Ubuntu GCC OSB
+            os: ubuntu-latest
+            compiler: gcc
+            configure-args: --warn
+            build-dir: ../build  # configure, make and test steps cd here; they stay in '.' when unset
+            build-src-dir: ../zlib-ng  # directory whose configure script is run; '.' when unset
+
+          - name: Ubuntu GCC Compat No Opt
+            os: ubuntu-latest
+            compiler: gcc
+            configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
+
+          - name: Ubuntu GCC ARM SF
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabi-gcc
+            configure-args: --warn
+            chost: arm-linux-gnueabi  # exported as CHOST for the configure step
+            packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross  # installed by the Ubuntu package step
+
+          - name: Ubuntu GCC ARM SF Compat No Opt
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabi-gcc
+            configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
+            chost: arm-linux-gnueabi
+            packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
+
+          - name: Ubuntu GCC ARM HF
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            configure-args: --warn
+            chost: arm-linux-gnueabihf
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross  # hard-float libc, matching the gnueabihf compiler
+
+          - name: Ubuntu GCC ARM HF No ACLE
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            configure-args: --warn --without-acle
+            chost: arm-linux-gnueabihf
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+
+          - name: Ubuntu GCC ARM HF No NEON
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            configure-args: --warn --without-neon
+            chost: arm-linux-gnueabihf
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+
+          - name: Ubuntu GCC ARM HF Compat No Opt
+            os: ubuntu-latest
+            compiler: arm-linux-gnueabihf-gcc
+            configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
+            chost: arm-linux-gnueabihf
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+
+          - name: Ubuntu GCC AARCH64
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            configure-args: --warn
+            chost: aarch64-linux-gnu
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+
+          - name: Ubuntu GCC AARCH64 No ACLE
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            configure-args: --warn --without-acle
+            chost: aarch64-linux-gnu
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+
+          - name: Ubuntu GCC AARCH64 No NEON
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            configure-args: --warn --without-neon
+            chost: aarch64-linux-gnu
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+
+          - name: Ubuntu GCC AARCH64 Compat No Opt
+            os: ubuntu-latest
+            compiler: aarch64-linux-gnu-gcc
+            configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
+            chost: aarch64-linux-gnu
+            packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
+
+          - name: Ubuntu GCC PPC
+            os: ubuntu-latest
+            compiler: powerpc-linux-gnu-gcc
+            configure-args: --warn --static
+            chost: powerpc-linux-gnu
+            packages: qemu gcc-powerpc-linux-gnu libc-dev-powerpc-cross
+            cflags: -static  # exported as CFLAGS for the configure step
+            ldflags: -static  # exported as LDFLAGS for the configure step
+
+          - name: Ubuntu GCC PPC64
+            os: ubuntu-latest
+            compiler: powerpc64-linux-gnu-gcc
+            configure-args: --warn --static
+            chost: powerpc-linux-gnu  # NOTE(review): compiler triple is powerpc64-linux-gnu; confirm the 32-bit chost is intended
+            packages: qemu gcc-powerpc64-linux-gnu libc-dev-ppc64-cross
+            cflags: -static
+            ldflags: -static
+
+          - name: Ubuntu GCC PPC64LE
+            os: ubuntu-latest
+            compiler: powerpc64le-linux-gnu-gcc
+            configure-args: --warn
+            chost: powerpc64le-linux-gnu
+            packages: qemu gcc-powerpc64le-linux-gnu libc-dev-ppc64el-cross
+
+          - name: Ubuntu GCC S390X
+            os: ubuntu-latest
+            compiler: s390x-linux-gnu-gcc
+            configure-args: --warn --static
+            chost: s390x-linux-gnu
+            packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
+            cflags: -static
+            ldflags: -static
+
+          - name: Ubuntu GCC S390X DFLTCC
+            os: ubuntu-latest
+            compiler: s390x-linux-gnu-gcc
+            configure-args: --warn --static --with-dfltcc-deflate --with-dfltcc-inflate
+            chost: s390x-linux-gnu
+            packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
+            cflags: -static
+            ldflags: -static
+
+          - name: Ubuntu GCC S390X DFLTCC Compat
+            os: ubuntu-latest
+            compiler: s390x-linux-gnu-gcc
+            configure-args: --warn --zlib-compat --static --with-dfltcc-deflate --with-dfltcc-inflate
+            chost: s390x-linux-gnu
+            packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
+            cflags: -static
+            ldflags: -static
+
+          - name: macOS GCC  # no packages key; this workflow only has an Ubuntu package step
+            os: macOS-latest
+            compiler: gcc
+            configure-args: --warn
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v1
+
+    - name: Install packages (Ubuntu)
+      if: runner.os == 'Linux' && matrix.packages  # skipped when the matrix entry lists no packages
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y ${{ matrix.packages }}
+
+    - name: Generate project files  # when build-dir is unset, mkdir makes a throwaway '.not-used' dir and configure runs in '.'
+      run: |
+        mkdir ${{ matrix.build-dir || '.not-used' }}
+        cd ${{ matrix.build-dir || '.' }}
+        ${{ matrix.build-src-dir || '.' }}/configure ${{ matrix.configure-args }}
+      env:
+        CC: ${{ matrix.compiler }}
+        CFLAGS: ${{ matrix.cflags }}
+        LDFLAGS: ${{ matrix.ldflags }}
+        CHOST: ${{ matrix.chost }}
+        CI: true
+
+    - name: Compile source code  # runs make in the directory that was configured
+      run: |
+        cd ${{ matrix.build-dir || '.' }}
+        make -j2
+
+    - name: Run test cases  # unconditional here: every matrix entry runs 'make test'
+      run: |
+        cd ${{ matrix.build-dir || '.' }}
+        make test
--- a/libs/zlibng/.github/workflows/fuzz.yml
+++ b/libs/zlibng/.github/workflows/fuzz.yml
@ -0,0 +1,23 @@
+name: CI Fuzz
+on: [pull_request]  # pull requests only; pushes do not trigger this workflow
+jobs:
+  Fuzzing:
+    runs-on: ubuntu-latest
+    steps:
+    - name: Build Fuzzers  # uses the oss-fuzz CIFuzz build action, tracked at its master branch
+      uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'zlib-ng'
+        dry-run: false
+    - name: Run Fuzzers
+      uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
+      with:
+        oss-fuzz-project-name: 'zlib-ng'
+        fuzz-seconds: 600
+        dry-run: false
+    - name: Upload Crash
+      uses: actions/upload-artifact@v1
+      if: failure()  # artifacts are uploaded only when an earlier step failed
+      with:
+        name: artifacts
+        path: ./out/artifacts
--- a/libs/zlibng/.github/workflows/libpng.yml
+++ b/libs/zlibng/.github/workflows/libpng.yml
@ -0,0 +1,46 @@
+name: CI Libpng
+on: [pull_request]  # pull requests only; pushes do not trigger this workflow
+jobs:
+  pngtest:
+    name: Ubuntu Clang
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout repository (zlib-ng)
+      uses: actions/checkout@v1
+
+    - name: Generate project files (zlib-ng)  # in-source static build in zlib-compat mode with zlib-ng's own tests disabled
+      run: |
+        cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF
+      env:
+        CC: clang
+        CFLAGS: -fPIC
+        CI: true
+
+    - name: Compile source code (zlib-ng)  # the libpng configure step later points ZLIB_LIBRARY at ../libz.a
+      run: |
+        cmake --build . --config Release
+
+    - name: Checkout repository (libpng)  # cloned into ./libpng inside the zlib-ng checkout
+      uses: actions/checkout@v2
+      with:
+        repository: glennrp/libpng
+        path: libpng
+
+    - name: Generate project files (libpng)  # both zlib paths are absolute and point at the parent zlib-ng build
+      run: |
+        cd libpng
+        cmake . -DCMAKE_BUILD_TYPE=Release -DPNG_TESTS=ON -DPNG_STATIC=OFF -DZLIB_INCLUDE_DIR=$PWD/.. -DZLIB_LIBRARY=$PWD/../libz.a
+      env:
+        CC: clang
+        CI: true
+
+    - name: Compile source code (libpng)
+      run: |
+        cd libpng
+        cmake --build . --config Release
+
+    - name: Run test cases (libpng)  # runs libpng's test suite against the zlib-ng build
+      run: |
+        cd libpng
+        ctest -C Release --output-on-failure --max-width 120
--- a/libs/zlibng/.github/workflows/nmake.yml
+++ b/libs/zlibng/.github/workflows/nmake.yml
@ -0,0 +1,48 @@
+name: CI NMake
+on: [push, pull_request]
+jobs:
+  ci-cmake:  # NOTE(review): job id is identical to the one in the CI CMake workflow; possibly a copy-paste - confirm
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: Windows NMake x86
+            os: windows-latest
+            makefile: win32/Makefile.msc  # passed to 'nmake -f'
+            vc-vars: x86  # argument given to vcvarsall.bat before nmake runs
+
+          - name: Windows NMake x64
+            os: windows-latest
+            makefile: win32/Makefile.msc
+            vc-vars: x86_amd64
+
+          - name: Windows NMake ARM No Test
+            os: windows-latest
+            makefile: win32/Makefile.arm
+            vc-vars: x86_arm  # contains 'arm', so the "Run test cases" step is skipped
+
+          - name: Windows NMake ARM64 No Test
+            os: windows-latest
+            makefile: win32/Makefile.a64
+            vc-vars: x86_arm64  # contains 'arm', so the "Run test cases" step is skipped
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v1
+
+    - name: Compile source code  # the vcvarsall.bat path is hard-coded to the VS 2019 Enterprise install location
+      shell: cmd
+      run: |
+        call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.vc-vars }}
+        nmake -f ${{ matrix.makefile }}
+
+    - name: Run test cases  # vcvarsall.bat is called again here before the test and testdll targets
+      shell: cmd
+      # Don't run tests on Windows ARM
+      if: contains(matrix.vc-vars, 'arm') == false
+      run: |
+        call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.vc-vars }}
+        nmake -f ${{ matrix.makefile }} test
+        nmake -f ${{ matrix.makefile }} testdll
--- a/libs/zlibng/.github/workflows/pkgcheck.yml
+++ b/libs/zlibng/.github/workflows/pkgcheck.yml
@ -0,0 +1,121 @@
+name: CI Pkgcheck
+on: [push, pull_request]
+jobs:
+  ci-pkgcheck:  # a single job definition, instantiated once per entry of matrix.include
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: Ubuntu GCC
+            os: ubuntu-latest
+            compiler: gcc  # exported as CC to the pkgcheck and abicheck scripts
+
+          - name: Ubuntu GCC -m32
+            os: ubuntu-latest
+            compiler: gcc
+            packages: gcc-multilib  # appended to the package list of the per-OS install step
+            cmake-args: -DCMAKE_C_FLAGS=-m32  # exported as CMAKE_ARGS to test/pkgcheck.sh
+            cflags: -m32  # exported as CFLAGS
+            ldflags: -m32  # exported as LDFLAGS
+
+          - name: Ubuntu GCC ARM HF
+            os: ubuntu-latest
+            chost: arm-linux-gnueabihf  # exported as CHOST
+            compiler: arm-linux-gnueabihf-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf
+            packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
+
+          - name: Ubuntu GCC AARCH64
+            os: ubuntu-latest
+            chost: aarch64-linux-gnu
+            compiler: aarch64-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake
+            packages: qemu gcc-aarch64-linux-gnu libc6-dev-arm64-cross
+
+          - name: Ubuntu GCC PPC
+            os: ubuntu-latest
+            chost: powerpc-linux-gnu
+            compiler: powerpc-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc.cmake
+            packages: qemu gcc-powerpc-linux-gnu libc6-dev-powerpc-cross
+
+          - name: Ubuntu GCC PPC64LE
+            os: ubuntu-latest
+            chost: powerpc64le-linux-gnu
+            compiler: powerpc64le-linux-gnu-gcc
+            cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64le.cmake
+            packages: qemu gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross
+
+          - name: macOS Clang  # the ABI check steps are skipped on macOS
+            os: macOS-latest
+            compiler: clang
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v1
+
+    - name: Install packages (Ubuntu)
+      if: runner.os == 'Linux'  # unconditional on Linux: the base tools are always installed, plus any matrix packages
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y --no-install-recommends abigail-tools ninja-build diffoscope ${{ matrix.packages }}
+
+    - name: Install packages (macOS)
+      if: runner.os == 'macOS'
+      run: |
+        brew install ninja diffoscope ${{ matrix.packages }}
+      env:
+        HOMEBREW_NO_INSTALL_CLEANUP: 1
+
+    - name: Select Xcode version (macOS)
+      # Use a version of Xcode that supports ZERO_AR_DATE until CMake supports
+      # AppleClang linking with libtool using -D argument
+      # https://gitlab.kitware.com/cmake/cmake/-/issues/19852
+      if: runner.os == 'macOS'
+      uses: maxim-lobanov/setup-xcode@v1
+      with:
+        xcode-version: '12.1.1'
+
+    - name: Compare builds  # native-API run of test/pkgcheck.sh; toolchain settings are passed through the environment
+      run: |
+        sh test/pkgcheck.sh
+      env:
+        CC: ${{ matrix.compiler }}
+        CFLAGS: ${{ matrix.cflags }}
+        CHOST: ${{ matrix.chost }}
+        CMAKE_ARGS: ${{ matrix.cmake-args }}
+        LDFLAGS: ${{ matrix.ldflags }}
+
+    - name: Compare builds (compat)  # same script and environment, run with --zlib-compat
+      run: |
+        sh test/pkgcheck.sh --zlib-compat
+      env:
+        CC: ${{ matrix.compiler }}
+        CFLAGS: ${{ matrix.cflags }}
+        CHOST: ${{ matrix.chost }}
+        CMAKE_ARGS: ${{ matrix.cmake-args }}
+        LDFLAGS: ${{ matrix.ldflags }}
+
+    - name: Check ABI  # unlike the compare steps, CMAKE_ARGS is not exported here
+      # macOS runner does not contain abigail
+      if: runner.os != 'macOS'
+      run: |
+        sh test/abicheck.sh --refresh_if
+      env:
+        CC: ${{ matrix.compiler }}
+        CFLAGS: ${{ matrix.cflags }}
+        CHOST: ${{ matrix.chost }}
+        LDFLAGS: ${{ matrix.ldflags }}
+
+    - name: Check ABI (compat)  # same script and environment, run with --zlib-compat
+      # macOS runner does not contain abigail
+      if: runner.os != 'macOS'
+      run: |
+        sh test/abicheck.sh --zlib-compat --refresh_if
+      env:
+        CC: ${{ matrix.compiler }}
+        CFLAGS: ${{ matrix.cflags }}
+        CHOST: ${{ matrix.chost }}
+        LDFLAGS: ${{ matrix.ldflags }}
--- a/libs/zlibng/.github/workflows/release.yml
+++ b/libs/zlibng/.github/workflows/release.yml
@ -0,0 +1,73 @@
+name: CI Release
+on:
+  push:
+    tags:
+    - '*'
+jobs:
+  ci-cmake:
+    name: ${{ matrix.name }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - name: Windows MSVC Win32
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A Win32
+            deploy-name: win32
+
+          - name: Windows MSVC Win32 Compat
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A Win32 -DZLIB_COMPAT=ON
+            deploy-name: win32-compat
+
+          - name: Windows MSVC Win64
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A x64
+            deploy-name: win64
+
+          - name: Windows MSVC Win64 Compat
+            os: windows-latest
+            compiler: cl
+            cmake-args: -A x64 -DZLIB_COMPAT=ON
+            deploy-name: win64-compat
+
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v1
+
+    - name: Set environment variables
+      shell: bash
+      run:  echo "tag=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
+
+    - name: Generate project files
+      run: |
+        cmake . ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=Release -DZLIB_ENABLE_TESTS=ON -DCMAKE_INSTALL_PREFIX=out -DINSTALL_UTILS=ON
+      env:
+        CC: ${{ matrix.compiler }}
+        CI: true
+
+    - name: Compile source code
+      run: |
+        cmake --build . --config Release --target install
+
+    - name: Package release (Windows)
+      if: runner.os == 'Windows'
+      run: |
+        cd out
+        7z a -tzip ../zlib-ng-${{ matrix.deploy-name }}.zip bin include lib ../LICENSE.md ../README.md
+
+    - name: Upload release (Windows)
+      uses: svenstaro/upload-release-action@v1-release
+      if: runner.os == 'Windows'
+      with:
+        asset_name: zlib-ng-${{ matrix.deploy-name }}.zip
+        file: zlib-ng-${{ matrix.deploy-name }}.zip
+        tag: ${{env.tag}}
+        repo_token: ${{ secrets.GITHUB_TOKEN }}
+        overwrite: true
+      env:
+        GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
--- a/libs/zlibng/.gitignore
+++ b/libs/zlibng/.gitignore
@ -13,14 +13,18 @@
 *.gcno
 *.gcov

+/adler32_test
+/adler32_testsh
 /example
 /example64
 /examplesh
 /libz.so*
 /libz-ng.so*
+/makefixed
 /minigzip
 /minigzip64
 /minigzipsh
+/switchlevels
 /zlib.pc
 /zlib-ng.pc
 /CVE-2003-0107
@ -46,8 +50,9 @@ foo.gz
 CMakeCache.txt
 CMakeFiles
 Testing
-*.cmake
+/*.cmake
 *.stackdump
+*._h
 zconf.h
 zconf.h.cmakein
 zconf.h.included
@ -61,6 +66,7 @@ a.out
 /Makefile
 /arch/arm/Makefile
 /arch/generic/Makefile
+/arch/power/Makefile
 /arch/x86/Makefile
 .kdev4
 *.kdev4
@ -71,6 +77,10 @@ a.out
 /zlib.dir
 /zlibstatic.dir
 /win32/Debug
+/build/
+/build[.-]*/
+/btmp[12]/
+/pkgtmp[12]/

 /.idea
 /cmake-build-debug
--- a/libs/zlibng/.shellcheckrc
+++ b/libs/zlibng/.shellcheckrc
@ -0,0 +1 @@
+disable=SC2140,SC2086,SC2046,SC2015,SC1097,SC1035,SC1036,SC1007,SC2154,SC2155,SC2000,SC2034,SC2016,SC1091,SC1090,SC2212,SC2143,SC2129,SC2102,SC2069,SC1041,SC1042,SC1044,SC1046,SC1119,SC1110,SC1111,SC1112,SC1102,SC1105,SC1101,SC1004,SC1003,SC1012,SC2068,SC2065,SC2064,SC2063,SC2059,SC2053,SC2048,SC2044,SC2032,SC2031,SC2030,SC2029,SC2025,SC2024,SC2022,SC2018,SC2019,SC2017,SC2014,SC2013,SC2012,SC2009,SC2001,SC2098,SC2096,SC2094,SC2091,SC2092,SC2088,SC2087,SC2076,SC2072,SC2071,SC2223,SC2221,SC2222,SC2217,SC2207,SC2206,SC2205,SC2190,SC2188,SC2187,SC2185,SC2179,SC2178,SC2174,SC2168,SC2167,SC2163,SC2161,SC2160,SC2153,SC2150,SC2148,SC2147,SC2146,SC2142,SC2139,SC2126,SC2123,SC2120,SC2119,SC2117,SC2114,SC1117,SC2164,SC1083,SC2004,SC2125,SC2128,SC2011,SC1008,SC1019,SC2093,SC1132,SC1129,SC2236,SC2237,SC2231,SC2230,SC2229,SC2106,SC2102,SC2243,SC2244,SC2245,SC2247,SC2248,SC2249,SC2250,SC2251,SC2252,SC2181
--- a/libs/zlibng/.travis.yml
+++ b/libs/zlibng/.travis.yml
@ -1,283 +0,0 @@
-language: c
-cache: ccache
-dist: xenial
-
-env:
-  global:
-    - BUILDDIR=.
-    - MAKER="make -j2"
-    - TESTER="make test"
-
-matrix:
-  include:
-    - os: windows
-      compiler: clang
-      env:
-        - GENERATOR="cmake . "
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    - os: windows
-      compiler: clang
-      env:
-        - GENERATOR="cmake ..\\zlib-ng -DZLIB_COMPAT=ON"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-        - BUILDDIR=..\\build
-    - os: windows
-      compiler: gcc
-      env:
-        - GENERATOR="cmake ."
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-
-    - os: linux
-      compiler: gcc
-      env: GENERATOR="./configure --warn"
-    - os: linux
-      compiler: gcc
-      env: GENERATOR="cmake . -DZLIB_COMPAT=OFF -DWITH_GZFILEOP=ON -DWITH_NEW_STRATEGIES=YES -DWITH_OPTIM=ON"
-    - os: linux
-      compiler: gcc
-      env:
-        - GENERATOR="../zlib-ng/configure --warn --zlib-compat"
-        - BUILDDIR=../build
-    - os: linux
-      compiler: gcc
-      env: GENERATOR="./configure --warn --zlib-compat --without-optimizations --without-new-strategies"
-    - os: linux
-      compiler: gcc
-      env: GENERATOR="cmake ."
-    - os: linux
-      compiler: gcc
-      env:
-        - GENERATOR="cmake ../zlib-ng"
-        - BUILDDIR=../build
-
-    - os: linux
-      compiler: clang
-      env: GENERATOR="./configure --warn --zlib-compat"
-    - os: linux
-      compiler: clang
-      env:
-        - GENERATOR="cmake ../zlib-ng"
-        - BUILDDIR=../build
-    - os: linux
-      compiler: clang
-      env:
-        - GENERATOR="scan-build -v --status-bugs cmake ../zlib-ng"
-        - MAKER="scan-build -v --status-bugs make"
-        - BUILDDIR=../build
-
-    - os: osx
-      compiler: gcc
-      env: GENERATOR="./configure --warn --zlib-compat"
-    - os: osx
-      compiler: gcc
-      env:
-        - GENERATOR="../zlib-ng/configure --warn --zlib-compat"
-        - BUILDDIR=../build
-    - os: osx
-      compiler: gcc
-      env: GENERATOR="cmake ."
-
-    - os: osx
-      compiler: clang
-      env: GENERATOR="./configure --warn --zlib-compat"
-    - os: osx
-      compiler: clang
-      env:
-        - GENERATOR="cmake ../zlib-ng"
-        - BUILDDIR=../build
-
-    # compiling for linux-ppc64le variants
-    - os: linux-ppc64le
-      compiler: gcc
-      env: GENERATOR="cmake ."
-    - os: linux-ppc64le
-      compiler: gcc
-      env:
-        - GENERATOR="cmake ../zlib-ng"
-        - BUILDDIR=../build
-
-    - os: linux-ppc64le
-      compiler: clang
-      env: GENERATOR="./configure --warn --zlib-compat"
-    - os: linux-ppc64le
-      compiler: clang
-      env:
-        - GENERATOR="cmake ../zlib-ng"
-        - BUILDDIR=../build
-
-    # Cross compiling for arm variants
-    - os: linux
-      compiler: aarch64-linux-gnu-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-aarch64-linux-gnu
-            - libc-dev-arm64-cross
-      # For all aarch64 implementations NEON is mandatory, while crypto/crc are not.
-      env:
-        - GENERATOR="./configure --warn --zlib-compat"
-        - CHOST=aarch64-linux-gnu
-    - os: linux
-      compiler: aarch64-linux-gnu-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-aarch64-linux-gnu
-            - libc-dev-arm64-cross
-      # For all aarch64 implementations NEON is mandatory, while crypto/crc are not.
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake . -DZLIB_COMPAT=ON"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    - os: linux
-      compiler: aarch64-linux-gnu-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-aarch64-linux-gnu
-            - libc-dev-arm64-cross
-      env:
-        - GENERATOR="./configure --warn --zlib-compat"
-        - CHOST=aarch64-linux-gnu
-    - os: linux
-      compiler: aarch64-linux-gnu-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-aarch64-linux-gnu
-            - libc-dev-arm64-cross
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake ."
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    # Hard-float subsets
-    - os: linux
-      compiler: arm-linux-gnueabihf-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabihf
-            - libc-dev-armhf-cross
-      env:
-        - GENERATOR="./configure --warn"
-        - CHOST=arm-linux-gnueabihf
-    - os: linux
-      compiler: arm-linux-gnueabihf-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabihf
-            - libc-dev-armhf-cross
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    - os: linux
-      compiler: arm-linux-gnueabihf-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabihf
-            - libc-dev-armhf-cross
-      env:
-        - GENERATOR="./configure --warn --zlib-compat --without-neon"
-        - CHOST=arm-linux-gnueabihf
-    - os: linux
-      compiler: arm-linux-gnueabihf-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabihf
-            - libc-dev-armhf-cross
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DWITH_NEON=OFF -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    - os: linux
-      compiler: arm-linux-gnueabihf-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabihf
-            - libc-dev-armhf-cross
-      env:
-        - GENERATOR="./configure --warn --zlib-compat"
-        - CHOST=arm-linux-gnueabihf
-    - os: linux
-      compiler: arm-linux-gnueabihf-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabihf
-            - libc-dev-armhf-cross
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    # Soft-float subset
-    - os: linux
-      compiler: arm-linux-gnueabi-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabi
-            - libc-dev-armel-cross
-      env:
-        - GENERATOR="./configure"
-        - CHOST=arm-linux-gnueabi
-    - os: linux
-      compiler: arm-linux-gnueabi-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabi
-            - libc-dev-armel-cross
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-    - os: linux
-      compiler: arm-linux-gnueabi-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabi
-            - libc-dev-armel-cross
-      env:
-        - GENERATOR="./configure --zlib-compat"
-        - CHOST=arm-linux-gnueabi
-    - os: linux
-      compiler: arm-linux-gnueabi-gcc
-      addons:
-        apt:
-          packages:
-            - qemu
-            - gcc-arm-linux-gnueabi
-            - libc-dev-armel-cross
-      env:
-        - GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi"
-        - MAKER="cmake --build . --config Release"
-        - TESTER="ctest --verbose -C Release"
-
-script:
-  - mkdir -p $BUILDDIR
-  - cd $BUILDDIR
-  - $GENERATOR
-  - $MAKER
-  - $TESTER
--- a/libs/zlibng/CMakeLists.txt
+++ b/libs/zlibng/CMakeLists.txt
--- a/libs/zlibng/ChangeLog.zlib
+++ b/libs/zlibng/ChangeLog.zlib
--- a/libs/zlibng/INDEX
+++ b/libs/zlibng/INDEX
@ -1,55 +0,0 @@
-CMakeLists.txt  cmake build file
-ChangeLog.zlib  history of changes up to the fork from zlib 1.2.11
-FAQ.zlib        Frequently Asked Questions about zlib, as distributed in zlib 1.2.11
-INDEX           this file
-Makefile        dummy Makefile that tells you to ./configure
-Makefile.in     template for Unix Makefile
-README          guess what
-README.zlib     Copy of the original README file distributed in zlib 1.2.11
-configure       configure script for Unix
-test/example.c  zlib usages examples for build testing
-test/minigzip.c minimal gzip-like functionality for build testing
-test/infcover.c inf*.c code coverage for build coverage testing
-treebuild.xml   XML description of source file dependencies
-zconf.h.cmakein zconf.h template for cmake
-zconf.h.in      zconf.h template for configure
-zlib.3          Man page for zlib
-zlib.3.pdf      Man page in PDF format
-zlib.map        Linux symbol information
-zlib.pc.in      Template for pkg-config descriptor
-zlib.pc.cmakein zlib.pc template for cmake
-zlib2ansi       perl script to convert source files for C++ compilation
-
-arch/		architecture-specific code
-doc/            documentation for formats and algorithms
-win32/          makefiles for Windows
-
-                zlib public header files (required for library use):
-zconf.h
-zlib.h
-
-                private source files used to build the zlib library:
-adler32.c
-compress.c
-crc32.c
-crc32.h
-deflate.c
-deflate.h
-gzclose.c
-gzguts.h
-gzlib.c
-gzread.c
-gzwrite.c
-infback.c
-inffast.c
-inffast.h
-inffixed.h
-inflate.c
-inflate.h
-inftrees.c
-inftrees.h
-trees.c
-trees.h
-uncompr.c
-zutil.c
-zutil.h
--- a/libs/zlibng/INDEX.md
+++ b/libs/zlibng/INDEX.md
@ -0,0 +1,37 @@
+Contents
+--------
+
+| Name             | Description                                                    |
+|:-----------------|:---------------------------------------------------------------|
+| arch/            | Architecture-specific code                                     |
+| doc/             | Documentation for formats and algorithms                       |
+| test/example.c   | Zlib usage examples for build testing                          |
+| test/minigzip.c  | Minimal gzip-like functionality for build testing              |
+| test/infcover.c  | Inflate code coverage for build testing                        |
+| win32/           | Shared library version resources for Windows                   |
+| CMakeLists.txt   | Cmake build script                                             |
+| configure        | Bash configure/build script                                    |
+| adler32.c        | Compute the Adler-32 checksum of a data stream                 |
+| chunkset.*       | Inline functions to copy small data chunks                     |
+| compress.c       | Compress a memory buffer                                       |
+| deflate.*        | Compress data using the deflate algorithm                      |
+| deflate_fast.c   | Compress data using the deflate algorithm with fast strategy   |
+| deflate_medium.c | Compress data using the deflate algorithm with medium strategy |
+| deflate_slow.c   | Compress data using the deflate algorithm with slow strategy   |
+| functable.*      | Struct containing function pointers to optimized functions     |
+| gzguts.h         | Internal definitions for gzip operations                       |
+| gzlib.c          | Functions common to reading and writing gzip files             |
+| gzread.c         | Read gzip files                                                |
+| gzwrite.c        | Write gzip files                                               |
+| infback.*        | Inflate using a callback interface                             |
+| inflate.*        | Decompress data                                                |
+| inffast.*        | Decompress data with speed optimizations                       |
+| inffixed_tbl.h   | Table for decoding fixed codes                                 |
+| inftrees.h       | Generate Huffman trees for efficient decoding                  |
+| trees.*          | Output deflated data using Huffman coding                      |
+| uncompr.c        | Decompress a memory buffer                                     |
+| zconf.h.cmakein  | zconf.h template for cmake                                     |
+| zendian.h        | BYTE_ORDER for endian tests                                    |
+| zlib.3           | Man page for zlib                                              |
+| zlib.map         | Linux symbol information                                       |
+| zlib.pc.in       | Pkg-config template                                            |
--- a/libs/zlibng/INSTALL
+++ b/libs/zlibng/INSTALL
@ -1,64 +0,0 @@
-Overview
-========
-
-There are several methods for compiling and installing zlib-ng, depending
-on your favorite operating system and development toolkits.
-This document will attempt to give a general overview of some of them.
-
-PS: We do not recommend running 'make install' unless you know what you
-    are doing, as this can override the system default zlib library, and
-    any wrong configuration or incompatability of zlib-ng can make the
-    whole system unusable.
-
-On linux distros, an alternative way to use zlib-ng instead of zlib
-for specific programs exist, use LD_PRELOAD.
-If the program is dynamically linked with zlib, then zlib-ng can take
-its place without risking system-wide instability. Ex:
-LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program
-
-
-Configure
-=========
-
-Using the configure script is currently the main method of setting up the
-makefiles and preparing for compilation. Configure will attempt to detect
-the specifics of your system, and enable some of the relevant options for you.
-
-Configure accepts several command-line options, some of the most important
-ones are detailed below.
-
--zlib-compat
- This enables options that will ensure that zlib-ng is compiled with all the
- functions that a standard zlib library contains, you will need to use this
- if you are going to be using zlib-ng as a drop-in replacement for zlib.
-
--without-optimizations
- This will disable zlib-ng specific optimizations (does not disable strategies).
-
--without-new-strategies
- This will disable specially optimized strategies, such as deflate_quick and
- deflate_medium.
-
-Run configure like this:
-./configure --zlib-compat
-
-Then you can compile using make:
-make
-make test
-
-
-Cmake
-=====
-
-Cmake is an alternative to configure, basically letting you do the same thing,
-but with different tools and user interfaces.
-
-Start by initializing cmake:
-cmake .
-
-Then you can start the configuration tui to set the wanted options
-ccmake .
-
-You can now compile using make:
-make
-make test
--- a/libs/zlibng/Makefile.in
+++ b/libs/zlibng/Makefile.in
@ -29,8 +29,8 @@ TEST_LIBS=$(LIBNAME1).a
 LDSHARED=$(CC)
 LDSHAREDFLAGS=-shared

-VER=1.9.9
-VER1=1
+VER=2.0.0-RC2
+VER1=2

 STATICLIB=$(LIBNAME1).a
 SHAREDLIB=$(LIBNAME1).so
@ -51,7 +51,7 @@ RCOBJS=
 STRIP=
 RANLIB=ranlib
 LDCONFIG=ldconfig
-LDSHAREDLIBC=-lc
+LDSHAREDLIBC=
 EXE=

 SRCDIR=.
@ -71,12 +71,64 @@ mandir = ${prefix}/share/man
 man3dir = ${mandir}/man3
 pkgconfigdir = ${libdir}/pkgconfig

-OBJZ = adler32.o compress.o crc32.o deflate.o deflate_fast.o deflate_medium.o deflate_slow.o functable.o infback.o inffast.o inflate.o inftrees.o trees.o uncompr.o zutil.o $(ARCH_STATIC_OBJS)
-OBJG = gzclose.o gzlib.o gzread.o gzwrite.o
+OBJZ = \
+	adler32.o \
+	chunkset.o \
+	compare258.o \
+	compress.o \
+	crc32.o \
+	crc32_comb.o \
+	deflate.o \
+	deflate_fast.o \
+	deflate_medium.o \
+	deflate_quick.o \
+	deflate_slow.o \
+	functable.o \
+	infback.o \
+	inffast.o \
+	inflate.o \
+	inftrees.o \
+	insert_string.o \
+	trees.o \
+	uncompr.o \
+	zutil.o \
+	$(ARCH_STATIC_OBJS)
+
+OBJG = \
+	gzlib.o \
+	gzread.o \
+	gzwrite.o
+
 OBJC = $(OBJZ) $(OBJG)

-PIC_OBJZ = adler32.lo compress.lo crc32.lo deflate.lo deflate_fast.lo deflate_medium.lo deflate_slow.lo functable.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo uncompr.lo zutil.lo $(ARCH_SHARED_OBJS)
-PIC_OBJG = gzclose.lo gzlib.lo gzread.lo gzwrite.lo
+PIC_OBJZ = \
+	adler32.lo \
+	chunkset.lo \
+	compare258.lo \
+	compress.lo \
+	crc32.lo \
+	crc32_comb.lo \
+	deflate.lo \
+	deflate_fast.lo \
+	deflate_medium.lo \
+	deflate_quick.lo \
+	deflate_slow.lo \
+	functable.lo \
+	infback.lo \
+	inffast.lo \
+	inflate.lo \
+	inftrees.lo \
+	insert_string.lo \
+	trees.lo \
+	uncompr.lo \
+	zutil.lo \
+	$(ARCH_SHARED_OBJS)
+
+PIC_OBJG = \
+	gzlib.lo \
+	gzread.lo \
+	gzwrite.lo
+
 PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)

 OBJS = $(OBJC)
@ -85,11 +137,9 @@ PIC_OBJS = $(PIC_OBJC)

 all: static shared

-static: example$(EXE) minigzip$(EXE) fuzzers
+static: adler32_test$(EXE) example$(EXE) minigzip$(EXE) fuzzers makefixed$(EXE) maketrees$(EXE) makecrct$(EXE)

-shared: examplesh$(EXE) minigzipsh$(EXE)
-
-all64: example64$(EXE) minigzip64$(EXE)
+shared: adler32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)

 check: test

@ -181,17 +231,23 @@ $(STATICLIB): $(OBJS)
 	$(AR) $(ARFLAGS) $@ $(OBJS)
 	-@ ($(RANLIB) $@ || true) >/dev/null 2>&1

+adler32_test.o:
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/test/adler32_test.c
+
 example.o:
 	$(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c

 minigzip.o:
 	$(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/minigzip.c

-example64.o:
-	$(CC) $(CFLAGS) -DWITH_GZFILEOP -D_FILE_OFFSET_BITS=64 $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c
+makefixed.o:
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makefixed.c

-minigzip64.o:
-	$(CC) $(CFLAGS) -DWITH_GZFILEOP -D_FILE_OFFSET_BITS=64 $(INCLUDES) -c -o $@ $(SRCDIR)/test/minigzip.c
+maketrees.o:
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/maketrees.c
+
+makecrct.o:
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makecrct.c

 zlibrc.o: win32/zlib$(SUFFIX)1.rc
 	$(RC) $(RCFLAGS) -o $@ win32/zlib$(SUFFIX)1.rc
@ -209,7 +265,7 @@ $(OBJG): %.o: $(SRCDIR)/%.c

 $(SHAREDTARGET): $(PIC_OBJS) $(DEFFILE) $(RCOBJS)
 ifneq ($(SHAREDTARGET),)
-	$(LDSHARED) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC)
+	$(LDSHARED) $(CFLAGS) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif
@ -220,38 +276,56 @@ ifneq ($(SHAREDLIB),$(SHAREDTARGET))
 endif
 endif

+adler32_test$(EXE): adler32_test.o $(OBJG) $(STATICLIB)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+ifneq ($(STRIP),)
+	$(STRIP) $@
+endif
+
 example$(EXE): example.o $(OBJG) $(STATICLIB)
-	$(CC) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif

 minigzip$(EXE): minigzip.o $(OBJG) $(STATICLIB)
-	$(CC) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+ifneq ($(STRIP),)
+	$(STRIP) $@
+endif
+
+adler32_testsh$(EXE): adler32_test.o $(OBJG) $(SHAREDTARGET)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif

 examplesh$(EXE): example.o $(OBJG) $(SHAREDTARGET)
-	$(CC) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif

 minigzipsh$(EXE): minigzip.o $(OBJG) $(SHAREDTARGET)
-	$(CC) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif

-example64$(EXE): example64.o $(OBJG) $(STATICLIB)
-	$(CC) $(LDFLAGS) -o $@ example64.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+makefixed$(EXE): makefixed.o $(OBJG) $(STATICLIB)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ makefixed.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif

-minigzip64$(EXE): minigzip64.o $(OBJG) $(STATICLIB)
-	$(CC) $(LDFLAGS) -o $@ minigzip64.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+maketrees$(EXE): maketrees.o $(OBJG) $(STATICLIB)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ maketrees.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
+ifneq ($(STRIP),)
+	$(STRIP) $@
+endif
+
+makecrct$(EXE): makecrct.o $(OBJG) $(STATICLIB)
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ makecrct.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
 ifneq ($(STRIP),)
 	$(STRIP) $@
 endif
@ -326,11 +400,11 @@ clean:
 	@if [ -f $(ARCHDIR)/Makefile ]; then $(MAKE) -C $(ARCHDIR) clean; fi
 	@if [ -f test/Makefile ]; then $(MAKE) -C test clean; fi
 	rm -f *.o *.lo *~ \
-	   example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
-	   example64$(EXE) minigzip64$(EXE) \
+	   adler32_test$(EXE) example$(EXE) minigzip$(EXE) \
+	   adler32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
 	   checksum_fuzzer$(EXE) compress_fuzzer$(EXE) example_small_fuzzer$(EXE) example_large_fuzzer$(EXE) \
 	   example_flush_fuzzer$(EXE) example_dict_fuzzer$(EXE) minigzip_fuzzer$(EXE) \
-	   infcover \
+	   infcover makefixed$(EXE) maketrees$(EXE) makecrct$(EXE) \
 	   $(STATICLIB) $(IMPORTLIB) $(SHAREDLIB) $(SHAREDLIBV) $(SHAREDLIBM) \
 	   foo.gz so_locations \
 	   _match.s maketree
@ -338,6 +412,8 @@ clean:
 	rm -f *.gcda *.gcno *.gcov
 	rm -f a.out a.exe
 	rm -f *.pc
+	rm -f *._h
+	rm -rf btmp1 btmp2 pkgtmp1 pkgtmp2

 maintainer-clean: distclean
 distclean: clean
--- a/libs/zlibng/README.md
+++ b/libs/zlibng/README.md
@ -1,10 +1,41 @@
-zlib-ng - zlib for the next generation systems
+## zlib-ng
+*zlib data compression library for the next generation systems*

 Maintained by Hans Kristian Rosbach
          aka Dead2 (zlib-ng àt circlestorm dót org)

+|CI|Status|
+|:-|-|
+|GitHub Actions|[![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20CMake/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions) [![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20Configure/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions) [![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20NMake/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions)|
+|Buildkite|[![Build status](https://badge.buildkite.com/7bb1ef84356d3baee26202706cc053ee1de871c0c712b65d26.svg?branch=develop)](https://buildkite.com/circlestorm-productions/zlib-ng)|
+|CodeFactor|[![CodeFactor](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/badge)](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng)|
+|OSS-Fuzz|[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/zlib-ng.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zlib-ng)|
+|Codecov|[![codecov.io](https://codecov.io/github/zlib-ng/zlib-ng/coverage.svg?branch=develop)](https://codecov.io/github/zlib-ng/zlib-ng/)|

-Fork Motivation and History
+Features
+--------
+
+* Zlib compatible API with support for dual-linking
+* Modernized native API based on zlib API for ease of porting
+* Modern C99 syntax and a clean code layout
+* Deflate medium and quick algorithms based on Intel's zlib fork
+* Support for CPU intrinsics when available
+  * Adler32 implementation using SSSE3, AVX2, Neon & VSX
+  * CRC32-B implementation using PCLMULQDQ & ACLE
+  * Hash table implementation using CRC32-C intrinsics on x86 and ARM
+  * Slide hash implementations using SSE2, AVX2, Neon & VSX
+  * Compare256/258 implementations using SSE4.2 & AVX2
+  * Inflate chunk copying using SSE2, AVX2 & Neon
+  * Support for hardware-accelerated deflate using IBM Z DFLTCC
+* Unaligned memory read/writes and large bit buffer improvements
+* Includes improvements from Cloudflare and Intel forks
+* Configure, CMake, and NMake build system support
+* Comprehensive set of CMake unit tests
+* Code sanitizers, fuzzing, and coverage
+* GitHub Actions continuous integration on Windows, macOS, and Linux
+  * Emulated CI for ARM, AARCH64, PPC, PPC64, SPARC64, S390x using qemu
+
+Fork Motivation
 ---------------------------

 The motivation for this fork was due to seeing several 3rd party
@ -38,17 +69,97 @@ various dead code, all contrib and example code as there is little
 point in having those in this fork for various reasons.

 A lot of improvements have gone into zlib-ng since its start, and
-numerous people have contributed both small and big improvements,
-or valuable testing. 
+numerous people and companies have contributed both small and big
+improvements, or valuable testing.

 Please read LICENSE.md, it is very simple and very liberal.

+Build
+-----
+
+There are two ways to build zlib-ng:
+
+### Cmake
+
+To build zlib-ng using the cross-platform makefile generator cmake:
+
+```
+cmake .
+cmake --build . --config Release
+ctest --verbose -C Release
+```
+
+Alternatively, you can use the cmake configuration GUI tool ccmake:
+
+```
+ccmake .
+```
+
+### Configure
+
+To build zlib-ng using the bash configure script:
+
+```
+./configure
+make
+make test
+```
+
+Build Options
+-------------
+| CMake                    | configure                | Description                                                                           | Default |
+|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
+| ZLIB_COMPAT              | --zlib-compat            | Compile with zlib compatible API                                                      | OFF     |
+| ZLIB_ENABLE_TESTS        |                          | Build test binaries                                                                   | ON      |
+| WITH_GZFILEOP            | --without-gzfileops      | Compile with support for gzFile related functions                                     | ON      |
+| WITH_OPTIM               | --without-optimizations  | Build with optimisations                                                              | ON      |
+| WITH_NEW_STRATEGIES      | --without-new-strategies | Use new strategies                                                                    | ON      |
+| WITH_NATIVE_INSTRUCTIONS | --native                 | Compiles with full instruction set supported on this host (gcc/clang -march=native)   | OFF     |
+| WITH_SANITIZER           | --with-sanitizer         | Build with sanitizer (memory, address, undefined)                                     | OFF     |
+| WITH_FUZZERS             | --with-fuzzers           | Build test/fuzz                                                                       | OFF     |
+| WITH_MAINTAINER_WARNINGS |                          | Build with project maintainer warnings                                                | OFF     |
+| WITH_CODE_COVERAGE       |                          | Enable code coverage reporting                                                        | OFF     |
+
+Install
+-------
+
+WARNING: We do not recommend manually installing unless you really
+know what you are doing, because this can potentially override the system
+default zlib library, and any incompatibility or wrong configuration of
+zlib-ng can make the whole system unusable, requiring recovery or reinstall.
+If you still want a manual install, we recommend using the /opt/ path prefix.
+
+For Linux distros, an alternative way to use zlib-ng (if compiled in
+zlib-compat mode) instead of zlib, is through the use of the
+_LD_PRELOAD_ environment variable. If the program is dynamically linked
+with zlib, then zlib-ng will temporarily be used instead by the program,
+without risking system-wide instability.
+
+```
+LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program
+```
+
+### Cmake
+
+To install zlib-ng system-wide using cmake:
+
+```
+cmake --build . --target install
+```
+
+### Configure
+
+To install zlib-ng system-wide using the configure script:
+
+```
+make install
+```

 Contributing
 ------------

-Zlib-ng is a young project, and we aim to be open to contributions,
-and we would be delighted to receive pull requests on github.
+Zlib-ng is aiming to be open to contributions, and we would be
+delighted to receive pull requests on github.
 Just remember that any code you submit must be your own and it must
 be zlib licensed.
 Help with testing and reviewing of pull requests etc is also very
@ -73,9 +184,23 @@ The deflate and zlib specifications were written by L. Peter Deutsch.
 zlib was originally created by Jean-loup Gailly (compression)
 and Mark Adler (decompression).

+Advanced Build Options
+----------------------

-Build Status
------------
-
-Travis CI: [![build status](https://api.travis-ci.org/zlib-ng/zlib-ng.svg)](https://travis-ci.org/zlib-ng/zlib-ng/)
-Buildkite: [![Build status](https://badge.buildkite.com/7bb1ef84356d3baee26202706cc053ee1de871c0c712b65d26.svg?branch=develop)](https://buildkite.com/circlestorm-productions/zlib-ng)
+| CMake                           | configure             | Description                                                         | Default                |
+|:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
+| ZLIB_DUAL_LINK                  |                       | Dual link tests with system zlib                                    | OFF                    |
+|                                 | --force-sse2          | Assume SSE2 instructions are always available                       | ON (x86), OFF (x86_64) |
+| WITH_AVX2                       |                       | Build with AVX2 intrinsics                                          | ON                     |
+| WITH_SSE2                       |                       | Build with SSE2 intrinsics                                          | ON                     |
+| WITH_SSE4                       |                       | Build with SSE4 intrinsics                                          | ON                     |
+| WITH_PCLMULQDQ                  |                       | Build with PCLMULQDQ intrinsics                                     | ON                     |
+| WITH_ACLE                       | --without-acle        | Build with ACLE intrinsics                                          | ON                     |
+| WITH_NEON                       | --without-neon        | Build with NEON intrinsics                                          | ON                     |
+| WITH_POWER8                     |                       | Build with POWER8 optimisations                                     | ON                     |
+| WITH_DFLTCC_DEFLATE             | --with-dfltcc-deflate | Use DEFLATE COMPRESSION CALL instruction for compression on IBM Z   | OFF                    |
+| WITH_DFLTCC_INFLATE             | --with-dfltcc-inflate | Use DEFLATE COMPRESSION CALL instruction for decompression on IBM Z | OFF                    |
+| WITH_UNALIGNED                  |                       | Allow optimizations that use unaligned reads if safe on current arch| ON                    |
+| WITH_INFLATE_STRICT             |                       | Build with strict inflate distance checking                         | OFF                    |
+| WITH_INFLATE_ALLOW_INVALID_DIST |                       | Build with zero fill for inflate invalid distances                  | OFF                    |
+| INSTALL_UTILS                   |                       | Copy minigzip and minideflate during install                        | OFF                    |
--- a/libs/zlibng/README.zlib
+++ b/libs/zlibng/README.zlib
@ -1,118 +0,0 @@
-ZLIB DATA COMPRESSION LIBRARY
-
-zlib 1.2.11 is a general purpose data compression library.  All the code is
-thread safe.  The data format used by the zlib library is described by RFCs
-(Request for Comments) 1950 to 1952 in the files
-http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
-rfc1952 (gzip format).
-
-All functions of the compression library are documented in the file zlib.h
-(volunteer to write man pages welcome, contact zlib@gzip.org).  A usage example
-of the library is given in the file test/example.c which also tests that
-the library is working correctly.  Another example is given in the file
-test/minigzip.c.  The compression library itself is composed of all source
-files in the root directory.
-
-To compile all files and run the test program, follow the instructions given at
-the top of Makefile.in.  In short "./configure; make test", and if that goes
-well, "make install" should work for most flavors of Unix.  For Windows, use
-one of the special makefiles in win32/ or contrib/vstudio/ .  For VMS, use
-make_vms.com.
-
-Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
-<info@winimage.com> for the Windows DLL version.  The zlib home page is
-http://zlib.net/ .  Before reporting a problem, please check this site to
-verify that you have the latest version of zlib; otherwise get the latest
-version and check whether the problem still exists or not.
-
-PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
-
-Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan.  1997
-issue of Dr.  Dobb's Journal; a copy of the article is available at
-http://marknelson.us/1997/01/01/zlib-engine/ .
-
-The changes made in version 1.2.11 are documented in the file ChangeLog.
-
-Unsupported third party contributions are provided in directory contrib/ .
-
-zlib is available in Java using the java.util.zip package, documented at
-http://java.sun.com/developer/technicalArticles/Programming/compression/ .
-
-A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
-at CPAN (Comprehensive Perl Archive Network) sites, including
-http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
-
-A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
-available in Python 1.5 and later versions, see
-http://docs.python.org/library/zlib.html .
-
-zlib is built into tcl: http://wiki.tcl.tk/4610 .
-
-An experimental package to read and write files in .zip format, written on top
-of zlib by Gilles Vollant <info@winimage.com>, is available in the
-contrib/minizip directory of zlib.
-
-
-Notes for some targets:
-
- For Windows DLL versions, please see win32/DLL_FAQ.txt
-
- For 64-bit Irix, deflate.c must be compiled without any optimization. With
-  -O, one libpng test fails. The test works in 32 bit mode (with the -n32
-  compiler flag). The compiler bug has been reported to SGI.
-
- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
-  when compiled with cc.
-
- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
-  necessary to get gzprintf working correctly. This is done by configure.
-
- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
-  other compilers. Use "make test" to check your compiler.
-
- gzdopen is not supported on RISCOS or BEOS.
-
- For PalmOs, see http://palmzlib.sourceforge.net/
-
-
-Acknowledgments:
-
-  The deflate format used by zlib was defined by Phil Katz.  The deflate and
-  zlib specifications were written by L.  Peter Deutsch.  Thanks to all the
-  people who reported problems and suggested various improvements in zlib; they
-  are too numerous to cite here.
-
-Copyright notice:
-
- (C) 1995-2017 Jean-loup Gailly and Mark Adler
-
-  This software is provided 'as-is', without any express or implied
-  warranty.  In no event will the authors be held liable for any damages
-  arising from the use of this software.
-
-  Permission is granted to anyone to use this software for any purpose,
-  including commercial applications, and to alter it and redistribute it
-  freely, subject to the following restrictions:
-
-  1. The origin of this software must not be misrepresented; you must not
-     claim that you wrote the original software. If you use this software
-     in a product, an acknowledgment in the product documentation would be
-     appreciated but is not required.
-  2. Altered source versions must be plainly marked as such, and must not be
-     misrepresented as being the original software.
-  3. This notice may not be removed or altered from any source distribution.
-
-  Jean-loup Gailly        Mark Adler
-  jloup@gzip.org          madler@alumni.caltech.edu
-
-If you use the zlib library in a product, we would appreciate *not* receiving
-lengthy legal documents to sign.  The sources are provided for free but without
-warranty of any kind.  The library has been entirely written by Jean-loup
-Gailly and Mark Adler; it does not include third-party code.  We make all
-contributions to and distributions of this project solely in our personal
-capacity, and are not conveying any rights to any intellectual property of
-any third parties.
-
-If you redistribute modified sources, we would appreciate that you include in
-the file ChangeLog history information documenting your changes.  Please read
-the FAQ for more information on the distribution of modified source versions.
--- a/libs/zlibng/adler32.c
+++ b/libs/zlibng/adler32.c
@ -3,24 +3,13 @@
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

-/* @(#) $Id$ */
-
 #include "zbuild.h"
 #include "zutil.h"
 #include "functable.h"
 #include "adler32_p.h"

-uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
-static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2);
-
-#define DO1(buf, i)  {adler += (buf)[i]; sum2 += adler;}
-#define DO2(buf, i)  DO1(buf, i); DO1(buf, i+1);
-#define DO4(buf, i)  DO2(buf, i); DO2(buf, i+2);
-#define DO8(buf, i)  DO4(buf, i); DO4(buf, i+4);
-#define DO16(buf)    DO8(buf, 0); DO8(buf, 8);
-
 /* ========================================================================= */
-uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
+Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
    uint32_t sum2;
    unsigned n;

@ -29,15 +18,15 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
    adler &= 0xffff;

    /* in case user likes doing a byte at a time, keep it fast */
-    if (len == 1)
+    if (UNLIKELY(len == 1))
        return adler32_len_1(adler, buf, sum2);

    /* initial Adler-32 value (deferred check for len == 1 speed) */
-    if (buf == NULL)
+    if (UNLIKELY(buf == NULL))
        return 1L;

    /* in case short lengths are provided, keep it somewhat fast */
-    if (len < 16)
+    if (UNLIKELY(len < 16))
        return adler32_len_16(adler, buf, len, sum2);

    /* do length NMAX blocks -- requires just one modulo operation */
@ -50,15 +39,15 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
 #endif
        do {
 #ifdef UNROLL_MORE
-            DO16(buf);          /* 16 sums unrolled */
+            DO16(adler, sum2, buf);          /* 16 sums unrolled */
            buf += 16;
 #else
-            DO8(buf, 0);         /* 8 sums unrolled */
+            DO8(adler, sum2, buf, 0);         /* 8 sums unrolled */
            buf += 8;
 #endif
        } while (--n);
-        MOD(adler);
-        MOD(sum2);
+        adler %= BASE;
+        sum2 %= BASE;
    }

    /* do remaining bytes (less than NMAX, still just one modulo) */
@ -66,12 +55,12 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
 #ifdef UNROLL_MORE
        while (len >= 16) {
            len -= 16;
-            DO16(buf);
+            DO16(adler, sum2, buf);
            buf += 16;
 #else
        while (len >= 8) {
            len -= 8;
-            DO8(buf, 0);
+            DO8(adler, sum2, buf, 0);
            buf += 8;
 #endif
        }
@ -80,22 +69,34 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
            adler += *buf++;
            sum2 += adler;
        }
-        MOD(adler);
-        MOD(sum2);
+        adler %= BASE;
+        sum2 %= BASE;
    }

    /* return recombined sums */
    return adler | (sum2 << 16);
 }

-uint32_t ZEXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
+#ifdef ZLIB_COMPAT
+unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) {
+    return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
+}
+#else
+uint32_t Z_EXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
    return functable.adler32(adler, buf, len);
 }
+#endif

 /* ========================================================================= */
-uint32_t ZEXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
+#ifdef ZLIB_COMPAT
+unsigned long Z_EXPORT PREFIX(adler32)(unsigned long adler, const unsigned char *buf, unsigned int len) {
+    return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
+}
+#else
+uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
    return functable.adler32(adler, buf, len);
 }
+#endif

 /* ========================================================================= */
 static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
@ -108,11 +109,11 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len
        return 0xffffffff;

    /* the derivation of this formula is left as an exercise for the reader */
-    MOD63(len2);                /* assumes len2 >= 0 */
+    len2 %= BASE;                 /* assumes len2 >= 0 */
    rem = (unsigned)len2;
    sum1 = adler1 & 0xffff;
    sum2 = rem * sum1;
-    MOD(sum2);
+    sum2 %= BASE;
    sum1 += (adler2 & 0xffff) + BASE - 1;
    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
    if (sum1 >= BASE) sum1 -= BASE;
@ -123,10 +124,16 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len
 }

 /* ========================================================================= */
-uint32_t ZEXPORT PREFIX(adler32_combine)(uint32_t adler1, uint32_t adler2, z_off_t len2) {
-    return adler32_combine_(adler1, adler2, len2);
+#ifdef ZLIB_COMPAT
+unsigned long Z_EXPORT PREFIX(adler32_combine)(unsigned long adler1, unsigned long adler2, z_off_t len2) {
+    return (unsigned long)adler32_combine_((uint32_t)adler1, (uint32_t)adler2, len2);
 }

-uint32_t ZEXPORT PREFIX(adler32_combine64)(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
+unsigned long Z_EXPORT PREFIX4(adler32_combine)(unsigned long adler1, unsigned long adler2, z_off64_t len2) {
+    return (unsigned long)adler32_combine_((uint32_t)adler1, (uint32_t)adler2, len2);
+}
+#else
+uint32_t Z_EXPORT PREFIX4(adler32_combine)(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
    return adler32_combine_(adler1, adler2, len2);
 }
+#endif
--- a/libs/zlibng/adler32_p.h
+++ b/libs/zlibng/adler32_p.h
@ -12,45 +12,11 @@
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

-/* use NO_DIVIDE if your processor does not do division in hardware --
-   try it both ways to see which is faster */
-#ifdef NO_DIVIDE
-/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
-   (thank you to John Reiser for pointing this out) */
-#  define CHOP(a) \
-    do { \
-        uint32_t tmp = a >> 16; \
-        a &= 0xffff; \
-        a += (tmp << 4) - tmp; \
-    } while (0)
-#  define MOD28(a) \
-    do { \
-        CHOP(a); \
-        if (a >= BASE) a -= BASE; \
-    } while (0)
-#  define MOD(a) \
-    do { \
-        CHOP(a); \
-        MOD28(a); \
-    } while (0)
-#  define MOD63(a) \
-    do { /* this assumes a is not negative */ \
-        z_off64_t tmp = a >> 32; \
-        a &= 0xffffffffL; \
-        a += (tmp << 8) - (tmp << 5) + tmp; \
-        tmp = a >> 16; \
-        a &= 0xffffL; \
-        a += (tmp << 4) - tmp; \
-        tmp = a >> 16; \
-        a &= 0xffffL; \
-        a += (tmp << 4) - tmp; \
-        if (a >= BASE) a -= BASE; \
-    } while (0)
-#else
-#  define MOD(a) a %= BASE
-#  define MOD28(a) a %= BASE
-#  define MOD63(a) a %= BASE
-#endif
+/* Unrolled Adler-32 accumulation steps: DO1 adds byte buf[i] to sum1 and then sum1 to sum2;
+   DO2..DO16 repeat that step for 2..16 consecutive bytes starting at index i (0 for DO16).
+   Every macro argument is parenthesized so expression arguments expand as intended. */
+#define DO1(sum1, sum2, buf, i)  {(sum1) += (buf)[(i)]; (sum2) += (sum1);}
+#define DO2(sum1, sum2, buf, i)  {DO1(sum1, sum2, buf, i); DO1(sum1, sum2, buf, (i)+1);}
+#define DO4(sum1, sum2, buf, i)  {DO2(sum1, sum2, buf, i); DO2(sum1, sum2, buf, (i)+2);}
+#define DO8(sum1, sum2, buf, i)  {DO4(sum1, sum2, buf, i); DO4(sum1, sum2, buf, (i)+4);}
+#define DO16(sum1, sum2, buf)    {DO8(sum1, sum2, buf, 0); DO8(sum1, sum2, buf, 8);}

 static inline uint32_t adler32_len_1(uint32_t adler, const unsigned char *buf, uint32_t sum2) {
    adler += buf[0];
@ -70,8 +36,18 @@ static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf,
    }
    if (adler >= BASE)
        adler -= BASE;
-    MOD28(sum2);            /* only added so many BASE's */
+    sum2 %= BASE;            /* only added so many BASE's */
    return adler | (sum2 << 16);
 }

+static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) {
+    while (len >= 16) {
+        len -= 16;
+        DO16(adler, sum2, buf);
+        buf += 16;
+    }
+    /* Process tail (len < 16).  */
+    return adler32_len_16(adler, buf, len, sum2);
+}
+
 #endif /* ADLER32_P_H */
--- a/libs/zlibng/arch/arm/Makefile.in
+++ b/libs/zlibng/arch/arm/Makefile.in
@ -6,19 +6,27 @@ CC=
 CFLAGS=
 SFLAGS=
 INCLUDES=
+ACLEFLAG=
+NEONFLAG=
 SUFFIX=

 SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)

-all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
+all: \
+	adler32_neon.o adler32_neon.lo \
+	armfeature.o armfeature.lo \
+	chunkset_neon.o chunkset_neon.lo \
+	crc32_acle.o crc32_acle.lo \
+	slide_neon.o slide_neon.lo \
+	insert_string_acle.o insert_string_acle.lo

 adler32_neon.o:
-	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
+	$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c

 adler32_neon.lo:
-	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
+	$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c

 armfeature.o:
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
@ -26,23 +34,29 @@ armfeature.o:
 armfeature.lo:
 	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c

+chunkset_neon.o:
+	$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
+
+chunkset_neon.lo:
+	$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
+
 crc32_acle.o:
-	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
+	$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c

 crc32_acle.lo:
-	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
+	$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c

-fill_window_arm.o:
-	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
+slide_neon.o:
+	$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c

-fill_window_arm.lo:
-	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
+slide_neon.lo:
+	$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c

 insert_string_acle.o:
-	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
+	$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c

 insert_string_acle.lo:
-	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
+	$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c

 mostlyclean: clean
 clean:
--- a/libs/zlibng/arch/arm/adler32_neon.c
+++ b/libs/zlibng/arch/arm/adler32_neon.c
@ -2,24 +2,16 @@
 * Copyright (C) 2017 ARM Holdings Inc.
 * Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
 *
- * This software is provided 'as-is', without any express or implied
- * warranty.  In no event will the authors be held liable for any damages
- * arising from the use of this software.
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- * 1. The origin of this software must not be misrepresented; you must not
- *  claim that you wrote the original software. If you use this software
- *    in a product, an acknowledgment in the product documentation would be
- *    appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- *    misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
+ * For conditions of distribution and use, see copyright notice in zlib.h
 */
-#include "adler32_neon.h"
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#include <arm_neon.h>
-#include "adler32_p.h"
+#ifdef ARM_NEON_ADLER32
+#ifdef _M_ARM64
+#  include <arm64_neon.h>
+#else
+#  include <arm_neon.h>
+#endif
+#include "../../zutil.h"
+#include "../../adler32_p.h"

 static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
    static const uint8_t taps[32] = {
@ -109,7 +101,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {

    for (i = 0; i < len; i += n) {
        if ((i + n) > len)
-            n = len - i;
+            n = (int)(len - i);

        if (n < 16)
            break;
--- a/libs/zlibng/arch/arm/adler32_neon.h
+++ b/libs/zlibng/arch/arm/adler32_neon.h
@ -1,29 +0,0 @@
-/* Copyright (C) 1995-2011, 2016 Mark Adler
- * Copyright (C) 2017 ARM Holdings Inc.
- * Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
- *
- * This software is provided 'as-is', without any express or implied
- * warranty.  In no event will the authors be held liable for any damages
- * arising from the use of this software.
- * Permission is granted to anyone to use this software for any purpose,
- * including commercial applications, and to alter it and redistribute it
- * freely, subject to the following restrictions:
- * 1. The origin of this software must not be misrepresented; you must not
- *  claim that you wrote the original software. If you use this software
- *    in a product, an acknowledgment in the product documentation would be
- *    appreciated but is not required.
- * 2. Altered source versions must be plainly marked as such, and must not be
- *    misrepresented as being the original software.
- * 3. This notice may not be removed or altered from any source distribution.
- */
-#ifndef __ADLER32_NEON__
-#define __ADLER32_NEON__
-
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-// Depending on the compiler flavor, size_t may be defined in one or the other header. See:
-// http://stackoverflow.com/questions/26410466/gcc-linaro-compiler-throws-error-unknown-type-name-size-t
-#include <stdint.h>
-#include <stddef.h>
-uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
-#endif
-#endif
--- a/libs/zlibng/arch/arm/arm.h
+++ b/libs/zlibng/arch/arm/arm.h
@ -8,6 +8,6 @@
 extern int arm_cpu_has_neon;
 extern int arm_cpu_has_crc32;

-void ZLIB_INTERNAL arm_check_features(void);
+void Z_INTERNAL arm_check_features(void);

 #endif /* ARM_H_ */
--- a/libs/zlibng/arch/arm/armfeature.c
+++ b/libs/zlibng/arch/arm/armfeature.c
@ -1,50 +1,69 @@
-#include "zutil.h"
+#include "../../zutil.h"

 #if defined(__linux__)
-# include <sys/auxv.h>
-# include <asm/hwcap.h>
+#  include <sys/auxv.h>
+#  include <asm/hwcap.h>
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+#  include <machine/armreg.h>
+#  ifndef ID_AA64ISAR0_CRC32_VAL
+#    define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
+#  endif
+#elif defined(__APPLE__)
+#  include <sys/sysctl.h>
 #elif defined(_WIN32)
-# include <winapifamily.h>
+#  include <winapifamily.h>
 #endif

 static int arm_has_crc32() {
 #if defined(__linux__) && defined(HWCAP2_CRC32)
-  return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
+    return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
+#elif defined(__FreeBSD__) && defined(__aarch64__)
+    return getenv("QEMU_EMULATING") == NULL
+      && ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE;
+#elif defined(__APPLE__)
+    int hascrc32;
+    size_t size = sizeof(hascrc32);
+    return sysctlbyname("hw.optional.armv8_crc32", &hascrc32, &size, NULL, 0) == 0
+      && hascrc32 == 1;
 #elif defined(ARM_NOCHECK_ACLE)
-  return 1;
+    return 1;
 #else
-  return 0;
+    return 0;
 #endif
 }

 /* AArch64 has neon. */
-#if !defined(__aarch64__)
-static inline int arm_has_neon()
-{
- #if defined(__linux__) && defined(HWCAP_NEON)
+#if !defined(__aarch64__) && !defined(_M_ARM64)
+static inline int arm_has_neon() {
+#if defined(__linux__) && defined(HWCAP_NEON)
    return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
- #elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
-  #if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
+#elif defined(__APPLE__)
+    int hasneon;
+    size_t size = sizeof(hasneon);
+    return sysctlbyname("hw.optional.neon", &hasneon, &size, NULL, 0) == 0
+      && hasneon == 1;
+#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
+#  if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
    return 1; /* Always supported */
-  #endif
- #endif
+#  endif
+#endif

- #if defined(ARM_NOCHECK_NEON)
+#if defined(ARM_NOCHECK_NEON)
    return 1;
- #else
-    return 0;
- #endif
-}
-#endif
-
-ZLIB_INTERNAL int arm_cpu_has_neon;
-ZLIB_INTERNAL int arm_cpu_has_crc32;
-
-void ZLIB_INTERNAL arm_check_features(void) {
-#if defined(__aarch64__)
-  arm_cpu_has_neon = 1; /* always available */
 #else
-  arm_cpu_has_neon = arm_has_neon();
+    return 0;
 #endif
-  arm_cpu_has_crc32 = arm_has_crc32();
+}
+#endif
+
+Z_INTERNAL int arm_cpu_has_neon;
+Z_INTERNAL int arm_cpu_has_crc32;
+
+void Z_INTERNAL arm_check_features(void) {
+#if defined(__aarch64__) || defined(_M_ARM64)
+    arm_cpu_has_neon = 1; /* always available */
+#else
+    arm_cpu_has_neon = arm_has_neon();
+#endif
+    arm_cpu_has_crc32 = arm_has_crc32();
 }
--- a/libs/zlibng/arch/arm/chunkset_neon.c
+++ b/libs/zlibng/arch/arm/chunkset_neon.c
@ -0,0 +1,54 @@
+/* chunkset_neon.c -- NEON inline functions to copy small data chunks.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef ARM_NEON_CHUNKSET
+#ifdef _M_ARM64
+#  include <arm64_neon.h>
+#else
+#  include <arm_neon.h>
+#endif
+#include "../../zbuild.h"
+#include "../../zutil.h"
+
+typedef uint8x16_t chunk_t;
+
+#define HAVE_CHUNKMEMSET_1
+#define HAVE_CHUNKMEMSET_2
+#define HAVE_CHUNKMEMSET_4
+#define HAVE_CHUNKMEMSET_8
+
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
+    *chunk = vld1q_dup_u8(from);
+}
+
+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
+    *chunk = vreinterpretq_u8_s16(vdupq_n_s16(*(int16_t *)from));
+}
+
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
+    *chunk = vreinterpretq_u8_s32(vdupq_n_s32(*(int32_t *)from));
+}
+
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
+    *chunk = vcombine_u8(vld1_u8(from), vld1_u8(from));
+}
+
+#define CHUNKSIZE        chunksize_neon
+#define CHUNKCOPY        chunkcopy_neon
+#define CHUNKCOPY_SAFE   chunkcopy_safe_neon
+#define CHUNKUNROLL      chunkunroll_neon
+#define CHUNKMEMSET      chunkmemset_neon
+#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
+
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
+    *chunk = vld1q_u8(s);
+}
+
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
+    vst1q_u8(out, *chunk);
+}
+
+#include "chunkset_tpl.h"
+
+#endif
--- a/libs/zlibng/arch/arm/crc32_acle.c
+++ b/libs/zlibng/arch/arm/crc32_acle.c
@ -5,21 +5,16 @@
 *
 */

-#ifdef __ARM_FEATURE_CRC32
-# include <arm_acle.h>
-# ifdef ZLIB_COMPAT
-#  include <zconf.h>
-# else
-#  include <zconf-ng.h>
-# endif
-# ifdef __linux__
-#  include <stddef.h>
-# endif
+#ifdef ARM_ACLE_CRC_HASH
+#ifndef _MSC_VER
+#  include <arm_acle.h>
+#endif
+#include "../../zutil.h"

 uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
-    register uint32_t c;
-    register const uint16_t *buf2;
-    register const uint32_t *buf4;
+    Z_REGISTER uint32_t c;
+    Z_REGISTER const uint16_t *buf2;
+    Z_REGISTER const uint32_t *buf4;

    c = ~crc;
    if (len && ((ptrdiff_t)buf & 1)) {
@ -36,7 +31,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
        buf4 = (const uint32_t *) buf;
    }

-# if defined(__aarch64__)
+#if defined(__aarch64__)
    if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
        c = __crc32w(c, *buf4++);
        len -= sizeof(uint32_t);
@ -44,7 +39,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {

    const uint64_t *buf8 = (const uint64_t *) buf4;

-#  ifdef UNROLL_MORE
+#ifdef UNROLL_MORE
    while (len >= 4 * sizeof(uint64_t)) {
        c = __crc32d(c, *buf8++);
        c = __crc32d(c, *buf8++);
@ -52,7 +47,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
        c = __crc32d(c, *buf8++);
        len -= 4 * sizeof(uint64_t);
    }
-#  endif
+#endif

    while (len >= sizeof(uint64_t)) {
        c = __crc32d(c, *buf8++);
@ -74,7 +69,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
    }

    buf = (const unsigned char *) buf2;
-# else /* __aarch64__ */
+#else /* __aarch64__ */

 #  ifdef UNROLL_MORE
    while (len >= 8 * sizeof(uint32_t)) {
@ -103,7 +98,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
    } else {
        buf = (const unsigned char *) buf4;
    }
-# endif /* __aarch64__ */
+#endif /* __aarch64__ */

    if (len) {
        c = __crc32b(c, *buf);
@ -112,4 +107,4 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
    c = ~c;
    return c;
 }
-#endif /* __ARM_FEATURE_CRC32 */
+#endif
--- a/libs/zlibng/arch/arm/ctzl.h
+++ b/libs/zlibng/arch/arm/ctzl.h
@ -5,7 +5,7 @@

 #if defined(_MSC_VER) && !defined(__clang__)
 static __forceinline unsigned long __builtin_ctzl(unsigned long value) {
-	return _arm_clz(_arm_rbit(value));
+    return _arm_clz(_arm_rbit(value));
 }
 #endif

--- a/libs/zlibng/arch/arm/fill_window_arm.c
+++ b/libs/zlibng/arch/arm/fill_window_arm.c
@ -1,169 +0,0 @@
-/* fill_window_arm.c -- Optimized hash table shifting for ARM with support for NEON instructions
- * Copyright (C) 2017 Mika T. Lindqvist
- *
- * Authors:
- * Mika T. Lindqvist <postmaster@raasu.org>
- * Jun He <jun.he@arm.com>
- *
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-
-/* @(#) $Id$ */
-
-#include "zbuild.h"
-#include "deflate.h"
-#include "deflate_p.h"
-#include "functable.h"
-
-extern ZLIB_INTERNAL int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
-
-#if defined(__ARM_NEON__) || defined(__ARM_NEON)
-#include <arm_neon.h>
-
-/* SIMD version of hash_chain rebase */
-static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
-    register uint16x8_t v, *p;
-    register size_t n;
-
-    size_t size = entries*sizeof(table[0]);
-    Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
-
-    Assert(sizeof(Pos) == 2, "Wrong Pos size");
-    v = vdupq_n_u16(window_size);
-
-    p = (uint16x8_t *)table;
-    n = size / (sizeof(uint16x8_t) * 8);
-    do {
-        p[0] = vqsubq_u16(p[0], v);
-        p[1] = vqsubq_u16(p[1], v);
-        p[2] = vqsubq_u16(p[2], v);
-        p[3] = vqsubq_u16(p[3], v);
-        p[4] = vqsubq_u16(p[4], v);
-        p[5] = vqsubq_u16(p[5], v);
-        p[6] = vqsubq_u16(p[6], v);
-        p[7] = vqsubq_u16(p[7], v);
-        p += 8;
-    } while (--n);
-}
-#else
-/* generic version for hash rebase */
-static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
-    unsigned int i;
-    for (i = 0; i < entries; i++) {
-        table[i] = (table[i] >= window_size) ? (table[i] - window_size) : NIL;
-    }
-}
-#endif
-
-void fill_window_arm(deflate_state *s) {
-    register unsigned n;
-    unsigned long more;  /* Amount of free space at the end of the window. */
-    unsigned int wsize = s->w_size;
-
-    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
-
-    do {
-        more = s->window_size - s->lookahead - s->strstart;
-
-        /* If the window is almost full and there is insufficient lookahead,
-         * move the upper half to the lower one to make room in the upper half.
-         */
-        if (s->strstart >= wsize+MAX_DIST(s)) {
-            memcpy(s->window, s->window+wsize, wsize);
-            s->match_start -= wsize;
-            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
-            s->block_start -= wsize;
-
-            /* Slide the hash table (could be avoided with 32 bit values
-               at the expense of memory usage). We slide even when level == 0
-               to keep the hash table consistent if we switch back to level > 0
-               later. (Using level 0 permanently is not an optimal usage of
-               zlib, so we don't care about this pathological case.)
-             */
-
-            slide_hash_chain(s->head, s->hash_size, wsize);
-            slide_hash_chain(s->prev, wsize, wsize);
-            more += wsize;
-        }
-        if (s->strm->avail_in == 0)
-            break;
-
-        /* If there was no sliding:
-         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-         *    more == window_size - lookahead - strstart
-         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-         * => more >= window_size - 2*WSIZE + 2
-         * In the BIG_MEM or MMAP case (not yet supported),
-         *   window_size == input_size + MIN_LOOKAHEAD  &&
-         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-         * Otherwise, window_size == 2*WSIZE so more >= 2.
-         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-         */
-        Assert(more >= 2, "more < 2");
-
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-        s->lookahead += n;
-
-        /* Initialize the hash value now that we have some input: */
-        if (s->lookahead + s->insert >= MIN_MATCH) {
-            unsigned int str = s->strstart - s->insert;
-            unsigned int insert_cnt = s->insert;
-            unsigned int slen;
-
-            s->ins_h = s->window[str];
-
-            if (unlikely(s->lookahead < MIN_MATCH))
-                insert_cnt += s->lookahead - MIN_MATCH;
-            slen = insert_cnt;
-            if (str >= (MIN_MATCH - 2))
-            {
-                str += 2 - MIN_MATCH;
-                insert_cnt += MIN_MATCH - 2;
-            }
-            if (insert_cnt > 0)
-            {
-                functable.insert_string(s, str, insert_cnt);
-                s->insert -= slen;
-            }
-        }
-        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-         * but this is not important since only literal bytes will be emitted.
-         */
-    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-
-    /* If the WIN_INIT bytes after the end of the current data have never been
-     * written, then zero those bytes in order to avoid memory check reports of
-     * the use of uninitialized (or uninitialised as Julian writes) bytes by
-     * the longest match routines.  Update the high water mark for the next
-     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
-     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
-     */
-    if (s->high_water < s->window_size) {
-        unsigned long curr = s->strstart + (unsigned long)s->lookahead;
-        unsigned long init;
-
-        if (s->high_water < curr) {
-            /* Previous high water mark below current data -- zero WIN_INIT
-             * bytes or up to end of window, whichever is less.
-             */
-            init = s->window_size - curr;
-            if (init > WIN_INIT)
-                init = WIN_INIT;
-            memset(s->window + curr, 0, init);
-            s->high_water = curr + init;
-        } else if (s->high_water < curr + WIN_INIT) {
-            /* High water mark at or above current data, but below current data
-             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
-             * to end of window, whichever is less.
-             */
-            init = curr + WIN_INIT;
-            if (init > s->window_size)
-                init = s->window_size;
-            init -= s->high_water;
-            memset(s->window + s->high_water, 0, init);
-            s->high_water += init;
-        }
-    }
-
-    Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
-}
--- a/libs/zlibng/arch/arm/insert_string_acle.c
+++ b/libs/zlibng/arch/arm/insert_string_acle.c
@ -5,49 +5,18 @@
 *
 */

-#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
-#include <arm_acle.h>
-#include "zbuild.h"
-#include "deflate.h"
-
-/* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
- *    input characters and the first MIN_MATCH bytes of str are valid
- *    (except for the last MIN_MATCH-1 bytes of the input file).
- */
-Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
-    Pos p, lp, ret;
-
-    if (unlikely(count == 0)) {
-        return s->prev[str & s->w_mask];
-    }
-
-    ret = 0;
-    lp = str + count - 1; /* last position */
-
-    for (p = str; p <= lp; p++) {
-        uint32_t val, h, hm;
-        memcpy(&val, &s->window[p], sizeof(val));
-
-        if (s->level >= TRIGGER_LEVEL)
-            val &= 0xFFFFFF;
-
-        h = __crc32w(0, val);
-        hm = h & s->hash_mask;
-
-        Pos head = s->head[hm];
-        if (head != p) {
-            s->prev[p & s->w_mask] = head;
-            s->head[hm] = p;
-            if (p == lp)
-              ret = head;
-        } else if (p == lp) {
-          ret = p;
-        }
-    }
-    return ret;
-}
+#ifdef ARM_ACLE_CRC_HASH
+/* <arm_acle.h> is only pulled in for non-MSVC compilers. */
+#ifndef _MSC_VER
+#  include <arm_acle.h>
+#endif
+#include "../../zbuild.h"
+#include "../../deflate.h"
+
+/* Hash step for this variant: h is overwritten with __crc32w(0, val), i.e. the
+ * CRC32 intrinsic applied to the 32-bit word val with a seed of 0. The previous
+ * value of h and the s argument are not used by the expansion. */
+#define UPDATE_HASH(s, h, val) \
+    h = __crc32w(0, val)
+
+/* Names given to the ACLE variant of the insert-string functions; presumably
+ * consumed by insert_string_tpl.h included below -- confirm in the template. */
+#define INSERT_STRING       insert_string_acle
+#define QUICK_INSERT_STRING quick_insert_string_acle
+
+#include "../../insert_string_tpl.h"
 #endif
--- a/libs/zlibng/arch/arm/slide_neon.c
+++ b/libs/zlibng/arch/arm/slide_neon.c
@ -0,0 +1,52 @@
+/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
+ * Copyright (C) 2017-2020 Mika T. Lindqvist
+ *
+ * Authors:
+ * Mika T. Lindqvist <postmaster@raasu.org>
+ * Jun He <jun.he@arm.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#if defined(ARM_NEON_SLIDEHASH)
+#ifdef _M_ARM64
+#  include <arm64_neon.h>
+#else
+#  include <arm_neon.h>
+#endif
+#include "../../zbuild.h"
+#include "../../deflate.h"
+
+/* SIMD version of hash_chain rebase.
+ *
+ * Subtracts window_size from each of the `entries` 16-bit positions stored in
+ * `table`, using a saturating subtract (vqsubq_u16) so that positions not
+ * greater than window_size become 0.
+ *
+ * Every loop iteration rewrites 8 vectors of 8 entries each, that is
+ * sizeof(uint16x8_t) * 8 bytes, and the loop body always runs at least once.
+ * The table size in bytes must therefore be a non-zero multiple of that amount.
+ */
+static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
+    Z_REGISTER uint16x8_t v, *p;
+    Z_REGISTER size_t n;
+
+    size_t size = entries*sizeof(table[0]);
+    /* The divisor is parenthesised so the check matches the loop stride below.
+     * Without the parentheses `%` and `*` associate left to right and only
+     * alignment to a single vector would be verified. */
+    Assert((size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");
+
+    Assert(sizeof(Pos) == 2, "Wrong Pos size");
+    /* Broadcast the window size to all 8 lanes. */
+    v = vdupq_n_u16(window_size);
+
+    p = (uint16x8_t *)table;
+    /* Number of 8-vector groups to process. */
+    n = size / (sizeof(uint16x8_t) * 8);
+    do {
+        p[0] = vqsubq_u16(p[0], v);
+        p[1] = vqsubq_u16(p[1], v);
+        p[2] = vqsubq_u16(p[2], v);
+        p[3] = vqsubq_u16(p[3], v);
+        p[4] = vqsubq_u16(p[4], v);
+        p[5] = vqsubq_u16(p[5], v);
+        p[6] = vqsubq_u16(p[6], v);
+        p[7] = vqsubq_u16(p[7], v);
+        p += 8;
+    } while (--n);
+}
+
+/* NEON slide_hash entry point: rebases both hash tables of the deflate state
+ * by the current window size. */
+Z_INTERNAL void slide_hash_neon(deflate_state *s) {
+    const unsigned int window = s->w_size;
+
+    /* head is processed over HASH_SIZE entries, prev over w_size entries. */
+    slide_hash_chain(s->head, HASH_SIZE, window);
+    slide_hash_chain(s->prev, window, window);
+}
+#endif
--- a/libs/zlibng/arch/power/Makefile.in
+++ b/libs/zlibng/arch/power/Makefile.in
@ -0,0 +1,49 @@
+# Makefile for POWER-specific files
+# Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+# For conditions of distribution and use, see copyright notice in zlib.h
+
+# Tool and flag variables are left empty in this template; presumably they are
+# filled in when the Makefile is generated from Makefile.in -- confirm against
+# the configure script.
+CC=
+CFLAGS=
+SFLAGS=
+INCLUDES=
+SUFFIX=
+
+# Locations of this directory's sources and of the top of the source tree.
+SRCDIR=.
+SRCTOP=../..
+TOPDIR=$(SRCTOP)
+
+# Extra compiler flags applied only to the POWER8-specific sources
+# (adler32_power8.c and slide_hash_power8.c); power.c is built without them.
+P8FLAGS=-mcpu=power8
+
+# Every source is built twice: a .o object using CFLAGS and a .lo object
+# using SFLAGS.
+# NOTE(review): the object rules below list no prerequisites, so make will not
+# rebuild an existing object when its source file changes.
+all: power.o \
+     power.lo \
+     adler32_power8.o \
+     adler32_power8.lo \
+     slide_hash_power8.o \
+     slide_hash_power8.lo
+
+power.o:
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
+
+power.lo:
+	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
+
+adler32_power8.o:
+	$(CC) $(CFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c
+
+adler32_power8.lo:
+	$(CC) $(SFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c
+
+slide_hash_power8.o:
+	$(CC) $(CFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c
+
+slide_hash_power8.lo:
+	$(CC) $(SFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c
+
+# clean removes objects, editor backups, the objs directory and coverage data;
+# mostlyclean is an alias for it.
+mostlyclean: clean
+clean:
+	rm -f *.o *.lo *~
+	rm -rf objs
+	rm -f *.gcda *.gcno *.gcov
+
+# distclean additionally removes the Makefile itself.
+distclean:
+	rm -f Makefile
--- a/libs/zlibng/arch/power/adler32_power8.c
+++ b/libs/zlibng/arch/power/adler32_power8.c
@ -0,0 +1,154 @@
+/* Adler32 for POWER8 using VSX instructions.
+ * Copyright (C) 2020 IBM Corporation
+ * Author: Rogerio Alves <rcardoso@linux.ibm.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Calculate adler32 checksum for 16 bytes at once using POWER8+ VSX (vector)
+ * instructions.
+ *
+ * If adler32 processes 1 byte at a time, then s1_0 (_n denotes the value
+ * after iteration n) is the initial value of adler - at start _0 is 1 unless
+ * the initial adler value is different from 1. So s1_1 = s1_0 + c[1] after
+ * the first calculation. For the next iteration s1_2 = s1_1 + c[2] and so on.
+ * Hence, for iteration N, s1_N = s1_(N-1) + c[N] is the value of s1
+ * after iteration N.
+ *
+ * Therefore, for s2 and iteration N, s2_N = s2_0 + N*s1_0 + N*c[1] +
+ * (N-1)*c[2] + ... + c[N]
+ *
+ * In a more general way:
+ *
+ * s1_N = s1_0 + sum(i=1 to N)c[i]
+ * s2_N = s2_0 + N*s1_0 + sum(i=1 to N)(N-i+1)*c[i]
+ *
+ * Where s1_N, s2_N are the values for s1, s2 after N iterations. So if we
+ * can process N bytes at a time we can do this at once.
+ *
+ * Since VSX supports 16-byte (128-bit) vector instructions, we can process
+ * 16 bytes at a time; using N = 16 we have:
+ *
+ * s1 = s1_16 = s1_(16-1) + c[16] = s1_0 + sum(i=1 to 16)c[i]
+ * s2 = s2_16 = s2_0 + 16*s1_0 + sum(i=1 to 16)(16-i+1)*c[i]
+ *
+ * After the first iteration we calculate the adler32 checksum for 16 bytes.
+ *
+ * For more background about adler32 please check the RFC:
+ * https://www.ietf.org/rfc/rfc1950.txt
+ */
+
+#ifdef POWER8_VSX_ADLER32
+
+#include <altivec.h>
+#include "zbuild.h"
+#include "zutil.h"
+#include "adler32_p.h"
+
+/* Vector across sum of four unsigned ints.
+ *
+ * Every element of the returned vector holds the sum of all four elements of
+ * __a. The additions use vec_add, so they wrap modulo 2^32 rather than
+ * saturate. The incoming value of __b is ignored; the parameter is only used
+ * as scratch space.
+ *
+ * Declared static inline: a plain C99 `inline` definition provides no external
+ * definition, so a call the compiler decides not to inline (e.g. at -O0)
+ * would otherwise fail to link.
+ */
+static inline vector unsigned int vec_sumsu(vector unsigned int __a, vector unsigned int __b) {
+    /* Rotate by 8 bytes and add: each lane now holds a pair sum. */
+    __b = vec_sld(__a, __a, 8);
+    __b = vec_add(__b, __a);
+    /* Rotate by 4 bytes and add: each lane now holds the full sum. */
+    __a = vec_sld(__b, __b, 4);
+    __a = vec_add(__a, __b);
+
+    return __a;
+}
+
+/* Adler-32 update using POWER8 VSX instructions.
+ *
+ * adler: running checksum; the low 16 bits are used as s1, the high 16 bits as s2.
+ * buf:   input bytes; a NULL pointer makes the function return 1.
+ * len:   number of bytes in buf.
+ *
+ * Inputs of a single byte or of fewer than 64 bytes are delegated to the
+ * adler32_len_1() / adler32_len_64() helpers. Longer inputs are consumed 16
+ * bytes per vector step, and any remaining tail of fewer than 16 bytes is
+ * finished by adler32_len_16(), whose result is returned.
+ */
+uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len) {
+    uint32_t s1 = adler & 0xffff;
+    uint32_t s2 = (adler >> 16) & 0xffff;
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (UNLIKELY(len == 1))
+        return adler32_len_1(s1, buf, s2);
+
+    /* A NULL buffer returns the initial adler value (1). A zero len with a
+       non-NULL buffer is not caught here; it takes the short-input path below.  */
+    if (UNLIKELY(buf == NULL))
+        return 1;
+
+    /* This is faster than VSX code for len < 64.  */
+    if (len < 64)
+        return adler32_len_64(s1, buf, len, s2);
+
+    /* Use POWER VSX instructions for len >= 64. */
+    const vector unsigned int v_zeros = { 0 };
+    /* Per-byte weights (16-i+1) used for the s2 sum of one 16-byte step.  */
+    const vector unsigned char v_mul = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7,
+         6, 5, 4, 3, 2, 1};
+    /* Shift count of 4 bits, used below to multiply vs1_save by 16.  */
+    const vector unsigned char vsh = vec_splat_u8(4);
+    /* Keeps element 0 and clears the other three elements.  */
+    const vector unsigned int vmask = {0xffffffff, 0x0, 0x0, 0x0};
+    vector unsigned int vs1 = { 0 };
+    vector unsigned int vs2 = { 0 };
+    vector unsigned int vs1_save = { 0 };
+    vector unsigned int vsum1, vsum2;
+    vector unsigned char vbuf;
+    int n;
+
+    /* Seed element 0 of the accumulators with the incoming s1 and s2.  */
+    vs1[0] = s1;
+    vs2[0] = s2;
+
+    /* Do length bigger than NMAX in blocks of NMAX size.  */
+    while (len >= NMAX) {
+        len -= NMAX;
+        n = NMAX / 16;
+        do {
+            vbuf = vec_xl(0, (unsigned char *) buf);
+            vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i].  */
+            /* sum(i=1 to 16) buf[i]*(16-i+1).  */
+            vsum2 = vec_msum(vbuf, v_mul, v_zeros);
+            /* Save vs1.  */
+            vs1_save = vec_add(vs1_save, vs1);
+            /* Accumulate the sums.  */
+            vs1 = vec_add(vsum1, vs1);
+            vs2 = vec_add(vsum2, vs2);
+
+            buf += 16;
+        } while (--n);
+        /* Once each block of NMAX size.  */
+        vs1 = vec_sumsu(vs1, vsum1);
+        vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save.  */
+        vs2 = vec_add(vs1_save, vs2);
+        vs2 = vec_sumsu(vs2, vsum2);
+
+        /* vs1[0] = (s1_i + sum(i=1 to 16)buf[i]) mod 65521.  */
+        vs1[0] = vs1[0] % BASE;
+        /* vs2[0] = s2_i + 16*s1_save +
+           sum(i=1 to 16)(16-i+1)*buf[i] mod 65521.  */
+        vs2[0] = vs2[0] % BASE;
+
+        /* Keep only element 0 and restart the per-block accumulation.  */
+        vs1 = vec_and(vs1, vmask);
+        vs2 = vec_and(vs2, vmask);
+        vs1_save = v_zeros;
+    }
+
+    /* len is now less than NMAX, so a single modulo at the end is enough.  */
+    if (len >= 16) {
+        while (len >= 16) {
+            len -= 16;
+
+            vbuf = vec_xl(0, (unsigned char *) buf);
+
+            vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i].  */
+            /* sum(i=1 to 16) buf[i]*(16-i+1).  */
+            vsum2 = vec_msum(vbuf, v_mul, v_zeros);
+            /* Save vs1.  */
+            vs1_save = vec_add(vs1_save, vs1);
+            /* Accumulate the sums.  */
+            vs1 = vec_add(vsum1, vs1);
+            vs2 = vec_add(vsum2, vs2);
+
+            buf += 16;
+        }
+        /* Since the size will be always less than NMAX we do this once.  */
+        vs1 = vec_sumsu(vs1, vsum1);
+        vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save.  */
+        vs2 = vec_add(vs1_save, vs2);
+        vs2 = vec_sumsu(vs2, vsum2);
+    }
+    /* Copy result back to s1, s2 (mod 65521).  */
+    s1 = vs1[0] % BASE;
+    s2 = vs2[0] % BASE;
+
+    /* Process tail (len < 16) and return.  */
+    return adler32_len_16(s1, buf, len, s2);
+}
+
+#endif /* POWER8_VSX_ADLER32 */
--- a/libs/zlibng/arch/power/power.c
+++ b/libs/zlibng/arch/power/power.c
@ -0,0 +1,19 @@
+/* POWER feature check
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include <sys/auxv.h>
+#include "../../zutil.h"
+
+/* Set to 1 by power_check_features() when AT_HWCAP2 reports
+ * PPC_FEATURE2_ARCH_2_07; otherwise it keeps its zero-initialised value. */
+Z_INTERNAL int power_cpu_has_arch_2_07;
+
+/* Reads AT_HWCAP2 from the auxiliary vector and, in builds that define POWER8,
+ * records the PPC_FEATURE2_ARCH_2_07 bit in power_cpu_has_arch_2_07. */
+void Z_INTERNAL power_check_features(void) {
+    const unsigned long aux_caps = getauxval(AT_HWCAP2);
+
+#ifdef POWER8
+    if ((aux_caps & PPC_FEATURE2_ARCH_2_07) != 0) {
+        power_cpu_has_arch_2_07 = 1;
+    }
+#endif
+}
--- a/libs/zlibng/arch/power/power.h
+++ b/libs/zlibng/arch/power/power.h
@ -0,0 +1,13 @@
+/* power.h -- check for POWER CPU features
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef POWER_H_
+#define POWER_H_
+
+/* Non-zero once power_check_features() has found PPC_FEATURE2_ARCH_2_07 set in
+ * AT_HWCAP2 (only checked in builds that define POWER8). */
+extern int power_cpu_has_arch_2_07;
+
+/* Queries the auxiliary vector and updates the feature flag declared above. */
+void Z_INTERNAL power_check_features(void);
+
+#endif /* POWER_H_ */
--- a/libs/zlibng/arch/power/slide_hash_power8.c
+++ b/libs/zlibng/arch/power/slide_hash_power8.c
@ -0,0 +1,60 @@
+/* Optimized slide_hash for POWER processors
+ * Copyright (C) 2019-2020 IBM Corporation
+ * Author: Matheus Castanho <msc@linux.ibm.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifdef POWER8_VSX_SLIDEHASH
+
+#include <altivec.h>
+#include "zbuild.h"
+#include "deflate.h"
+
+/* Slides one hash table: subtracts s->w_size, saturating at 0, from each of
+ * the n_elems entries that end at table_end, walking backwards 8 entries at a
+ * time. */
+static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) {
+    vector unsigned short wsize_vec, *cursor;
+    unsigned remaining;
+
+    /* One vector register is 128 bits == 8 Pos entries, so the table is
+     * processed in groups of 8 instead of entry by entry.
+     *
+     * The only caller is slide_hash_power8(), and both of its calls are
+     * expected to pass a power of 2 above 2^7 (sizes set up by
+     * deflateInit2_(), not visible here), so n_elems should always be a
+     * multiple of 8. */
+    Assert(n_elems % 8 == 0, "Weird hash table size!");
+    remaining = n_elems >> 3;
+
+    /* Broadcast w_size to all 8 lanes. The narrowing cast to Pos is
+     * considered safe because deflateInit2_() bounds s->w_size -- confirm
+     * the bound against that function. */
+    wsize_vec[0] = (Pos) s->w_size;
+    wsize_vec = vec_splat(wsize_vec, 0);
+
+    cursor = (vector unsigned short *) table_end;
+
+    do {
+        /* Step back one group of 8 entries and rebase it in place.
+         *
+         * Equivalent to: m >= w_size ? m - w_size : 0. The subtraction is
+         * unsigned and saturating, so entries below w_size clamp to 0 while
+         * all others are reduced by w_size. */
+        cursor--;
+        *cursor = vec_subs(*cursor, wsize_vec);
+    } while (--remaining);
+}
+
+/* POWER8 slide_hash entry point: rebases the head table (HASH_SIZE entries)
+ * and then the prev table (w_size entries) of the deflate state. */
+void Z_INTERNAL slide_hash_power8(deflate_state *s) {
+    unsigned int count;
+
+    /* The loop helper walks backwards, so it is handed the end of each table. */
+    count = HASH_SIZE;
+    slide_hash_power8_loop(s, count, &s->head[count]);
+
+    count = s->w_size;
+    slide_hash_power8_loop(s, count, &s->prev[count]);
+}
+
+#endif /* POWER8_VSX_SLIDEHASH */
--- a/libs/zlibng/arch/s390/README.md
+++ b/libs/zlibng/arch/s390/README.md
@ -1,6 +1,7 @@
-This directory contains IBM Z DEFLATE CONVERSION CALL support for
-zlib-ng. In order to enable it, the following build commands should be
-used:
+# Introduction
+
+This directory contains SystemZ deflate hardware acceleration support.
+It can be enabled using the following build commands:

    $ ./configure --with-dfltcc-deflate --with-dfltcc-inflate
    $ make
@ -10,60 +11,206 @@ or
    $ cmake -DWITH_DFLTCC_DEFLATE=1 -DWITH_DFLTCC_INFLATE=1 .
    $ make

-When built like this, zlib-ng would compress in hardware on level 1,
-and in software on all other levels. Decompression will always happen
-in hardware. In order to enable DFLTCC compression for levels 1-6 (i.e.
-to make it used by default) one could add -DDFLTCC_LEVEL_MASK=0x7e to
-CFLAGS when building zlib-ng.
+When built like this, zlib-ng would compress using hardware on level 1,
+and using software on all other levels. Decompression will always happen
+in hardware. In order to enable hardware compression for levels 1-6
+(i.e. to make it used by default) one could add
+`-DDFLTCC_LEVEL_MASK=0x7e` to CFLAGS when building zlib-ng.

-Two DFLTCC compression calls produce the same results only when they
-both are made on machines of the same generation, and when the
-respective buffers have the same offset relative to the start of the
-page. Therefore care should be taken when using hardware compression
-when reproducible results are desired.
+SystemZ deflate hardware acceleration is available on [IBM z15](
+https://www.ibm.com/products/z15) and newer machines under the name [
+"Integrated Accelerator for zEnterprise Data Compression"](
+https://www.ibm.com/support/z-content-solutions/compression/). The
+programming interface to it is a machine instruction called DEFLATE
+CONVERSION CALL (DFLTCC). It is documented in Chapter 26 of [Principles
+of Operation](http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf). Both
+the code and the rest of this document refer to this feature simply as
+"DFLTCC".
+
+# Performance
+
+Performance figures are published [here](
+https://github.com/iii-i/zlib-ng/wiki/Performance-with-dfltcc-patch-applied-and-dfltcc-support-built-on-dfltcc-enabled-machine
+). The compression speed-up can be as high as 110x and the decompression
+speed-up can be as high as 15x.
+
+# Limitations
+
+Two DFLTCC compression calls with identical inputs are not guaranteed to
+produce identical outputs. Therefore care should be taken when using
+hardware compression if reproducible results are desired. In
+particular, the zlib-ng-specific `zng_deflateSetParams` call allows
+setting the `Z_DEFLATE_REPRODUCIBLE` parameter, which disables DFLTCC
+support for a particular stream.

 DFLTCC does not support every single zlib-ng feature, in particular:

-* inflate(Z_BLOCK) and inflate(Z_TREES)
-* inflateMark()
-* inflatePrime()
-* deflateParams() after the first deflate() call
+* `inflate(Z_BLOCK)` and `inflate(Z_TREES)`
+* `inflateMark()`
+* `inflatePrime()`
+* `inflateSyncPoint()`

 When used, these functions will either switch to software, or, in case
 this is not possible, gracefully fail.

-All SystemZ-specific code lives in a separate file and is integrated
-with the rest of zlib-ng using hook macros, which are explained below.
+# Code structure
+
+All SystemZ-specific code lives in `arch/s390` directory and is
+integrated with the rest of zlib-ng using hook macros.
+
+## Hook macros

 DFLTCC takes as arguments a parameter block, an input buffer, an output
-buffer and a window. ZALLOC_STATE, ZFREE_STATE, ZCOPY_STATE,
-ZALLOC_WINDOW and TRY_FREE_WINDOW macros encapsulate allocation details
-for the parameter block (which is allocated alongside zlib-ng state)
-and the window (which must be page-aligned).
+buffer and a window. `ZALLOC_STATE()`, `ZFREE_STATE()`, `ZCOPY_STATE()`,
+`ZALLOC_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate allocation
+details for the parameter block (which is allocated alongside zlib-ng
+state) and the window (which must be page-aligned).

-While for inflate software and hardware window formats match, this is
-not the case for deflate. Therefore, deflateSetDictionary and
-deflateGetDictionary need special handling, which is triggered using
-the DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros.
+While inflate software and hardware window formats match, this is not
+the case for deflate. Therefore, `deflateSetDictionary()` and
+`deflateGetDictionary()` need special handling, which is triggered using
+`DEFLATE_SET_DICTIONARY_HOOK()` and `DEFLATE_GET_DICTIONARY_HOOK()`
+macros.

-deflateResetKeep() and inflateResetKeep() update the DFLTCC parameter
-block using DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros.
+`deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC
+parameter block using `DEFLATE_RESET_KEEP_HOOK()` and
+`INFLATE_RESET_KEEP_HOOK()` macros.

-DEFLATE_PARAMS_HOOK, INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros
-make the unsupported deflateParams(), inflatePrime() and inflateMark()
-calls fail gracefully.
+`INFLATE_PRIME_HOOK()`, `INFLATE_MARK_HOOK()` and
+`INFLATE_SYNC_POINT_HOOK()` macros make the respective unsupported
+calls gracefully fail.
+
+`DEFLATE_PARAMS_HOOK()` implements switching between hardware and
+software compression mid-stream using `deflateParams()`. Switching
+normally entails flushing the current block, which might not be possible
+in low memory situations. `deflateParams()` uses `DEFLATE_DONE()` hook
+in order to detect and gracefully handle such situations.

 The algorithm implemented in hardware has different compression ratio
-than the one implemented in software. DEFLATE_BOUND_ADJUST_COMPLEN and
-DEFLATE_NEED_CONSERVATIVE_BOUND macros make deflateBound() return the
-correct results for the hardware implementation.
+than the one implemented in software. `DEFLATE_BOUND_ADJUST_COMPLEN()`
+and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()`
+return the correct results for the hardware implementation.

-Actual compression and decompression are handled by DEFLATE_HOOK and
-INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the
-window on its own, calling updatewindow() is suppressed using
-INFLATE_NEED_UPDATEWINDOW() macro.
+Actual compression and decompression are handled by `DEFLATE_HOOK()` and
+`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the
+window on its own, calling `updatewindow()` is suppressed using
+`INFLATE_NEED_UPDATEWINDOW()` macro.

 In addition to compression, DFLTCC computes CRC-32 and Adler-32
 checksums, therefore, whenever it's used, software checksumming is
-suppressed using DEFLATE_NEED_CHECKSUM and INFLATE_NEED_CHECKSUM
+suppressed using `DEFLATE_NEED_CHECKSUM()` and `INFLATE_NEED_CHECKSUM()`
 macros.
+
+While software always produces reproducible compression results, this
+is not the case for DFLTCC. Therefore, zlib-ng users are given the
+ability to specify whether or not reproducible compression results
+are required. While it is always possible to specify this setting
+before the compression begins, it is not always possible to do so in
+the middle of a deflate stream - the exact conditions for that are
+determined by `DEFLATE_CAN_SET_REPRODUCIBLE()` macro.
+
+## SystemZ-specific code
+
+When zlib-ng is built with DFLTCC, the hooks described above are
+converted to calls to functions, which are implemented in
+`arch/s390/dfltcc_*` files. The functions can be grouped in three broad
+categories:
+
+* Base DFLTCC support, e.g. wrapping the machine instruction -
+  `dfltcc()` and allocating aligned memory - `dfltcc_alloc_state()`.
+* Translating between software and hardware data formats, e.g.
+  `dfltcc_deflate_set_dictionary()`.
+* Translating between software and hardware state machines, e.g.
+  `dfltcc_deflate()` and `dfltcc_inflate()`.
+
+The functions from the first two categories are fairly simple, however,
+various quirks in both software and hardware state machines make the
+functions from the third category quite complicated.
+
+### `dfltcc_deflate()` function
+
+This function is called by `deflate()` and has the following
+responsibilities:
+
+* Checking whether DFLTCC can be used with the current stream. If this
+  is not the case, then it returns `0`, making `deflate()` use some
+  other function in order to compress in software. Otherwise it returns
+  `1`.
+* Block management and Huffman table generation. DFLTCC ends blocks only
+  when explicitly instructed to do so by the software. Furthermore,
+  whether to use fixed or dynamic Huffman tables must also be determined
+  by the software. Since looking at data in order to gather statistics
+  would negate performance benefits, the following approach is used: the
+  first `DFLTCC_FIRST_FHT_BLOCK_SIZE` bytes are placed into a fixed
+  block, and every next `DFLTCC_BLOCK_SIZE` bytes are placed into
+  dynamic blocks.
+* Writing EOBS. Block Closing Control bit in the parameter block
+  instructs DFLTCC to write EOBS, however, certain conditions need to be
+  met: input data length must be non-zero or Continuation Flag must be
+  set. To put this in simpler terms, DFLTCC will silently refuse to
+  write EOBS if this is the only thing that it is asked to do. Since the
+  code has to be able to emit EOBS in software anyway, in order to avoid
+  tricky corner cases Block Closing Control is never used. Whether to
+  write EOBS is instead controlled by `soft_bcc` variable.
+* Triggering block post-processing. Depending on flush mode, `deflate()`
+  must perform various additional actions when a block or a stream ends.
+  `dfltcc_deflate()` informs `deflate()` about this using
+  `block_state *result` parameter.
+* Converting software state fields into hardware parameter block fields,
+  and vice versa. For example, `wrap` and Check Value Type or `bi_valid`
+  and Sub-Byte Boundary. Certain fields cannot be translated and must
+  persist untouched in the parameter block between calls, for example,
+  Continuation Flag or Continuation State Buffer.
+* Handling flush modes and low-memory situations. These aspects are
+  quite intertwined and pervasive. The general idea here is that the
+  code must not do anything in software - whether explicitly by e.g.
+  calling `send_eobs()`, or implicitly - by returning to `deflate()`
+  with certain return and `*result` values, when Continuation Flag is
+  set.
+* Ending streams. When a new block is started and flush mode is
+  `Z_FINISH`, Block Header Final parameter block bit is used to mark
+  this block as final. However, sometimes an empty final block is
+  needed, and, unfortunately, just like with EOBS, DFLTCC will silently
+  refuse to do this. The general idea of DFLTCC implementation is to
+  rely as much as possible on the existing code. Here in order to do
+  this, the code pretends that it does not support DFLTCC, which makes
+  `deflate()` call a software compression function, which writes an
+  empty final block. Whether this is required is controlled by
+  `need_empty_block` variable.
+* Error handling. This is simply converting
+  Operation-Ending-Supplemental Code to string. Errors can only happen
+  due to things like memory corruption, and therefore they don't affect
+  the `deflate()` return code.
+
+### `dfltcc_inflate()` function
+
+This function is called by `inflate()` from the `TYPEDO` state (that is,
+when all the metadata is parsed and the stream is positioned at the type
+bits of deflate block header) and it's responsible for the following:
+
+* Falling back to software when flush mode is `Z_BLOCK` or `Z_TREES`.
+  Unfortunately, there is no way to ask DFLTCC to stop decompressing on
+  block or tree boundary.
+* `inflate()` decompression loop management. This is controlled using
+  the return value, which can be either `DFLTCC_INFLATE_BREAK` or
+  `DFLTCC_INFLATE_CONTINUE`.
+* Converting software state fields into hardware parameter block fields,
+  and vice versa. For example, `whave` and History Length or `wnext` and
+  History Offset.
+* Ending streams. This instructs `inflate()` to return `Z_STREAM_END`
+  and is controlled by `last` state field.
+* Error handling. Like deflate, error handling comprises
+  Operation-Ending-Supplemental Code to string conversion. Unlike
+  deflate, errors may happen due to bad inputs, therefore they are
+  propagated to `inflate()` by setting `mode` field to `MEM` or `BAD`.
+
+# Testing
+
+Given the complexity of the DFLTCC machine instruction, it is not clear
+whether QEMU TCG will ever support it. At the time of writing, one has to
+have access to an IBM z15+ VM or LPAR in order to test DFLTCC support.
+Since DFLTCC is a non-privileged instruction, neither special VM/LPAR
+configuration nor root access is required.
+
+Still, zlib-ng CI has a few QEMU TCG-based configurations that check
+whether fallback to software is working.
--- a/libs/zlibng/arch/s390/dfltcc_common.c
+++ b/libs/zlibng/arch/s390/dfltcc_common.c
@ -1,6 +1,6 @@
 /* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL general support. */

-#include "zbuild.h"
+#include "../../zbuild.h"
 #include "dfltcc_common.h"
 #include "dfltcc_detail.h"

@ -12,20 +12,31 @@
   `posix_memalign' is not an option. Thus, we overallocate and take the
   aligned portion of the buffer.
 */
-static inline int is_dfltcc_enabled(void)
-{
+static inline int is_dfltcc_enabled(void) {
    uint64_t facilities[(DFLTCC_FACILITY / 64) + 1];
-    register uint8_t r0 __asm__("r0");
+    Z_REGISTER uint8_t r0 __asm__("r0");

    memset(facilities, 0, sizeof(facilities));
    r0 = sizeof(facilities) / sizeof(facilities[0]) - 1;
-    __asm__ volatile("stfle %[facilities]\n" : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
+    /* STFLE is supported since z9-109 and only in z/Architecture mode. When
+     * compiling with -m31, gcc defaults to ESA mode, however, since the kernel
+     * is 64-bit, it's always z/Architecture mode at runtime.
+     */
+    __asm__ volatile(
+#ifndef __clang__
+                     ".machinemode push\n"
+                     ".machinemode zarch\n"
+#endif
+                     "stfle %[facilities]\n"
+#ifndef __clang__
+                     ".machinemode pop\n"
+#endif
+                     : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
    return is_bit_set((const char *)facilities, DFLTCC_FACILITY);
 }

-void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size)
-{
-    struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + size);
+void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size) {
+    struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + ALIGN_UP(size, 8));
    struct dfltcc_qaf_param *param = (struct dfltcc_qaf_param *)&dfltcc_state->param;

    /* Initialize available functions */
@ -47,24 +58,17 @@ void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size)
    dfltcc_state->param.ribm = DFLTCC_RIBM;
 }

-void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size)
-{
-    Assert((items * size) % 8 == 0,
-           "The size of zlib-ng state must be a multiple of 8");
-    return ZALLOC(strm, items * size + sizeof(struct dfltcc_state), sizeof(unsigned char));
+void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size) {
+    return ZALLOC(strm, ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state), sizeof(unsigned char));
 }

-void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size)
-{
-    memcpy(dst, src, size + sizeof(struct dfltcc_state));
+void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size) {
+    memcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state));
 }

 static const int PAGE_ALIGN = 0x1000;

-#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
-
-void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size)
-{
+void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size) {
    void *p;
    void *w;

@ -79,8 +83,7 @@ void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt
    return w;
 }

-void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w)
-{
+void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w) {
    if (w)
        ZFREE(strm, *(void **)((unsigned char *)w - sizeof(void *)));
 }
--- a/libs/zlibng/arch/s390/dfltcc_common.h
+++ b/libs/zlibng/arch/s390/dfltcc_common.h
@ -2,17 +2,17 @@
 #define DFLTCC_COMMON_H

 #ifdef ZLIB_COMPAT
-#include "zlib.h"
+#include "../../zlib.h"
 #else
-#include "zlib-ng.h"
+#include "../../zlib-ng.h"
 #endif
-#include "zutil.h"
+#include "../../zutil.h"

-void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size);
-void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size);
-void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size);
-void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size);
-void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w);
+void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size);
+void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size);
+void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size);
+void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size);
+void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w);

 #define ZALLOC_STATE dfltcc_alloc_state

--- a/libs/zlibng/arch/s390/dfltcc_deflate.c
+++ b/libs/zlibng/arch/s390/dfltcc_deflate.c
@ -13,27 +13,26 @@
        $ make
 */

-#include "zbuild.h"
-#include "zutil.h"
-#include "deflate.h"
+#include "../../zbuild.h"
+#include "../../zutil.h"
+#include "../../deflate.h"
+#include "../../trees_emit.h"
 #include "dfltcc_deflate.h"
 #include "dfltcc_detail.h"

-static inline int dfltcc_are_params_ok(int level, uInt window_bits, int strategy, uint16_t level_mask)
-{
-    return (level_mask & ((uint16_t)1 << level)) != 0 &&
-        (window_bits == HB_BITS) &&
-        (strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY);
-}
-
-
-int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm)
-{
+static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy,
+                                       int reproducible) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);

    /* Unsupported compression settings */
-    if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy, dfltcc_state->level_mask))
+    if ((dfltcc_state->level_mask & (1 << level)) == 0)
+        return 0;
+    if (window_bits != HB_BITS)
+        return 0;
+    if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
+        return 0;
+    if (reproducible)
        return 0;

    /* Unsupported hardware */
@ -45,8 +44,13 @@ int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm)
    return 1;
 }

-static inline void dfltcc_gdht(PREFIX3(streamp) strm)
-{
+int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm) {
+    deflate_state *state = (deflate_state *)strm->state;
+
+    return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible);
+}
+
+static inline void dfltcc_gdht(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
    size_t avail_in = strm->avail_in;
@ -54,8 +58,7 @@ static inline void dfltcc_gdht(PREFIX3(streamp) strm)
    dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL);
 }

-static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm)
-{
+static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
    size_t avail_in = strm->avail_in;
@ -72,11 +75,10 @@ static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm)
    return cc;
 }

-static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param)
-{
+static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) {
    deflate_state *state = (deflate_state *)strm->state;

-    send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl);
+    send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid);
    flush_pending(strm);
    if (state->pending != 0) {
        /* The remaining data is located in pending_out[0:pending]. If someone
@ -93,8 +95,7 @@ static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0
 #endif
 }

-int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result)
-{
+int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;
@ -104,31 +105,38 @@ int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *
    int soft_bcc;
    int no_flush;

-    if (!dfltcc_can_deflate(strm))
+    if (!dfltcc_can_deflate(strm)) {
+        /* Clear history. */
+        if (flush == Z_FULL_FLUSH)
+            param->hl = 0;
        return 0;
+    }

 again:
    masked_avail_in = 0;
    soft_bcc = 0;
    no_flush = flush == Z_NO_FLUSH;

-    /* Trailing empty block. Switch to software, except when Continuation Flag
-     * is set, which means that DFLTCC has buffered some output in the
-     * parameter block and needs to be called again in order to flush it.
+    /* No input data. Return, except when Continuation Flag is set, which means
+     * that DFLTCC has buffered some output in the parameter block and needs to
+     * be called again in order to flush it.
     */
-    if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) {
-        if (param->bcf) {
-            /* A block is still open, and the hardware does not support closing
-             * blocks without adding data. Thus, close it manually.
-             */
+    if (strm->avail_in == 0 && !param->cf) {
+        /* A block is still open, and the hardware does not support closing
+         * blocks without adding data. Thus, close it manually.
+         */
+        if (!no_flush && param->bcf) {
            send_eobs(strm, param);
            param->bcf = 0;
        }
-        return 0;
-    }
-
-    if (strm->avail_in == 0 && !param->cf) {
-        *result = need_more;
+        /* Let one of deflate_* functions write a trailing empty block. */
+        if (flush == Z_FINISH)
+            return 0;
+        /* Clear history. */
+        if (flush == Z_FULL_FLUSH)
+            param->hl = 0;
+        /* Trigger block post-processing if necessary. */
+        *result = no_flush ? need_more : block_done;
        return 1;
    }

@ -154,13 +162,18 @@ again:
            send_eobs(strm, param);
            param->bcf = 0;
            dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
-            if (strm->avail_out == 0) {
-                *result = need_more;
-                return 1;
-            }
        }
    }

+    /* No space for compressed data. If we proceed, dfltcc_cmpr() will return
+     * DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
+     * set BCF=1, which is wrong. Avoid complications and return early.
+     */
+    if (strm->avail_out == 0) {
+        *result = need_more;
+        return 1;
+    }
+
    /* The caller gave us too much data. Pass only one block worth of
     * uncompressed data to DFLTCC and mask the rest, so that on the next
     * iteration we start a new block.
@ -180,7 +193,7 @@ again:
    param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
    if (!no_flush)
        /* We need to close a block. Always do this in software - when there is
-         * no input data, the hardware will not nohor BCC. */
+         * no input data, the hardware will not honor BCC. */
        soft_bcc = 1;
    if (flush == Z_FINISH && !param->bcf)
        /* We are about to open a BFINAL block, set Block Header Final bit
@ -195,8 +208,8 @@ again:
    param->sbb = (unsigned int)state->bi_valid;
    if (param->sbb > 0)
        *strm->next_out = (unsigned char)state->bi_buf;
-    if (param->hl)
-        param->nt = 0; /* Honor history */
+    /* Honor history and check value */
+    param->nt = 0;
    param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;

    /* When opening a block, choose a Huffman-Table Type */
@ -277,31 +290,60 @@ again:
   fly with deflateParams, we need to convert between hardware and software
   window formats.
 */
-int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy)
-{
+static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) {
+    deflate_state *state = (deflate_state *)strm->state;
+    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
+
+    return strm->total_in > 0 || param->nt == 0 || param->hl > 0;
+}
+
+int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush) {
    deflate_state *state = (deflate_state *)strm->state;
-    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
-    struct dfltcc_param_v0 *param = &dfltcc_state->param;
    int could_deflate = dfltcc_can_deflate(strm);
-    int can_deflate = dfltcc_are_params_ok(level, state->w_bits, strategy, dfltcc_state->level_mask);
+    int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible);

    if (can_deflate == could_deflate)
        /* We continue to work in the same mode - no changes needed */
        return Z_OK;

-    if (strm->total_in == 0 && param->nt == 1 && param->hl == 0)
+    if (!dfltcc_was_deflate_used(strm))
        /* DFLTCC was not used yet - no changes needed */
        return Z_OK;

-    /* Switching between hardware and software is not implemented */
-    return Z_STREAM_ERROR;
+    /* For now, do not convert between window formats - simply get rid of the old data instead */
+    *flush = Z_FULL_FLUSH;
+    return Z_OK;
+}
+
+int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush) {
+    deflate_state *state = (deflate_state *)strm->state;
+    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
+    struct dfltcc_param_v0 *param = &dfltcc_state->param;
+
+    /* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
+     * close the block without resetting the compression state. Detect this
+     * situation and return that deflation is not done.
+     */
+    if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
+        return 0;
+
+    /* Return that deflation is not done if DFLTCC is used and either it
+     * buffered some data (Continuation Flag is set), or has not written EOBS
+     * yet (Block-Continuation Flag is set).
+     */
+    return !dfltcc_can_deflate(strm) || (!param->cf && !param->bcf);
+}
+
+int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible) {
+    deflate_state *state = (deflate_state *)strm->state;
+
+    return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm);
 }

 /*
   Preloading history.
 */
-static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count)
-{
+static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) {
    size_t offset;
    size_t n;

@ -331,20 +373,19 @@ static void append_history(struct dfltcc_param_v0 *param, unsigned char *history
    }
 }

-int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
-                                                const unsigned char *dictionary, uInt dict_length)
-{
+int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
+                                                const unsigned char *dictionary, uInt dict_length) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;

    append_history(param, state->window, dictionary, dict_length);
    state->strstart = 1; /* Add FDICT to zlib header */
+    state->block_start = state->strstart; /* Make deflate_stored happy */
    return Z_OK;
 }

-int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length)
-{
+int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) {
    deflate_state *state = (deflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;
--- a/libs/zlibng/arch/s390/dfltcc_deflate.h
+++ b/libs/zlibng/arch/s390/dfltcc_deflate.h
@ -3,12 +3,14 @@

 #include "dfltcc_common.h"

-int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm);
-int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result);
-int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy);
-int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
+int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm);
+int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result);
+int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush);
+int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush);
+int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible);
+int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
                                                const unsigned char *dictionary, uInt dict_length);
-int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);
+int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);

 #define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
    do { \
@ -25,15 +27,17 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned
 #define DEFLATE_RESET_KEEP_HOOK(strm) \
    dfltcc_reset((strm), sizeof(deflate_state))

-#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \
+#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \
    do { \
        int err; \
 \
-        err = dfltcc_deflate_params((strm), (level), (strategy)); \
+        err = dfltcc_deflate_params((strm), (level), (strategy), (hook_flush)); \
        if (err == Z_STREAM_ERROR) \
            return err; \
    } while (0)

+#define DEFLATE_DONE dfltcc_deflate_done
+
 #define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
    do { \
        if (dfltcc_can_deflate((strm))) \
@ -47,4 +51,6 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned

 #define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm)))

+#define DEFLATE_CAN_SET_REPRODUCIBLE dfltcc_can_set_reproducible
+
 #endif
--- a/libs/zlibng/arch/s390/dfltcc_detail.h
+++ b/libs/zlibng/arch/s390/dfltcc_detail.h
@ -46,18 +46,17 @@ typedef enum {
 #define DFLTCC_FACILITY 151

 static inline dfltcc_cc dfltcc(int fn, void *param,
-                               unsigned char **op1, size_t *len1, const unsigned char **op2, size_t *len2, void *hist)
-{
+                               unsigned char **op1, size_t *len1, z_const unsigned char **op2, size_t *len2, void *hist) {
    unsigned char *t2 = op1 ? *op1 : NULL;
    size_t t3 = len1 ? *len1 : 0;
-    const unsigned char *t4 = op2 ? *op2 : NULL;
+    z_const unsigned char *t4 = op2 ? *op2 : NULL;
    size_t t5 = len2 ? *len2 : 0;
-    register int r0 __asm__("r0") = fn;
-    register void *r1 __asm__("r1") = param;
-    register unsigned char *r2 __asm__("r2") = t2;
-    register size_t r3 __asm__("r3") = t3;
-    register const unsigned char *r4 __asm__("r4") = t4;
-    register size_t r5 __asm__("r5") = t5;
+    Z_REGISTER int r0 __asm__("r0") = fn;
+    Z_REGISTER void *r1 __asm__("r1") = param;
+    Z_REGISTER unsigned char *r2 __asm__("r2") = t2;
+    Z_REGISTER size_t r3 __asm__("r3") = t3;
+    Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4;
+    Z_REGISTER size_t r5 __asm__("r5") = t5;
    int cc;

    __asm__ volatile(
@ -108,13 +107,11 @@ struct dfltcc_qaf_param {

 static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32);

-static inline int is_bit_set(const char *bits, int n)
-{
+static inline int is_bit_set(const char *bits, int n) {
    return bits[n / 8] & (1 << (7 - (n % 8)));
 }

-static inline void clear_bit(char *bits, int n)
-{
+static inline void clear_bit(char *bits, int n) {
    bits[n / 8] &= ~(1 << (7 - (n % 8)));
 }

@ -175,8 +172,7 @@ struct dfltcc_param_v0 {

 static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536);

-static inline const char *oesc_msg(char *buf, int oesc)
-{
+static inline z_const char *oesc_msg(char *buf, int oesc) {
    if (oesc == 0x00)
        return NULL; /* Successful completion */
    else {
@ -198,4 +194,6 @@ struct dfltcc_state {
    char msg[64];                      /* Buffer for strm->msg */
 };

-#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((state) + 1))
+#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
+
+#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*(state)), 8)))
--- a/libs/zlibng/arch/s390/dfltcc_inflate.c
+++ b/libs/zlibng/arch/s390/dfltcc_inflate.c
@ -13,15 +13,14 @@
        $ make
 */

-#include "zbuild.h"
-#include "zutil.h"
-#include "inftrees.h"
-#include "inflate.h"
+#include "../../zbuild.h"
+#include "../../zutil.h"
+#include "../../inftrees.h"
+#include "../../inflate.h"
 #include "dfltcc_inflate.h"
 #include "dfltcc_detail.h"

-int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm)
-{
+int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm) {
    struct inflate_state *state = (struct inflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);

@ -33,8 +32,7 @@ int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm)
    return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
 }

-static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm)
-{
+static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) {
    struct inflate_state *state = (struct inflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
    size_t avail_in = strm->avail_in;
@ -49,8 +47,7 @@ static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm)
    return cc;
 }

-dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret)
-{
+dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret) {
    struct inflate_state *state = (struct inflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
    struct dfltcc_param_v0 *param = &dfltcc_state->param;
@ -115,16 +112,14 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int fl
        DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE;
 }

-int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm)
-{
+int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm) {
    struct inflate_state *state = (struct inflate_state *)strm->state;
    struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;

    return !param->nt;
 }

-int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm)
-{
+int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) {
    struct inflate_state *state = (struct inflate_state *)strm->state;
    struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);

--- a/libs/zlibng/arch/s390/dfltcc_inflate.h
+++ b/libs/zlibng/arch/s390/dfltcc_inflate.h
@ -3,15 +3,15 @@

 #include "dfltcc_common.h"

-int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm);
+int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm);
 typedef enum {
    DFLTCC_INFLATE_CONTINUE,
    DFLTCC_INFLATE_BREAK,
    DFLTCC_INFLATE_SOFTWARE,
 } dfltcc_inflate_action;
-dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret);
-int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm);
-int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
+dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret);
+int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm);
+int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);

 #define INFLATE_RESET_KEEP_HOOK(strm) \
    dfltcc_reset((strm), sizeof(struct inflate_state))
@ -41,4 +41,9 @@ int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
        if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \
    } while (0)

+#define INFLATE_SYNC_POINT_HOOK(strm) \
+    do { \
+        if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \
+    } while (0)
+
 #endif
--- a/libs/zlibng/arch/x86/INDEX
+++ b/libs/zlibng/arch/x86/INDEX
@ -1,3 +0,0 @@
-fill_window_sse.c	SSE2 optimized fill_window
-deflate_quick.c		SSE4 optimized deflate strategy for use as level 1
-crc_folding.c		SSE4 + PCLMULQDQ optimized CRC folding implementation
--- a/libs/zlibng/arch/x86/INDEX.md
+++ b/libs/zlibng/arch/x86/INDEX.md
@ -0,0 +1,8 @@
+Contents
+--------
+
+|Name|Description|
+|:-|:-|
+|deflate_quick.c|SSE4 optimized deflate strategy for use as level 1|
+|crc_folding.c|SSE4 + PCLMULQDQ optimized CRC folding implementation|
+|slide_sse.c|SSE2 optimized slide_hash|
--- a/libs/zlibng/arch/x86/Makefile.in
+++ b/libs/zlibng/arch/x86/Makefile.in
@ -8,7 +8,9 @@ SFLAGS=
 INCLUDES=
 SUFFIX=

+AVX2FLAG=-mavx2
 SSE2FLAG=-msse2
+SSSE3FLAG=-mssse3
 SSE4FLAG=-msse4
 PCLMULFLAG=-mpclmul

@ -16,7 +18,18 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)

-all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
+all: \
+	x86.o x86.lo \
+	adler32_avx.o adler32_avx.lo \
+	adler32_ssse3.o adler32_ssse3.lo \
+	chunkset_avx.o chunkset_avx.lo \
+	chunkset_sse.o chunkset_sse.lo \
+	compare258_avx.o compare258_avx.lo \
+	compare258_sse.o compare258_sse.lo \
+	insert_string_sse.o insert_string_sse.lo \
+	crc_folding.o crc_folding.lo \
+	slide_avx.o slide_avx.lo \
+	slide_sse.o slide_sse.lo

 x86.o:
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
@ -24,17 +37,29 @@ x86.o:
 x86.lo:
 	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c

-fill_window_sse.o:
-	$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
+chunkset_avx.o:
+	$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c

-fill_window_sse.lo:
-	$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
+chunkset_avx.lo:
+	$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c

-deflate_quick.o:
-	$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
+chunkset_sse.o:
+	$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c

-deflate_quick.lo:
-	$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
+chunkset_sse.lo:
+	$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
+
+compare258_avx.o:
+	$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
+
+compare258_avx.lo:
+	$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
+
+compare258_sse.o:
+	$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
+
+compare258_sse.lo:
+	$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c

 insert_string_sse.o:
 	$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
@ -48,6 +73,30 @@ crc_folding.o:
 crc_folding.lo:
 	$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c

+slide_avx.o:
+	$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
+
+slide_avx.lo:
+	$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
+
+slide_sse.o:
+	$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
+slide_sse.lo:
+	$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
+
+adler32_avx.o: $(SRCDIR)/adler32_avx.c
+	$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
+
+adler32_avx.lo: $(SRCDIR)/adler32_avx.c
+	$(CC) $(SFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
+
+adler32_ssse3.o: $(SRCDIR)/adler32_ssse3.c
+	$(CC) $(CFLAGS) $(SSSE3FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c
+
+adler32_ssse3.lo: $(SRCDIR)/adler32_ssse3.c
+	$(CC) $(SFLAGS) $(SSSE3FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c
+
 mostlyclean: clean
 clean:
 	rm -f *.o *.lo *~
--- a/libs/zlibng/arch/x86/adler32_avx.c
+++ b/libs/zlibng/arch/x86/adler32_avx.c
@ -0,0 +1,117 @@
+/* adler32_avx.c -- compute the Adler-32 checksum of a data stream using AVX2
+ * Copyright (C) 1995-2011 Mark Adler
+ * Authors:
+ *   Brian Bockelman <bockelman@gmail.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "../../zbuild.h"
+#include "../../zutil.h"
+
+#include "../../adler32_p.h"
+
+#include <immintrin.h>
+
+#ifdef X86_AVX2_ADLER32
+
+Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len) {
+    uint32_t sum2;
+
+     /* split Adler-32 into component sums: the high 16 bits go to sum2, the low 16 bits stay in adler */
+    sum2 = (adler >> 16) & 0xffff;
+    adler &= 0xffff;
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (UNLIKELY(len == 1))
+        return adler32_len_1(adler, buf, sum2);
+
+    /* initial Adler-32 value (deferred check for len == 1 speed) */
+    if (UNLIKELY(buf == NULL))
+        return 1L;
+
+    /* in case short lengths are provided, keep it somewhat fast */
+    if (UNLIKELY(len < 16))
+        return adler32_len_16(adler, buf, len, sum2);
+
+    uint32_t ALIGNED_(32) s1[8], s2[8];
+
+    memset(s1, 0, sizeof(s1)); s1[7] = adler; // TODO: would a masked load be faster?
+    memset(s2, 0, sizeof(s2)); s2[7] = sum2;
+
+    char ALIGNED_(32) dot1[32] = \
+        {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    __m256i dot1v = _mm256_load_si256((__m256i*)dot1);
+    char ALIGNED_(32) dot2[32] = \
+        {32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
+         16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+    __m256i dot2v = _mm256_load_si256((__m256i*)dot2);
+    short ALIGNED_(32) dot3[16] = \
+        {1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1};
+    __m256i dot3v = _mm256_load_si256((__m256i*)dot3);
+
+    // We will need to multiply vs1_0 by 32 (one 32-byte block), done as a left shift by 5 bits.
+    char ALIGNED_(32) shift[16] = {5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    __m128i shiftv = _mm_load_si128((__m128i*)shift);
+
+    while (len >= 32) {
+       __m256i vs1 = _mm256_load_si256((__m256i*)s1);
+       __m256i vs2 = _mm256_load_si256((__m256i*)s2);
+       __m256i vs1_0 = vs1;
+
+       int k = (len < NMAX ? (int)len : NMAX);
+       k -= k % 32;
+       len -= k;
+
+       while (k >= 32) {
+           /*
+              vs1 = adler + sum(c[i])
+              vs2 = sum2 + 32 vs1 + sum( (32-i+1) c[i] )
+           */
+           __m256i vbuf = _mm256_loadu_si256((__m256i*)buf);
+           buf += 32;
+           k -= 32;
+
+           __m256i v_short_sum1 = _mm256_maddubs_epi16(vbuf, dot1v); // multiply-add, resulting in 16 shorts.
+           __m256i vsum1 = _mm256_madd_epi16(v_short_sum1, dot3v);   // sum 16 shorts to 8 int32_t;
+           __m256i v_short_sum2 = _mm256_maddubs_epi16(vbuf, dot2v);
+           vs1 = _mm256_add_epi32(vsum1, vs1);
+           __m256i vsum2 = _mm256_madd_epi16(v_short_sum2, dot3v);
+           vs1_0 = _mm256_sll_epi32(vs1_0, shiftv);
+           vsum2 = _mm256_add_epi32(vsum2, vs2);
+           vs2   = _mm256_add_epi32(vsum2, vs1_0);
+           vs1_0 = vs1;
+       }
+
+       // At this point, we have partial sums stored in vs1 and vs2.  There are AVX512 instructions that
+       // would allow us to sum these quickly (VP4DPWSSD).  For now, just unpack and move on.
+       uint32_t ALIGNED_(32) s1_unpack[8];
+       uint32_t ALIGNED_(32) s2_unpack[8];
+
+       _mm256_store_si256((__m256i*)s1_unpack, vs1);
+       _mm256_store_si256((__m256i*)s2_unpack, vs2);
+
+       adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE) +
+               (s1_unpack[4] % BASE) + (s1_unpack[5] % BASE) + (s1_unpack[6] % BASE) + (s1_unpack[7] % BASE);
+       adler %= BASE;
+       s1[7] = adler;
+
+       sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE) +
+              (s2_unpack[4] % BASE) + (s2_unpack[5] % BASE) + (s2_unpack[6] % BASE) + (s2_unpack[7] % BASE);
+       sum2 %= BASE;
+       s2[7] = sum2;
+    }
+
+    while (len) {
+        len--;
+        adler += *buf++;
+        sum2 += adler;
+    }
+    adler %= BASE;
+    sum2 %= BASE;
+
+    /* return recombined sums */
+    return adler | (sum2 << 16);
+}
+
+#endif
--- a/libs/zlibng/arch/x86/adler32_ssse3.c
+++ b/libs/zlibng/arch/x86/adler32_ssse3.c
@ -0,0 +1,118 @@
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995-2011 Mark Adler
+ * Authors:
+ *   Brian Bockelman <bockelman@gmail.com>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "../../zbuild.h"
+#include "../../zutil.h"
+
+#include "../../adler32_p.h"
+
+#ifdef X86_SSSE3_ADLER32
+
+#include <immintrin.h>
+
+/* Compute the Adler-32 checksum of buf[0..len), continuing from the running checksum `adler`.
+ * Inputs shorter than 16 bytes are delegated to the scalar helpers; longer inputs are consumed
+ * 16 bytes per step with SSSE3 intrinsics and any remaining (< 16) bytes are folded in by a
+ * scalar loop. Returns 1 when buf is NULL (the len == 1 fast path is checked first).
+ * The result packs the low sum in bits 0..15 and the second sum in bits 16..31.
+ */
+Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len) {
+    uint32_t sum2;
+
+     /* split Adler-32 into component sums */
+    sum2 = (adler >> 16) & 0xffff;
+    adler &= 0xffff;
+
+    /* in case user likes doing a byte at a time, keep it fast */
+    if (UNLIKELY(len == 1))
+        return adler32_len_1(adler, buf, sum2);
+
+    /* initial Adler-32 value (deferred check for len == 1 speed) */
+    if (UNLIKELY(buf == NULL))
+        return 1L;
+
+    /* in case short lengths are provided, keep it somewhat fast */
+    if (UNLIKELY(len < 16))
+        return adler32_len_16(adler, buf, len, sum2);
+
+    /* Seed vectors for the two running sums: only lane 3 carries the current value, the other
+     * lanes start at zero; the four lanes are added back together after every batch. */
+    uint32_t ALIGNED_(16) s1[4], s2[4];
+
+    s1[0] = s1[1] = s1[2] = 0; s1[3] = adler;
+    s2[0] = s2[1] = s2[2] = 0; s2[3] = sum2;
+
+    /* dot1: unit weights, so maddubs yields plain sums of adjacent byte pairs. */
+    char ALIGNED_(16) dot1[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
+    __m128i dot1v = _mm_load_si128((__m128i*)dot1);
+    /* dot2: descending weights 16..1 applied to the 16 bytes of a step. */
+    char ALIGNED_(16) dot2[16] = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
+    __m128i dot2v = _mm_load_si128((__m128i*)dot2);
+    /* dot3: unit weights, so madd_epi16 just widens and adds adjacent 16-bit sums to 32 bits. */
+    short ALIGNED_(16) dot3[8] = {1, 1, 1, 1, 1, 1, 1, 1};
+    __m128i dot3v = _mm_load_si128((__m128i*)dot3);
+
+    // Shift count for _mm_sll_epi32: the low 64 bits hold 4, so every 32-bit lane is shifted
+    // left by 4, i.e. multiplied by 16 (the number of bytes consumed per step).
+
+    char ALIGNED_(16) shift[16] = {4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+    __m128i shiftv = _mm_load_si128((__m128i*)shift);
+
+    while (len >= 16) {
+       __m128i vs1 = _mm_load_si128((__m128i*)s1);
+       __m128i vs2 = _mm_load_si128((__m128i*)s2);
+       /* vs1_0 holds the value of vs1 from before the current 16-byte step. */
+       __m128i vs1_0 = vs1;
+
+       /* Size of this batch: at most NMAX bytes, rounded down to a multiple of 16.
+        * NOTE(review): NMAX is defined outside this block; presumably it bounds how many bytes
+        * may be accumulated before the 32-bit sums must be reduced -- confirm. */
+       int k = (len < NMAX ? (int)len : NMAX);
+       k -= k % 16;
+       len -= k;
+
+       while (k >= 16) {
+           /*
+              vs1 = adler + sum(c[i])
+              vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] )
+
+              NOTE: 256-bit equivalents are:
+                _mm256_maddubs_epi16 <- operates on 32 bytes to 16 shorts
+                _mm256_madd_epi16    <- Sums 16 shorts to 8 int32_t.
+              We could rewrite the below to use 256-bit instructions instead of 128-bit.
+           */
+           __m128i vbuf = _mm_loadu_si128((__m128i*)buf);
+           buf += 16;
+           k -= 16;
+
+           __m128i v_short_sum1 = _mm_maddubs_epi16(vbuf, dot1v); // multiply-add, resulting in 8 shorts.
+           __m128i vsum1 = _mm_madd_epi16(v_short_sum1, dot3v);  // sum 8 shorts to 4 int32_t;
+           __m128i v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v);
+           vs1 = _mm_add_epi32(vsum1, vs1);
+           __m128i vsum2 = _mm_madd_epi16(v_short_sum2, dot3v);
+           /* 16 * (vs1 before this step): its contribution to vs2 for these 16 bytes. */
+           vs1_0 = _mm_sll_epi32(vs1_0, shiftv);
+           vsum2 = _mm_add_epi32(vsum2, vs2);
+           vs2   = _mm_add_epi32(vsum2, vs1_0);
+           vs1_0 = vs1;
+       }
+
+       // At this point, we have partial sums stored in vs1 and vs2.  There are AVX512 instructions that
+       // would allow us to sum these quickly (VP4DPWSSD).  For now, just unpack and move on.
+
+       uint32_t ALIGNED_(16) s1_unpack[4];
+       uint32_t ALIGNED_(16) s2_unpack[4];
+
+       _mm_store_si128((__m128i*)s1_unpack, vs1);
+       _mm_store_si128((__m128i*)s2_unpack, vs2);
+
+       /* Reduce each lane modulo BASE before adding, then reduce the total; the result is
+        * written back to lane 3 as the seed for the next batch. */
+       adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE);
+       adler %= BASE;
+       s1[3] = adler;
+
+       sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE);
+       sum2 %= BASE;
+       s2[3] = sum2;
+    }
+
+    /* Scalar tail: fewer than 16 bytes remain once the vector loop exits. */
+    while (len) {
+        len--;
+        adler += *buf++;
+        sum2 += adler;
+    }
+    adler %= BASE;
+    sum2 %= BASE;
+
+    /* return recombined sums */
+    return adler | (sum2 << 16);
+}
+
+#endif
--- a/libs/zlibng/arch/x86/chunkset_avx.c
+++ b/libs/zlibng/arch/x86/chunkset_avx.c
@ -0,0 +1,50 @@
+/* chunkset_avx.c -- AVX inline functions to copy small data chunks.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "zbuild.h"
+#include "zutil.h"
+
+#ifdef X86_AVX_CHUNKSET
+#include <immintrin.h>
+
+typedef __m256i chunk_t;
+
+#define HAVE_CHUNKMEMSET_1
+#define HAVE_CHUNKMEMSET_2
+#define HAVE_CHUNKMEMSET_4
+#define HAVE_CHUNKMEMSET_8
+
+/* Replicate the single byte at `from` into every byte lane of *chunk. */
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
+    const int8_t fill = *(int8_t *)from;
+    *chunk = _mm256_set1_epi8(fill);
+}
+
+/* Broadcast the 2-byte pattern at `from` into every 16-bit lane of *chunk.
+ * `from` is a byte pointer and carries no alignment guarantee, so the pattern is fetched
+ * with memcpy instead of dereferencing a casted pointer (a misaligned, aliasing-violating
+ * read is undefined behaviour in C). */
+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
+    int16_t pattern;
+    memcpy(&pattern, from, sizeof(pattern));
+    *chunk = _mm256_set1_epi16(pattern);
+}
+
+/* Broadcast the 4-byte pattern at `from` into every 32-bit lane of *chunk.
+ * `from` is a byte pointer and carries no alignment guarantee, so the pattern is fetched
+ * with memcpy instead of dereferencing a casted pointer (a misaligned, aliasing-violating
+ * read is undefined behaviour in C). */
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
+    int32_t pattern;
+    memcpy(&pattern, from, sizeof(pattern));
+    *chunk = _mm256_set1_epi32(pattern);
+}
+
+/* Broadcast the 8-byte pattern at `from` into every 64-bit lane of *chunk.
+ * `from` is a byte pointer and carries no alignment guarantee, so the pattern is fetched
+ * with memcpy instead of dereferencing a casted pointer (a misaligned, aliasing-violating
+ * read is undefined behaviour in C). */
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
+    int64_t pattern;
+    memcpy(&pattern, from, sizeof(pattern));
+    *chunk = _mm256_set1_epi64x(pattern);
+}
+
+/* Read one 256-bit chunk from `s` (no alignment required) into *chunk. */
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
+    const __m256i *src = (const __m256i *)s;
+    *chunk = _mm256_loadu_si256(src);
+}
+
+/* Write the 256-bit value *chunk to `out` (no alignment required). */
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
+    __m256i *dst = (__m256i *)out;
+    _mm256_storeu_si256(dst, *chunk);
+}
+
+#define CHUNKSIZE        chunksize_avx
+#define CHUNKCOPY        chunkcopy_avx
+#define CHUNKCOPY_SAFE   chunkcopy_safe_avx
+#define CHUNKUNROLL      chunkunroll_avx
+#define CHUNKMEMSET      chunkmemset_avx
+#define CHUNKMEMSET_SAFE chunkmemset_safe_avx
+
+#include "chunkset_tpl.h"
+
+#endif
--- a/libs/zlibng/arch/x86/chunkset_sse.c
+++ b/libs/zlibng/arch/x86/chunkset_sse.c
@ -0,0 +1,51 @@
+/* chunkset_sse.c -- SSE inline functions to copy small data chunks.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+#include "zutil.h"
+
+#ifdef X86_SSE2
+#include <immintrin.h>
+
+typedef __m128i chunk_t;
+
+#define HAVE_CHUNKMEMSET_1
+#define HAVE_CHUNKMEMSET_2
+#define HAVE_CHUNKMEMSET_4
+#define HAVE_CHUNKMEMSET_8
+
+/* Replicate the single byte at `from` into every byte lane of *chunk. */
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
+    const int8_t fill = *(int8_t *)from;
+    *chunk = _mm_set1_epi8(fill);
+}
+
+/* Broadcast the 2-byte pattern at `from` into every 16-bit lane of *chunk.
+ * `from` is a byte pointer and carries no alignment guarantee, so the pattern is fetched
+ * with memcpy instead of dereferencing a casted pointer (a misaligned, aliasing-violating
+ * read is undefined behaviour in C). */
+static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
+    int16_t pattern;
+    memcpy(&pattern, from, sizeof(pattern));
+    *chunk = _mm_set1_epi16(pattern);
+}
+
+/* Broadcast the 4-byte pattern at `from` into every 32-bit lane of *chunk.
+ * `from` is a byte pointer and carries no alignment guarantee, so the pattern is fetched
+ * with memcpy instead of dereferencing a casted pointer (a misaligned, aliasing-violating
+ * read is undefined behaviour in C). */
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
+    int32_t pattern;
+    memcpy(&pattern, from, sizeof(pattern));
+    *chunk = _mm_set1_epi32(pattern);
+}
+
+/* Broadcast the 8-byte pattern at `from` into both 64-bit lanes of *chunk.
+ * `from` is a byte pointer and carries no alignment guarantee, so the pattern is fetched
+ * with memcpy instead of dereferencing a casted pointer (a misaligned, aliasing-violating
+ * read is undefined behaviour in C). */
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
+    int64_t pattern;
+    memcpy(&pattern, from, sizeof(pattern));
+    *chunk = _mm_set1_epi64x(pattern);
+}
+
+/* Read one 128-bit chunk from `s` (no alignment required) into *chunk. */
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
+    const __m128i *src = (const __m128i *)s;
+    *chunk = _mm_loadu_si128(src);
+}
+
+/* Write the 128-bit value *chunk to `out` (no alignment required). */
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
+    __m128i *dst = (__m128i *)out;
+    _mm_storeu_si128(dst, *chunk);
+}
+
+#define CHUNKSIZE        chunksize_sse2
+#define CHUNKCOPY        chunkcopy_sse2
+#define CHUNKCOPY_SAFE   chunkcopy_safe_sse2
+#define CHUNKUNROLL      chunkunroll_sse2
+#define CHUNKMEMSET      chunkmemset_sse2
+#define CHUNKMEMSET_SAFE chunkmemset_safe_sse2
+
+#include "chunkset_tpl.h"
+
+#endif
--- a/libs/zlibng/arch/x86/compare258_avx.c
+++ b/libs/zlibng/arch/x86/compare258_avx.c
@ -0,0 +1,67 @@
+/* compare258_avx.c -- AVX2 version of compare258
+ * Copyright Mika T. Lindqvist  <postmaster@raasu.org>
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "../../zbuild.h"
+#include "../../zutil.h"
+
+#include "fallback_builtins.h"
+
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+
+#include <immintrin.h>
+#ifdef _MSC_VER
+#  include <nmmintrin.h>
+#endif
+
+/* UNALIGNED_OK, AVX2 intrinsic comparison.
+ * Returns how many leading bytes (0..256) of src0 and src1 are identical. Both buffers are
+ * read with unaligned 32-byte loads, one group per loop pass. */
+static inline uint32_t compare256_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
+    uint32_t offset;
+
+    for (offset = 0; offset < 256; offset += 32) {
+        const __m256i lhs = _mm256_loadu_si256((const __m256i *)(src0 + offset));
+        const __m256i rhs = _mm256_loadu_si256((const __m256i *)(src1 + offset));
+        /* One bit per byte: set where the bytes are identical, clear where they differ. */
+        const unsigned equal_bits = (unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs, rhs));
+
+        if (equal_bits != 0xFFFFFFFF) {
+            /* After inversion the lowest set bit marks the first differing byte of the group. */
+            return offset + (uint32_t)__builtin_ctz(~equal_bits);
+        }
+    }
+
+    return 256;
+}
+
+/* Match length (0..258) of src0 against src1: the first two bytes are compared with one
+ * 16-bit load each, the following 256 bytes by the AVX2 compare. */
+static inline uint32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
+    const uint16_t head0 = *(uint16_t *)src0;
+    const uint16_t head1 = *(uint16_t *)src1;
+
+    if (head0 == head1)
+        return 2 + compare256_unaligned_avx2_static(src0 + 2, src1 + 2);
+
+    /* The leading pair differs, so at most the very first byte matches. */
+    return (*src0 == *src1) ? 1 : 0;
+}
+
+/* Out-of-line entry point for the AVX2 compare258: forwards both pointers to the
+ * static inline implementation and returns its match length unchanged. */
+Z_INTERNAL uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) {
+    return compare258_unaligned_avx2_static(src0, src1);
+}
+
+#define LONGEST_MATCH   longest_match_unaligned_avx2
+#define COMPARE256      compare256_unaligned_avx2_static
+#define COMPARE258      compare258_unaligned_avx2_static
+
+#include "match_tpl.h"
+
+#endif
--- a/libs/zlibng/arch/x86/compare258_sse.c
+++ b/libs/zlibng/arch/x86/compare258_sse.c
@ -0,0 +1,74 @@
+/* compare258_sse.c -- SSE4.2 version of compare258
+ *
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Wajdi Feghali   <wajdi.k.feghali@intel.com>
+ *  Jim Guilford    <james.guilford@intel.com>
+ *  Vinodh Gopal    <vinodh.gopal@intel.com>
+ *     Erdinc Ozturk   <erdinc.ozturk@intel.com>
+ *  Jim Kukunas     <james.t.kukunas@linux.intel.com>
+ *
+ * Portions are Copyright (C) 2016 12Sided Technology, LLC.
+ * Author:
+ *  Phil Vachon     <pvachon@12sidedtech.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "../../zbuild.h"
+#include "../../zutil.h"
+
+#ifdef X86_SSE42_CMP_STR
+
+#include <immintrin.h>
+#ifdef _MSC_VER
+#  include <nmmintrin.h>
+#endif
+
+/* UNALIGNED_OK, SSE4.2 intrinsic comparison.
+ * Returns how many leading bytes (0..256) of src0 and src1 are identical, comparing
+ * 16 bytes per pcmpestri and two such groups per loop pass. */
+static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
+/* Control byte for _mm_cmpestri/_mm_cmpestrc. It must remain a macro because the intrinsics
+ * need an immediate operand; it is parenthesised, given a specific name and #undef'd after
+ * the function so it cannot rewrite an identifier in code included later in this file. */
+#define CMP256_SSE4_MODE (_SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY)
+    uint32_t len = 0;
+
+    do {
+        __m128i xmm_src0, xmm_src1;
+        uint32_t ret;
+
+        xmm_src0 = _mm_loadu_si128((__m128i *)src0);
+        xmm_src1 = _mm_loadu_si128((__m128i *)src1);
+        /* With negative polarity the returned index is that of the first differing byte. */
+        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, CMP256_SSE4_MODE);
+        /* The carry result is non-zero when at least one byte of the group differs. */
+        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, CMP256_SSE4_MODE)) {
+            return len + ret;
+        }
+        src0 += 16, src1 += 16, len += 16;
+
+        xmm_src0 = _mm_loadu_si128((__m128i *)src0);
+        xmm_src1 = _mm_loadu_si128((__m128i *)src1);
+        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, CMP256_SSE4_MODE);
+        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, CMP256_SSE4_MODE)) {
+            return len + ret;
+        }
+        src0 += 16, src1 += 16, len += 16;
+    } while (len < 256);
+
+    return 256;
+}
+#undef CMP256_SSE4_MODE
+
+/* Match length (0..258) of src0 against src1: the first two bytes are compared with one
+ * 16-bit load each, the following 256 bytes by the SSE4.2 compare. */
+static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
+    const uint16_t head0 = *(uint16_t *)src0;
+    const uint16_t head1 = *(uint16_t *)src1;
+
+    if (head0 == head1)
+        return 2 + compare256_unaligned_sse4_static(src0 + 2, src1 + 2);
+
+    /* The leading pair differs, so at most the very first byte matches. */
+    return (*src0 == *src1) ? 1 : 0;
+}
+
+/* Out-of-line entry point for the SSE4.2 compare258: forwards both pointers to the
+ * static inline implementation and returns its match length unchanged. */
+Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
+    return compare258_unaligned_sse4_static(src0, src1);
+}
+
+#define LONGEST_MATCH   longest_match_unaligned_sse4
+#define COMPARE256      compare256_unaligned_sse4_static
+#define COMPARE258      compare258_unaligned_sse4_static
+
+#include "match_tpl.h"
+
+#endif
--- a/libs/zlibng/arch/x86/crc_folding.c
+++ b/libs/zlibng/arch/x86/crc_folding.c
@ -1,5 +1,5 @@
 /*
- * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ 
+ * Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ
 * instruction.
 *
 * A white paper describing this algorithm can be found at:
@ -18,14 +18,14 @@

 #ifdef X86_PCLMULQDQ_CRC

-#include "zbuild.h"
+#include "../../zbuild.h"
 #include <inttypes.h>
 #include <immintrin.h>
 #include <wmmintrin.h>

 #include "crc_folding.h"

-ZLIB_INTERNAL void crc_fold_init(deflate_state *const s) {
+Z_INTERNAL void crc_fold_init(deflate_state *const s) {
    /* CRC_SAVE */
    _mm_storeu_si128((__m128i *)s->crc0 + 0, _mm_cvtsi32_si128(0x9db42487));
    _mm_storeu_si128((__m128i *)s->crc0 + 1, _mm_setzero_si128());
@ -227,9 +227,10 @@ static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1,
    *xmm_crc3 = _mm_castps_si128(ps_res);
 }

-ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
+Z_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
    unsigned long algn_diff;
    __m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
+    char ALIGNED_(16) partial_buf[16] = { 0 };

    /* CRC_LOAD */
    __m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
@ -241,11 +242,14 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
    if (len < 16) {
        if (len == 0)
            return;
-        xmm_crc_part = _mm_loadu_si128((__m128i *)src);
+
+        memcpy(partial_buf, src, len);
+        xmm_crc_part = _mm_loadu_si128((const __m128i *)partial_buf);
+        memcpy(dst, partial_buf, len);
        goto partial;
    }

-    algn_diff = (0 - (uintptr_t)src) & 0xF;
+    algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
    if (algn_diff) {
        xmm_crc_part = _mm_loadu_si128((__m128i *)src);
        _mm_storeu_si128((__m128i *)dst, xmm_crc_part);
@ -255,6 +259,8 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
        len -= algn_diff;

        partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
+    } else {
+        xmm_crc_part = _mm_setzero_si128();
    }

    while ((len -= 64) >= 0) {
@ -305,7 +311,7 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
            goto done;

        dst += 48;
-        xmm_crc_part = _mm_load_si128((__m128i *)src + 3);
+        memcpy(&xmm_crc_part, (__m128i *)src + 3, len);
    } else if (len + 32 >= 0) {
        len += 32;

@ -324,7 +330,7 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
            goto done;

        dst += 32;
-        xmm_crc_part = _mm_load_si128((__m128i *)src + 2);
+        memcpy(&xmm_crc_part, (__m128i *)src + 2, len);
    } else if (len + 48 >= 0) {
        len += 48;

@ -340,16 +346,18 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
            goto done;

        dst += 16;
-        xmm_crc_part = _mm_load_si128((__m128i *)src + 1);
+        memcpy(&xmm_crc_part, (__m128i *)src + 1, len);
    } else {
        len += 64;
        if (len == 0)
            goto done;
-        xmm_crc_part = _mm_load_si128((__m128i *)src);
+        memcpy(&xmm_crc_part, src, len);
    }

+    _mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part);
+    memcpy(dst, partial_buf, len);
+
 partial:
-    _mm_storeu_si128((__m128i *)dst, xmm_crc_part);
    partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
 done:
    /* CRC_SAVE */
@ -377,7 +385,7 @@ static const unsigned ALIGNED_(16) crc_mask2[4] = {
    0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
 };

-uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
+uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) {
    const __m128i xmm_mask  = _mm_load_si128((__m128i *)crc_mask);
    const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2);

@ -447,4 +455,3 @@ uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
 }

 #endif
-
--- a/libs/zlibng/arch/x86/crc_folding.h
+++ b/libs/zlibng/arch/x86/crc_folding.h
@ -10,10 +10,10 @@
 #ifndef CRC_FOLDING_H_
 #define CRC_FOLDING_H_

-#include "deflate.h"
+#include "../../deflate.h"

-ZLIB_INTERNAL void crc_fold_init(deflate_state *const);
-ZLIB_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
-ZLIB_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
+Z_INTERNAL void crc_fold_init(deflate_state *const);
+Z_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
+Z_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);

 #endif
--- a/libs/zlibng/arch/x86/ctzl.h
+++ b/libs/zlibng/arch/x86/ctzl.h
@ -1,25 +0,0 @@
-#ifndef X86_CTZL_H
-#define X86_CTZL_H
-
-#include <intrin.h>
-#ifdef X86_CPUID
-# include "x86.h"
-#endif
-
-#if defined(_MSC_VER) && !defined(__clang__)
-/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0
- * Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
- */
-static __forceinline unsigned long __builtin_ctzl(unsigned long value)
-{
-#ifdef X86_CPUID
-	if (x86_cpu_has_tzcnt)
-		return _tzcnt_u32(value);
-#endif
-	unsigned long trailing_zero;
-	_BitScanForward(&trailing_zero, value);
-	return trailing_zero;
-}
-#endif
-
-#endif
--- a/libs/zlibng/arch/x86/deflate_quick.c
+++ b/libs/zlibng/arch/x86/deflate_quick.c
--- a/libs/zlibng/arch/x86/fill_window_sse.c
+++ b/libs/zlibng/arch/x86/fill_window_sse.c
@ -1,175 +0,0 @@
-/*
- * Fill Window with SSE2-optimized hash shifting
- *
- * Copyright (C) 2013 Intel Corporation
- * Authors:
- *  Arjan van de Ven    <arjan@linux.intel.com>
- *  Jim Kukunas         <james.t.kukunas@linux.intel.com>
- *
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
-#ifdef X86_SSE2
-
-#include "zbuild.h"
-#include <immintrin.h>
-#include "deflate.h"
-#include "deflate_p.h"
-#include "functable.h"
-
-extern int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
-
-ZLIB_INTERNAL void fill_window_sse(deflate_state *s) {
-    const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);
-
-    register unsigned n;
-    register Pos *p;
-    unsigned more;    /* Amount of free space at the end of the window. */
-    unsigned int wsize = s->w_size;
-
-    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
-
-    do {
-        more = (unsigned)(s->window_size -(unsigned long)s->lookahead -(unsigned long)s->strstart);
-
-        /* Deal with !@#$% 64K limit: */
-        if (sizeof(int) <= 2) {
-            if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
-                more = wsize;
-
-            } else if (more == (unsigned)(-1)) {
-                /* Very unlikely, but possible on 16 bit machine if
-                 * strstart == 0 && lookahead == 1 (input done a byte at time)
-                 */
-                more--;
-            }
-        }
-
-        /* If the window is almost full and there is insufficient lookahead,
-         * move the upper half to the lower one to make room in the upper half.
-         */
-        if (s->strstart >= wsize+MAX_DIST(s)) {
-            memcpy(s->window, s->window+wsize, (unsigned)wsize);
-            s->match_start = (s->match_start >= wsize) ? s->match_start - wsize : 0;
-            s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
-            s->block_start -= (long) wsize;
-
-            /* Slide the hash table (could be avoided with 32 bit values
-               at the expense of memory usage). We slide even when level == 0
-               to keep the hash table consistent if we switch back to level > 0
-               later. (Using level 0 permanently is not an optimal usage of
-               zlib, so we don't care about this pathological case.)
-             */
-            n = s->hash_size;
-            p = &s->head[n];
-            p -= 8;
-            do {
-                __m128i value, result;
-
-                value = _mm_loadu_si128((__m128i *)p);
-                result = _mm_subs_epu16(value, xmm_wsize);
-                _mm_storeu_si128((__m128i *)p, result);
-
-                p -= 8;
-                n -= 8;
-            } while (n > 0);
-
-            n = wsize;
-            p = &s->prev[n];
-            p -= 8;
-            do {
-                __m128i value, result;
-
-                value = _mm_loadu_si128((__m128i *)p);
-                result = _mm_subs_epu16(value, xmm_wsize);
-                _mm_storeu_si128((__m128i *)p, result);
-
-                p -= 8;
-                n -= 8;
-            } while (n > 0);
-            more += wsize;
-        }
-        if (s->strm->avail_in == 0) break;
-
-        /* If there was no sliding:
-         *    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
-         *    more == window_size - lookahead - strstart
-         * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
-         * => more >= window_size - 2*WSIZE + 2
-         * In the BIG_MEM or MMAP case (not yet supported),
-         *   window_size == input_size + MIN_LOOKAHEAD  &&
-         *   strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
-         * Otherwise, window_size == 2*WSIZE so more >= 2.
-         * If there was sliding, more >= WSIZE. So in all cases, more >= 2.
-         */
-        Assert(more >= 2, "more < 2");
-
-        n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
-        s->lookahead += n;
-
-        /* Initialize the hash value now that we have some input: */
-        if (s->lookahead + s->insert >= MIN_MATCH) {
-            unsigned int str = s->strstart - s->insert;
-            s->ins_h = s->window[str];
-            if (str >= 1)
-                functable.insert_string(s, str + 2 - MIN_MATCH, 1);
-#if MIN_MATCH != 3
-#error Call insert_string() MIN_MATCH-3 more times
-            while (s->insert) {
-                functable.insert_string(s, str, 1);
-                str++;
-                s->insert--;
-                if (s->lookahead + s->insert < MIN_MATCH)
-                    break;
-            }
-#else
-            unsigned int count;
-            if (unlikely(s->lookahead == 1)){
-                count = s->insert - 1;
-            }else{
-                count = s->insert;
-            }
-            functable.insert_string(s, str, count);
-            s->insert -= count;
-#endif
-        }
-        /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
-         * but this is not important since only literal bytes will be emitted.
-         */
-    } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
-
-    /* If the WIN_INIT bytes after the end of the current data have never been
-     * written, then zero those bytes in order to avoid memory check reports of
-     * the use of uninitialized (or uninitialised as Julian writes) bytes by
-     * the longest match routines.  Update the high water mark for the next
-     * time through here.  WIN_INIT is set to MAX_MATCH since the longest match
-     * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
-     */
-    if (s->high_water < s->window_size) {
-        unsigned long curr = s->strstart + (unsigned long)(s->lookahead);
-        unsigned long init;
-
-        if (s->high_water < curr) {
-            /* Previous high water mark below current data -- zero WIN_INIT
-             * bytes or up to end of window, whichever is less.
-             */
-            init = s->window_size - curr;
-            if (init > WIN_INIT)
-                init = WIN_INIT;
-            memset(s->window + curr, 0, (unsigned)init);
-            s->high_water = curr + init;
-        } else if (s->high_water < (unsigned long)curr + WIN_INIT) {
-            /* High water mark at or above current data, but below current data
-             * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
-             * to end of window, whichever is less.
-             */
-            init = (unsigned long)curr + WIN_INIT - s->high_water;
-            if (init > s->window_size - s->high_water)
-                init = s->window_size - s->high_water;
-            memset(s->window + s->high_water, 0, (unsigned)init);
-            s->high_water += init;
-        }
-    }
-
-    Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
-}
-#endif
--- a/libs/zlibng/arch/x86/insert_string_sse.c
+++ b/libs/zlibng/arch/x86/insert_string_sse.c
@ -5,52 +5,42 @@
 *
 */

-#include "zbuild.h"
-#include "deflate.h"
-
-/* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
- *    input characters and the first MIN_MATCH bytes of str are valid
- *    (except for the last MIN_MATCH-1 bytes of the input file).
- */
-#ifdef X86_SSE4_2_CRC_HASH
-ZLIB_INTERNAL Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count) {
-    Pos ret = 0;
-    unsigned int idx;
-    unsigned int *ip, val, h;
-
-    for (idx = 0; idx < count; idx++) {
-        ip = (unsigned *)&s->window[str+idx];
-        memcpy(&val, ip, sizeof(val));
-        h = 0;
-
-        if (s->level >= TRIGGER_LEVEL)
-            val &= 0xFFFFFF;
-
+#include "../../zbuild.h"
+#include <immintrin.h>
 #ifdef _MSC_VER
-        h = _mm_crc32_u32(h, val);
-#elif defined(X86_SSE4_2_CRC_INTRIN)
-        h = __builtin_ia32_crc32si(h, val);
+#  include <nmmintrin.h>
+#endif
+#include "../../deflate.h"
+
+#ifdef X86_SSE42_CRC_INTRIN
+#  ifdef _MSC_VER
+#    define UPDATE_HASH(s, h, val)\
+        h = _mm_crc32_u32(h, val)
+#  else
+#    define UPDATE_HASH(s, h, val)\
+        h = __builtin_ia32_crc32si(h, val)
+#  endif
 #else
-        __asm__ __volatile__ (
-            "crc32 %1,%0\n\t"
-            : "+r" (h)
-            : "r" (val)
-        );
-#endif
-        Pos head = s->head[h & s->hash_mask];
-        if (head != str+idx) {
-            s->prev[(str+idx) & s->w_mask] = head;
-            s->head[h & s->hash_mask] = str+idx;
-            if (idx == count-1)
-              ret = head;
-        } else if (idx == count - 1) {
-          ret = str + idx;
-        }
+#  ifdef _MSC_VER
+/* MSVC inline-assembly fallback computing h = crc32(h, val), like the intrinsic and GCC
+ * variants of this macro. The destination operand of crc32 (EAX) is the accumulator, so it
+ * is loaded from h and written back to h; val is only the data operand. */
+#    define UPDATE_HASH(s, h, val) {\
+        __asm mov edx, val\
+        __asm mov eax, h\
+        __asm crc32 eax, edx\
+        __asm mov h, eax\
     }
-    return ret;
-}
+#  else
+#    define UPDATE_HASH(s, h, val) \
+        __asm__ __volatile__ (\
+            "crc32 %1,%0\n\t"\
+            : "+r" (h)\
+            : "r" (val)\
+        );
+#  endif
+#endif
+
+#define INSERT_STRING       insert_string_sse4
+#define QUICK_INSERT_STRING quick_insert_string_sse4
+
+#ifdef X86_SSE42_CRC_HASH
+#  include "../../insert_string_tpl.h"
 #endif
--- a/libs/zlibng/arch/x86/slide_avx.c
+++ b/libs/zlibng/arch/x86/slide_avx.c
@ -0,0 +1,47 @@
+/*
+ * AVX2 optimized hash slide, based on Intel's slide_sse implementation
+ *
+ * Copyright (C) 2017 Intel Corporation
+ * Authors:
+ *   Arjan van de Ven   <arjan@linux.intel.com>
+ *   Jim Kukunas        <james.t.kukunas@linux.intel.com>
+ *   Mika T. Lindqvist  <postmaster@raasu.org>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "../../zbuild.h"
+#include "../../deflate.h"
+
+#include <immintrin.h>
+
+/* Subtract the window size, with unsigned 16-bit saturation, from the `entries` positions
+ * stored in table[0..entries). Works from the top of the table downwards, 16 positions
+ * (one 256-bit vector of 16-bit lanes) per step. */
+static inline void slide_hash_chain_avx2(Pos *table, unsigned entries, const __m256i ymm_wsize) {
+    Pos *cursor = table + entries;
+
+    do {
+        __m256i slid;
+
+        cursor -= 16;
+        slid = _mm256_subs_epu16(_mm256_loadu_si256((__m256i *)cursor), ymm_wsize);
+        _mm256_storeu_si256((__m256i *)cursor, slid);
+        entries -= 16;
+    } while (entries > 0);
+}
+
+/* Slide both hash tables of `s` down by the window size: first the HASH_SIZE entries of
+ * s->head, then the w_size entries of s->prev. */
+Z_INTERNAL void slide_hash_avx2(deflate_state *s) {
+    const uint16_t wsize = (uint16_t)s->w_size;
+    const __m256i ymm_wsize = _mm256_set1_epi16((short)wsize);
+
+    slide_hash_chain_avx2(s->head, HASH_SIZE, ymm_wsize);
+    slide_hash_chain_avx2(s->prev, wsize, ymm_wsize);
+}
--- a/libs/zlibng/arch/x86/slide_sse.c
+++ b/libs/zlibng/arch/x86/slide_sse.c
@ -0,0 +1,46 @@
+/*
+ * SSE optimized hash slide
+ *
+ * Copyright (C) 2017 Intel Corporation
+ * Authors:
+ *   Arjan van de Ven   <arjan@linux.intel.com>
+ *   Jim Kukunas        <james.t.kukunas@linux.intel.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+#include "../../zbuild.h"
+#include "../../deflate.h"
+
+#include <immintrin.h>
+
+/* Subtract the window size, with unsigned 16-bit saturation, from the `entries` positions
+ * stored in table[0..entries). Works from the top of the table downwards, 8 positions
+ * (one 128-bit vector of 16-bit lanes) per step. */
+static inline void slide_hash_chain_sse2(Pos *table, unsigned entries, const __m128i xmm_wsize) {
+    Pos *cursor = table + entries;
+
+    do {
+        __m128i slid;
+
+        cursor -= 8;
+        slid = _mm_subs_epu16(_mm_loadu_si128((__m128i *)cursor), xmm_wsize);
+        _mm_storeu_si128((__m128i *)cursor, slid);
+        entries -= 8;
+    } while (entries > 0);
+}
+
+/* Slide both hash tables of `s` down by the window size: first the HASH_SIZE entries of
+ * s->head, then the w_size entries of s->prev. */
+Z_INTERNAL void slide_hash_sse2(deflate_state *s) {
+    const uint16_t wsize = (uint16_t)s->w_size;
+    const __m128i xmm_wsize = _mm_set1_epi16((short)wsize);
+
+    slide_hash_chain_sse2(s->head, HASH_SIZE, xmm_wsize);
+    slide_hash_chain_sse2(s->prev, wsize, xmm_wsize);
+}
--- a/libs/zlibng/arch/x86/x86.c
+++ b/libs/zlibng/arch/x86/x86.c
@ -8,61 +8,73 @@
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

-#include "zutil.h"
+#include "../../zutil.h"

 #ifdef _MSC_VER
-#include <intrin.h>
+#  include <intrin.h>
 #else
 // Newer versions of GCC and clang come with cpuid.h
-#include <cpuid.h>
+#  include <cpuid.h>
 #endif

-ZLIB_INTERNAL int x86_cpu_has_sse2;
-ZLIB_INTERNAL int x86_cpu_has_sse42;
-ZLIB_INTERNAL int x86_cpu_has_pclmulqdq;
-ZLIB_INTERNAL int x86_cpu_has_tzcnt;
+Z_INTERNAL int x86_cpu_has_avx2;
+Z_INTERNAL int x86_cpu_has_sse2;
+Z_INTERNAL int x86_cpu_has_ssse3;
+Z_INTERNAL int x86_cpu_has_sse42;
+Z_INTERNAL int x86_cpu_has_pclmulqdq;
+Z_INTERNAL int x86_cpu_has_tzcnt;

 static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
 #ifdef _MSC_VER
-	unsigned int registers[4];
-	__cpuid(registers, info);
+    unsigned int registers[4];
+    __cpuid((int *)registers, info);

-	*eax = registers[0];
-	*ebx = registers[1];
-	*ecx = registers[2];
-	*edx = registers[3];
+    *eax = registers[0];
+    *ebx = registers[1];
+    *ecx = registers[2];
+    *edx = registers[3];
 #else
-	unsigned int _eax;
-	unsigned int _ebx;
-	unsigned int _ecx;
-	unsigned int _edx;
-	__cpuid(info, _eax, _ebx, _ecx, _edx);
-	*eax = _eax;
-	*ebx = _ebx;
-	*ecx = _ecx;
-	*edx = _edx;
+    __cpuid(info, *eax, *ebx, *ecx, *edx);
 #endif
 }

-void ZLIB_INTERNAL x86_check_features(void) {
-	unsigned eax, ebx, ecx, edx;
-	unsigned maxbasic;
+static void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
+#ifdef _MSC_VER
+    unsigned int registers[4];
+    __cpuidex((int *)registers, info, subinfo);

-	cpuid(0, &maxbasic, &ebx, &ecx, &edx);
-
-	cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
-
-	x86_cpu_has_sse2 = edx & 0x4000000;
-	x86_cpu_has_sse42 = ecx & 0x100000;
-	x86_cpu_has_pclmulqdq = ecx & 0x2;
-
-	if (maxbasic >= 7) {
-	  cpuid(7, &eax, &ebx, &ecx, &edx);
-
-	  // check BMI1 bit
-	  // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
-	  x86_cpu_has_tzcnt = ebx & 0x8;
-	} else {
-	  x86_cpu_has_tzcnt = 0;
-	}
+    *eax = registers[0];
+    *ebx = registers[1];
+    *ecx = registers[2];
+    *edx = registers[3];
+#else
+    __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx);
+#endif
+}
+
+void Z_INTERNAL x86_check_features(void) {
+    unsigned eax, ebx, ecx, edx;
+    unsigned maxbasic;
+
+    cpuid(0, &maxbasic, &ebx, &ecx, &edx);
+
+    cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
+
+    x86_cpu_has_sse2 = edx & 0x4000000;
+    x86_cpu_has_ssse3 = ecx & 0x200;
+    x86_cpu_has_sse42 = ecx & 0x100000;
+    x86_cpu_has_pclmulqdq = ecx & 0x2;
+
+    if (maxbasic >= 7) {
+        cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
+
+        // check BMI1 bit
+        // Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
+        x86_cpu_has_tzcnt = ebx & 0x8;
+        // check AVX2 bit
+        x86_cpu_has_avx2 = ebx & 0x20;
+    } else {
+        x86_cpu_has_tzcnt = 0;
+        x86_cpu_has_avx2 = 0;
+    }
 }
--- a/libs/zlibng/arch/x86/x86.h
+++ b/libs/zlibng/arch/x86/x86.h
@ -1,16 +1,18 @@
- /* cpu.h -- check for CPU features
- * Copyright (C) 2013 Intel Corporation Jim Kukunas
- * For conditions of distribution and use, see copyright notice in zlib.h
- */
+/* cpu.h -- check for CPU features
+* Copyright (C) 2013 Intel Corporation Jim Kukunas
+* For conditions of distribution and use, see copyright notice in zlib.h
+*/

 #ifndef CPU_H_
 #define CPU_H_

+extern int x86_cpu_has_avx2;
 extern int x86_cpu_has_sse2;
+extern int x86_cpu_has_ssse3;
 extern int x86_cpu_has_sse42;
 extern int x86_cpu_has_pclmulqdq;
 extern int x86_cpu_has_tzcnt;

-void ZLIB_INTERNAL x86_check_features(void);
+void Z_INTERNAL x86_check_features(void);

 #endif /* CPU_H_ */
--- a/libs/zlibng/chunkset.c
+++ b/libs/zlibng/chunkset.c
@ -0,0 +1,81 @@
+/* chunkset.c -- inline functions to copy small data chunks.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+#include "zutil.h"
+
+// We need sizeof(chunk_t) to be 8, no matter what.
+#if defined(UNALIGNED64_OK)
+typedef uint64_t chunk_t;
+#elif defined(UNALIGNED_OK)
+typedef struct chunk_t { uint32_t u32[2]; } chunk_t;
+#else
+typedef struct chunk_t { uint8_t u8[8]; } chunk_t;
+#endif
+
+#define HAVE_CHUNKMEMSET_1
+#define HAVE_CHUNKMEMSET_4
+#define HAVE_CHUNKMEMSET_8
+
+/* Fill *chunk with sizeof(chunk_t) copies of the single byte at `from`.
+ *
+ * The replication constants are unsigned on purpose: with a plain (signed)
+ * hexadecimal literal the multiplication is carried out in a signed type and
+ * overflows for any byte value >= 0x80, which is undefined behaviour. */
+static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
+#if defined(UNALIGNED64_OK)
+    *chunk = 0x0101010101010101ULL * (uint8_t)*from;
+#elif defined(UNALIGNED_OK)
+    chunk->u32[0] = 0x01010101U * (uint8_t)*from;
+    chunk->u32[1] = chunk->u32[0];
+#else
+    memset(chunk, *from, sizeof(chunk_t));
+#endif
+}
+
+/* Fill *chunk with two copies of the 4 bytes found at `from`. */
+static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
+#if defined(UNALIGNED64_OK)
+    /* Place the 32-bit word in both halves of the 64-bit chunk. */
+    const uint64_t word = *(uint32_t *)from;
+    *chunk = (word << 32) | word;
+#elif defined(UNALIGNED_OK)
+    const uint32_t word = *(uint32_t *)from;
+    chunk->u32[0] = word;
+    chunk->u32[1] = word;
+#else
+    uint8_t *dst = (uint8_t *)chunk;
+    memcpy(dst, from, 4);
+    memcpy(dst + 4, from, 4);
+#endif
+}
+
+/* Copy the 8 bytes found at `from` into *chunk. */
+static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
+#if defined(UNALIGNED64_OK)
+    const uint64_t *src = (uint64_t *)from;
+    *chunk = *src;
+#elif defined(UNALIGNED_OK)
+    const uint32_t *src = (uint32_t *)from;
+    chunk->u32[0] = src[0];
+    chunk->u32[1] = src[1];
+#else
+    memcpy(chunk, from, sizeof(chunk_t));
+#endif
+}
+
+/* Load one chunk from `s` into *chunk. This forwards to chunkmemset_8(), casting
+ * away the const qualifier because that helper takes a non-const pointer. */
+static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
+    chunkmemset_8((uint8_t *)s, chunk);
+}
+
+/* Write the contents of *chunk to the sizeof(chunk_t) bytes starting at `out`. */
+static inline void storechunk(uint8_t *out, chunk_t *chunk) {
+#if defined(UNALIGNED64_OK)
+    uint64_t *dst = (uint64_t *)out;
+    *dst = *chunk;
+#elif defined(UNALIGNED_OK)
+    uint32_t *dst = (uint32_t *)out;
+    dst[0] = chunk->u32[0];
+    dst[1] = chunk->u32[1];
+#else
+    memcpy(out, chunk, sizeof(chunk_t));
+#endif
+}
+
+#define CHUNKSIZE        chunksize_c
+#define CHUNKCOPY        chunkcopy_c
+#define CHUNKCOPY_SAFE   chunkcopy_safe_c
+#define CHUNKUNROLL      chunkunroll_c
+#define CHUNKMEMSET      chunkmemset_c
+#define CHUNKMEMSET_SAFE chunkmemset_safe_c
+
+#include "chunkset_tpl.h"
--- a/libs/zlibng/chunkset_tpl.h
+++ b/libs/zlibng/chunkset_tpl.h
@ -0,0 +1,172 @@
+/* chunkset_tpl.h -- inline functions to copy small data chunks.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* Report the size, in bytes, of the chunk type used by this implementation. */
+Z_INTERNAL uint32_t CHUNKSIZE(void) {
+    return (uint32_t)sizeof(chunk_t);
+}
+
+/* memcpy-like copy performed in chunk_t-sized steps. Callers must guarantee that
+   writing at least sizeof(chunk_t) bytes of output is acceptable even for a
+   shorter length, that the length is non-zero, and that `from` lags `out` by at
+   least sizeof(chunk_t) bytes (or that the ranges do not overlap, or that the
+   distance is less than the length of the copy).
+
+   The first chunk is copied unconditionally and covers the "odd" leading bytes;
+   copies of sizeof(chunk_t) bytes or fewer therefore never enter the loop, which
+   is meant to help branch prediction as well as memory bus utilisation. */
+Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
+    chunk_t tmp;
+    unsigned head;
+    unsigned remaining;
+
+    --len;
+    /* Bytes consumed by the first (possibly partial) chunk: 1..sizeof(chunk_t). */
+    head = (unsigned)((len % sizeof(chunk_t)) + 1);
+
+    loadchunk(from, &tmp);
+    storechunk(out, &tmp);
+    out += head;
+    from += head;
+
+    /* Everything left is a whole number of chunks. */
+    for (remaining = (unsigned)(len / sizeof(chunk_t)); remaining > 0; --remaining) {
+        loadchunk(from, &tmp);
+        storechunk(out, &tmp);
+        out += sizeof(chunk_t);
+        from += sizeof(chunk_t);
+    }
+    return out;
+}
+
+/* Behaves like CHUNKCOPY but avoids writing past the legal output: when fewer
+   than sizeof(chunk_t) bytes separate `out` from `safe`, the copy is made with
+   exact-size pieces selected by the bits of `len`. */
+Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
+    /* Enough room for a whole chunk: the plain chunked copy can be used. */
+    if ((safe - out) >= (ptrdiff_t)sizeof(chunk_t))
+        return CHUNKCOPY(out, from, len);
+
+    /* The 16-byte piece only applies to chunk types wider than 16 bytes. */
+    if (sizeof(chunk_t) > 16 && (len & 16)) {
+        memcpy(out, from, 16);
+        out += 16;
+        from += 16;
+    }
+    if (len & 8) {
+        memcpy(out, from, 8);
+        out += 8;
+        from += 8;
+    }
+    if (len & 4) {
+        memcpy(out, from, 4);
+        out += 4;
+        from += 4;
+    }
+    if (len & 2) {
+        memcpy(out, from, 2);
+        out += 2;
+        from += 2;
+    }
+    if (len & 1) {
+        *out++ = *from++;
+    }
+    return out;
+}
+
+/* Perform short copies until the distance can be rewritten as being at least
+   sizeof(chunk_t).
+
+   Each step copies one chunk, advances `out` by the current distance, reduces
+   *len by the same amount and doubles *dist. The source pointer stays fixed at
+   the position computed on entry.
+
+   This relies on it being acceptable to overwrite at least the first
+   2*sizeof(chunk_t) bytes of output even if the copy is shorter than that; the
+   original comment justifies this by inflate_fast() starting every iteration
+   with at least 258 bytes of output space (the maximum length of one token). */
+Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
+    unsigned char const *src = out - *dist;
+    chunk_t tmp;
+    for (;;) {
+        unsigned step = *dist;
+        if (step >= *len || step >= sizeof(chunk_t))
+            break;
+        loadchunk(src, &tmp);
+        storechunk(out, &tmp);
+        out += step;
+        *len -= step;
+        *dist = step + step;
+    }
+    return out;
+}
+
+/* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST.
+   Return OUT + LEN.
+
+   Strategy, in order:
+     - len shorter than one chunk: plain byte-by-byte copy;
+     - dist equal to one of the sizes with a HAVE_CHUNKMEMSET_<n> helper, or equal
+       to sizeof(chunk_t): build one repeating chunk and store it repeatedly;
+     - any other dist below sizeof(chunk_t): repeated CHUNKCOPY_SAFE calls;
+     - dist above sizeof(chunk_t): CHUNKUNROLL followed by CHUNKCOPY. */
+Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
+    /* Debug performance related issues when len < sizeof(uint64_t):
+       Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
+    Assert(dist > 0, "cannot have a distance 0");
+
+    unsigned char *from = out - dist;
+    chunk_t chunk;
+    unsigned sz = sizeof(chunk);
+    /* Too short for even one chunk store: copy byte by byte. */
+    if (len < sz) {
+        do {
+            *out++ = *from++;
+            --len;
+        } while (len != 0);
+        return out;
+    }
+
+    /* The conditionally compiled tests below form a single if/else-if chain;
+       each enabled helper fills `chunk` with the repeating pattern for its dist. */
+#ifdef HAVE_CHUNKMEMSET_1
+    if (dist == 1) {
+        chunkmemset_1(from, &chunk);
+    } else
+#endif
+#ifdef HAVE_CHUNKMEMSET_2
+    if (dist == 2) {
+        chunkmemset_2(from, &chunk);
+    } else
+#endif
+#ifdef HAVE_CHUNKMEMSET_4
+    if (dist == 4) {
+        chunkmemset_4(from, &chunk);
+    } else
+#endif
+#ifdef HAVE_CHUNKMEMSET_8
+    if (dist == 8) {
+        chunkmemset_8(from, &chunk);
+    } else
+#endif
+    if (dist == sz) {
+        loadchunk(from, &chunk);
+    } else if (dist < sz) {
+        /* No pattern helper for this distance: copy `dist` bytes at a time,
+           using the last output byte as the write limit. */
+        unsigned char *end = out + len - 1;
+        while (len > dist) {
+            out = CHUNKCOPY_SAFE(out, from, dist, end);
+            len -= dist;
+        }
+        if (len > 0) {
+            out = CHUNKCOPY_SAFE(out, from, len, end);
+        }
+        return out;
+    } else {
+        /* dist > sz: CHUNKUNROLL may update dist and len before the chunked copy. */
+        out = CHUNKUNROLL(out, &dist, &len);
+        return CHUNKCOPY(out, out - dist, len);
+    }
+
+    /* Only the pattern branches reach this point: store whole chunks first. */
+    unsigned rem = len % sz;
+    len -= rem;
+    while (len) {
+        storechunk(out, &chunk);
+        out += sz;
+        len -= sz;
+    }
+
+    /* Last, deal with the case when LEN is not a multiple of SZ. */
+    if (rem)
+        memcpy(out, from, rem);
+    out += rem;
+
+    return out;
+}
+
+/* Variant of CHUNKMEMSET for use near the end of the output buffer: when fewer
+   than 3*sizeof(chunk_t) bytes are `left`, the copy is done one byte at a time;
+   otherwise the work is handed to CHUNKMEMSET. Returns the advanced `out`. */
+Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
+    if (left >= (unsigned)(3 * sizeof(chunk_t)))
+        return CHUNKMEMSET(out, dist, len);
+
+    for (; len > 0; --len, ++out)
+        *out = *(out - dist);
+    return out;
+}
--- a/libs/zlibng/cmake/detect-arch.c
+++ b/libs/zlibng/cmake/detect-arch.c
@ -0,0 +1,99 @@
+// detect-arch.c -- Detect compiler architecture and raise preprocessor error
+//                  containing a simple arch identifier.
+// Copyright (C) 2019 Hans Kristian Rosbach
+// Licensed under the Zlib license, see LICENSE.md for details
+
+// x86_64
+#if defined(__x86_64__) || defined(_M_X64)
+    #error archfound x86_64
+
+// x86
+#elif defined(__i386) || defined(_M_IX86)
+    #error archfound i686
+
+// ARM
+#elif defined(__aarch64__) || defined(_M_ARM64)
+    #error archfound aarch64
+#elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM)
+    #if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__)
+        #error archfound armv8
+    #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
+        #error archfound armv7
+    #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6M__)
+        #error archfound armv6
+    #elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
+        #error archfound armv5
+    #elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARCH_5E__)
+        #error archfound armv4
+    #elif defined(__ARM_ARCH_3__) || defined(__TARGET_ARCH_3M__)
+        #error archfound armv3
+    #elif defined(__ARM_ARCH_2__)
+        #error archfound armv2
+    #endif
+
+// PowerPC
+// 64-bit targets are split by endianness. The byte-order macros are tested with
+// defined() first: undefined macros evaluate to 0 in #if, so a bare comparison
+// would be true (0 == 0) and report little-endian on compilers lacking them.
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
+    #if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
+        #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+            #error archfound powerpc64le
+        #else
+            #error archfound powerpc64
+        #endif
+    #else
+        #error archfound powerpc
+    #endif
+
+// --------------- Less common architectures alphabetically below ---------------
+
+// ALPHA
+#elif defined(__alpha__) || defined(__alpha)
+    #error archfound alpha
+
+// Blackfin
+#elif defined(__BFIN__)
+    #error archfound blackfin
+
+// Itanium
+#elif defined(__ia64) || defined(_M_IA64)
+    #error archfound ia64
+
+// MIPS
+#elif defined(__mips__) || defined(__mips)
+    #error archfound mips
+
+// Motorola 68000-series
+#elif defined(__m68k__)
+    #error archfound m68k
+
+// SuperH
+#elif defined(__sh__)
+    #error archfound sh
+
+// SPARC
+#elif defined(__sparc__) || defined(__sparc)
+    #if defined(__sparcv9) || defined(__sparc_v9__)
+        #error archfound sparc9
+    #elif defined(__sparcv8) || defined(__sparc_v8__)
+        #error archfound sparc8
+    #endif
+
+// SystemZ
+#elif defined(__370__)
+    #error archfound s370
+#elif defined(__s390__)
+    #error archfound s390
+#elif defined(__s390x) || defined(__zarch__)
+    #error archfound s390x
+
+// PARISC
+#elif defined(__hppa__)
+    #error archfound parisc
+
+// RS-6000
+#elif defined(__THW_RS6000)
+    #error archfound rs6000
+
+// return 'unrecognized' if we do not know what architecture this is
+#else
+    #error archfound unrecognized
+#endif
--- a/libs/zlibng/cmake/detect-arch.cmake
+++ b/libs/zlibng/cmake/detect-arch.cmake
@ -0,0 +1,93 @@
+# detect-arch.cmake -- Detect compiler architecture and set ARCH and BASEARCH
+# Copyright (C) 2019 Hans Kristian Rosbach
+# Licensed under the Zlib license, see LICENSE.md for details
+set(ARCHDETECT_FOUND TRUE)
+
+# Determine ARCH. Sources, in priority order:
+#   1. the first entry of CMAKE_OSX_ARCHITECTURES,
+#   2. MSVC_C_ARCHITECTURE_ID when building with MSVC,
+#   3. CMAKE_C_COMPILER_TARGET when cross-compiling,
+#   4. the "archfound <id>" marker that cmake/detect-arch.c raises through #error.
+# ARCH is left unset, or set to "unknown", when none of these yields a result.
+if(CMAKE_OSX_ARCHITECTURES)
+    # If multiple architectures are requested (universal build), pick only the first
+    list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH)
+elseif(MSVC)
+    if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "X86")
+        set(ARCH "i686")
+    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "x64")
+        set(ARCH "x86_64")
+    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7")
+        set(ARCH "arm")
+    elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64")
+        set(ARCH "aarch64")
+    endif()
+elseif(CMAKE_CROSSCOMPILING)
+    set(ARCH ${CMAKE_C_COMPILER_TARGET})
+else()
+    # Let the preprocessor parse detect-arch.c and raise an error containing the
+    # arch identifier. The compile is expected to fail; only its output is used.
+    # Scratch files go to the binary directory so the source tree stays clean.
+    enable_language(C)
+    try_run(
+        run_result_unused
+        compile_result_unused
+        ${CMAKE_CURRENT_BINARY_DIR}
+        ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect-arch.c
+        COMPILE_OUTPUT_VARIABLE RAWOUTPUT
+        CMAKE_FLAGS "-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}"
+    )
+
+    # Extract the word following the "archfound" tag. A plain REGEX REPLACE would
+    # return the whole compiler output when the tag is missing, so match first
+    # and fall back to "unknown" explicitly.
+    if("${RAWOUTPUT}" MATCHES "archfound ([a-zA-Z0-9_]+)")
+        set(ARCH "${CMAKE_MATCH_1}")
+    else()
+        set(ARCH unknown)
+    endif()
+    # detect-arch.c reports "unrecognized" for compilers it does not know; treat
+    # that like "unknown" so the CMAKE_SYSTEM_PROCESSOR fallback is used.
+    if("${ARCH}" STREQUAL "unrecognized")
+        set(ARCH unknown)
+    endif()
+endif()
+
+# Guarantee that ARCH holds a value: keep a detected one, otherwise use CMake's
+# own notion of the target processor.
+if(ARCH AND NOT ARCH STREQUAL "unknown")
+    message(STATUS "Arch detected: '${ARCH}'")
+else()
+    set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
+    message(STATUS "Arch not recognized, falling back to cmake arch: '${ARCH}'")
+endif()
+
+# Base arch detection
+# Maps ARCH onto a coarse family name in BASEARCH and sets the matching
+# BASEARCH_<FAMILY>_FOUND flag. The patterns are unanchored regular expressions,
+# so they match anywhere inside ARCH and the first matching branch wins; keep
+# the order in mind when adding entries.
+if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)")
+    set(BASEARCH "x86")
+    set(BASEARCH_X86_FOUND TRUE)
+elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)")
+    set(BASEARCH "arm")
+    set(BASEARCH_ARM_FOUND TRUE)
+elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?")
+    set(BASEARCH "ppc")
+    set(BASEARCH_PPC_FOUND TRUE)
+elseif("${ARCH}" MATCHES "alpha")
+    set(BASEARCH "alpha")
+    set(BASEARCH_ALPHA_FOUND TRUE)
+elseif("${ARCH}" MATCHES "blackfin")
+    set(BASEARCH "blackfin")
+    set(BASEARCH_BLACKFIN_FOUND TRUE)
+elseif("${ARCH}" MATCHES "ia64")
+    set(BASEARCH "ia64")
+    set(BASEARCH_IA64_FOUND TRUE)
+elseif("${ARCH}" MATCHES "mips")
+    set(BASEARCH "mips")
+    set(BASEARCH_MIPS_FOUND TRUE)
+elseif("${ARCH}" MATCHES "m68k")
+    set(BASEARCH "m68k")
+    set(BASEARCH_M68K_FOUND TRUE)
+elseif("${ARCH}" MATCHES "sh")
+    set(BASEARCH "sh")
+    set(BASEARCH_SH_FOUND TRUE)
+elseif("${ARCH}" MATCHES "sparc[89]?")
+    set(BASEARCH "sparc")
+    set(BASEARCH_SPARC_FOUND TRUE)
+elseif("${ARCH}" MATCHES "s3[679]0x?")
+    set(BASEARCH "s360")
+    set(BASEARCH_S360_FOUND TRUE)
+elseif("${ARCH}" MATCHES "parisc")
+    set(BASEARCH "parisc")
+    set(BASEARCH_PARISC_FOUND TRUE)
+elseif("${ARCH}" MATCHES "rs6000")
+    set(BASEARCH "rs6000")
+    set(BASEARCH_RS6000_FOUND TRUE)
+else()
+    # Nothing matched: x86 is assumed and the choice is reported.
+    set(BASEARCH "x86")
+    set(BASEARCH_X86_FOUND TRUE)
+    message(STATUS "Basearch '${ARCH}' not recognized, defaulting to 'x86'.")
+endif()
+message(STATUS "Basearch of '${ARCH}' has been detected as: '${BASEARCH}'")
--- a/libs/zlibng/cmake/detect-sanitizer.cmake
+++ b/libs/zlibng/cmake/detect-sanitizer.cmake
@ -0,0 +1,123 @@
+# detect-sanitizer.cmake -- Detect supported compiler sanitizer flags
+# Licensed under the Zlib license, see LICENSE.md for details
+
+# check_sanitizer_support(<known_checks> <supported_checks>)
+#
+# known_checks     - list of sanitizer check names to probe.
+# supported_checks - name of the variable that receives the accepted checks as a
+#                    comma-separated string (empty when none was accepted).
+#
+# Being a macro, it runs in the caller's scope: available_checks, compile_checks
+# and CMAKE_REQUIRED_FLAGS are modified there (the latter is cleared after each probe).
+macro(check_sanitizer_support known_checks supported_checks)
+    set(available_checks "")
+
+    # Build list of supported sanitizer flags by incrementally trying compilation with
+    # known sanitizer checks
+
+    foreach(check ${known_checks})
+        # Candidate = everything accepted so far plus the check being probed
+        if(available_checks STREQUAL "")
+            set(compile_checks "${check}")
+        else()
+            set(compile_checks "${available_checks},${check}")
+        endif()
+
+        set(CMAKE_REQUIRED_FLAGS "-fsanitize=${compile_checks}")
+
+        check_c_source_compiles("int main() { return 0; }" HAS_SANITIZER_${check}
+            FAIL_REGEX "not supported|unrecognized command|unknown option")
+
+        set(CMAKE_REQUIRED_FLAGS)
+
+        # Keep the extended list only when the probe compiled
+        if(HAS_SANITIZER_${check})
+            set(available_checks ${compile_checks})
+        endif()
+    endforeach()
+
+    set(${supported_checks} ${available_checks})
+endmacro()
+
+# add_address_sanitizer()
+#
+# Appends -fsanitize=<checks> to CMAKE_C_FLAGS for the address-sanitizer checks
+# the compiler accepts. Unless CMAKE_CROSSCOMPILING_EMULATOR is set, the leak
+# sanitizer is probed and appended in the same way.
+macro(add_address_sanitizer)
+    set(known_checks
+        address
+        pointer-compare
+        pointer-subtract
+        )
+
+    check_sanitizer_support("${known_checks}" supported_checks)
+    # The expansion is quoted so that an empty result is still a valid if() operand.
+    if(NOT "${supported_checks}" STREQUAL "")
+        message(STATUS "Address sanitizer is enabled: ${supported_checks}")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
+    else()
+        message(STATUS "Address sanitizer is not supported")
+    endif()
+
+    if(CMAKE_CROSSCOMPILING_EMULATOR)
+        # Only check for leak sanitizer if not cross-compiling due to qemu crash
+        message(WARNING "Leak sanitizer is not supported when cross compiling")
+    else()
+        # Leak sanitizer requires address sanitizer
+        check_sanitizer_support("leak" supported_checks)
+        if(NOT "${supported_checks}" STREQUAL "")
+            message(STATUS "Leak sanitizer is enabled: ${supported_checks}")
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
+        else()
+            message(STATUS "Leak sanitizer is not supported")
+        endif()
+    endif()
+endmacro()
+
+# add_memory_sanitizer()
+#
+# Appends -fsanitize=memory to CMAKE_C_FLAGS when the compiler accepts it.
+macro(add_memory_sanitizer)
+    check_sanitizer_support("memory" supported_checks)
+    # The expansion is quoted so that an empty result is still a valid if() operand.
+    if(NOT "${supported_checks}" STREQUAL "")
+        message(STATUS "Memory sanitizer is enabled: ${supported_checks}")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
+    else()
+        message(STATUS "Memory sanitizer is not supported")
+    endif()
+endmacro()
+
+# add_undefined_sanitizer()
+#
+# Appends -fsanitize=<checks> to CMAKE_C_FLAGS for the undefined-behavior checks
+# the compiler accepts. The alignment check is probed only when UNALIGNED_OK is
+# not set, and the object-size check only when CMAKE_C_FLAGS does not contain -O0.
+macro(add_undefined_sanitizer)
+    set(known_checks
+        array-bounds
+        bool
+        bounds
+        builtin
+        enum
+        float-cast-overflow
+        float-divide-by-zero
+        function
+        integer-divide-by-zero
+        local-bounds
+        null
+        nonnull-attribute
+        pointer-overflow
+        return
+        returns-nonnull-attribute
+        shift
+        shift-base
+        shift-exponent
+        signed-integer-overflow
+        undefined
+        unsigned-integer-overflow
+        unsigned-shift-base
+        vla-bound
+        vptr
+        )
+
+    # Only check for alignment sanitizer flag if unaligned access is not supported
+    if(NOT UNALIGNED_OK)
+        list(APPEND known_checks alignment)
+    endif()
+    # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
+    if(NOT CMAKE_C_FLAGS MATCHES "-O0")
+        list(APPEND known_checks object-size)
+    endif()
+
+    check_sanitizer_support("${known_checks}" supported_checks)
+
+    # The expansion is quoted so that an empty result is still a valid if() operand.
+    if(NOT "${supported_checks}" STREQUAL "")
+        message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}")
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
+
+        # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
+        # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
+        if(UNALIGNED_OK)
+            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-sanitize=alignment")
+        endif()
+    else()
+        message(STATUS "Undefined behavior sanitizer is not supported")
+    endif()
+endmacro()
--- a/libs/zlibng/cmake/run-and-compare.cmake
+++ b/libs/zlibng/cmake/run-and-compare.cmake
@ -0,0 +1,48 @@
+# run-and-compare.cmake -- run a command, capture its stdout and compare it to a file.
+#
+# Required variables:
+#   COMMAND - command line to run
+#   OUTPUT  - file that receives the command's stdout
+#   COMPARE - file the output is compared against
+# Optional variables:
+#   INPUT                - file fed to the command's stdin
+#   SUCCESS_EXIT         - forwarded unchanged to run-and-redirect.cmake
+#   IGNORE_LINE_ENDINGS  - when true, both files are normalized to LF before comparing
+if(NOT DEFINED OUTPUT OR NOT DEFINED COMPARE OR NOT DEFINED COMMAND)
+    message(FATAL_ERROR "Run and compare arguments missing")
+endif()
+
+# COMMAND and SUCCESS_EXIT are passed as quoted arguments so that list values
+# (containing ';') reach the child script as a single -D definition.
+if(INPUT)
+    # Run command with stdin input and redirect stdout to output
+    execute_process(COMMAND ${CMAKE_COMMAND}
+        "-DCOMMAND=${COMMAND}"
+        -DINPUT=${INPUT}
+        -DOUTPUT=${OUTPUT}
+        "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+        -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+        RESULT_VARIABLE CMD_RESULT)
+else()
+    # Run command and redirect stdout to output
+    execute_process(COMMAND ${CMAKE_COMMAND}
+        "-DCOMMAND=${COMMAND}"
+        -DOUTPUT=${OUTPUT}
+        "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+        -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+        RESULT_VARIABLE CMD_RESULT)
+endif()
+
+if(CMD_RESULT)
+    message(FATAL_ERROR "Run before compare failed: ${CMD_RESULT}")
+endif()
+
+# Use configure_file to normalize line-endings
+# From here on COMPARE and OUTPUT refer to the normalized ".cmp" copies.
+if(IGNORE_LINE_ENDINGS)
+    configure_file(${COMPARE} ${COMPARE}.cmp NEWLINE_STYLE LF)
+    set(COMPARE ${COMPARE}.cmp)
+    configure_file(${OUTPUT} ${OUTPUT}.cmp NEWLINE_STYLE LF)
+    set(OUTPUT ${OUTPUT}.cmp)
+endif()
+
+# Compare that output is equal to specified file
+execute_process(COMMAND ${CMAKE_COMMAND}
+    -E compare_files ${COMPARE} ${OUTPUT}
+    RESULT_VARIABLE CMD_RESULT)
+
+# Delete temporary files used to normalize line-endings
+# (only the ".cmp" copies are removed, since the variables were re-pointed above)
+if(IGNORE_LINE_ENDINGS)
+    file(REMOVE ${COMPARE} ${OUTPUT})
+endif()
+
+if(CMD_RESULT)
+    message(FATAL_ERROR "Run compare failed: ${CMD_RESULT}")
+endif()
--- a/libs/zlibng/cmake/run-and-redirect.cmake
+++ b/libs/zlibng/cmake/run-and-redirect.cmake
@ -0,0 +1,38 @@
+# Runs COMMAND with stdout written to OUTPUT and, when INPUT is given, stdin read
+# from INPUT. An exit code found in SUCCESS_EXIT counts as success; any other
+# failing result aborts with a fatal error.
+
+# Without an explicit OUTPUT, stdout goes to the platform's null device
+if(NOT DEFINED OUTPUT)
+    set(OUTPUT /dev/null)
+    if(WIN32)
+        set(OUTPUT NUL)
+    endif()
+endif()
+
+if(NOT INPUT)
+    # No stdin file: only stdout is redirected
+    execute_process(COMMAND ${COMMAND}
+        RESULT_VARIABLE CMD_RESULT
+        OUTPUT_FILE ${OUTPUT})
+else()
+    # The stdin file has to exist before the command is started
+    if(NOT EXISTS ${INPUT})
+        message(FATAL_ERROR "Cannot find input: ${INPUT}")
+    endif()
+    execute_process(COMMAND ${COMMAND}
+        RESULT_VARIABLE CMD_RESULT
+        INPUT_FILE ${INPUT}
+        OUTPUT_FILE ${OUTPUT})
+endif()
+
+# An exit code listed in SUCCESS_EXIT is mapped to 0
+if(SUCCESS_EXIT)
+    list(FIND SUCCESS_EXIT ${CMD_RESULT} _INDEX)
+    if(NOT _INDEX EQUAL -1)
+        set(CMD_RESULT 0)
+    endif()
+endif()
+
+# Anything still non-zero at this point is a failure
+if(CMD_RESULT)
+    message(FATAL_ERROR "${COMMAND} failed: ${CMD_RESULT}")
+endif()
--- a/libs/zlibng/cmake/test-compress.cmake
+++ b/libs/zlibng/cmake/test-compress.cmake
@ -0,0 +1,188 @@
+if(TARGET)
+    set(COMPRESS_TARGET ${TARGET})
+    set(DECOMPRESS_TARGET ${TARGET})
+endif()
+
+if(NOT DEFINED INPUT OR NOT DEFINED COMPRESS_TARGET OR NOT DEFINED DECOMPRESS_TARGET)
+    message(FATAL_ERROR "Compress test arguments missing")
+endif()
+
+# Set default values
+if(NOT DEFINED COMPARE)
+    set(COMPARE ON)
+endif()
+if(NOT DEFINED COMPRESS_ARGS)
+    set(COMPRESS_ARGS -c -k)
+endif()
+if(NOT DEFINED DECOMPRESS_ARGS)
+    set(DECOMPRESS_ARGS -d -c)
+endif()
+if(NOT DEFINED GZIP_VERIFY)
+    set(GZIP_VERIFY ON)
+endif()
+if(NOT DEFINED SUCCESS_EXIT)
+    set(SUCCESS_EXIT 0)
+endif()
+
+# Generate unique output path so multiple tests can be executed at the same time
+if(NOT OUTPUT)
+    # Output name based on input and unique id
+    string(RANDOM UNIQUE_ID)
+    set(OUTPUT ${INPUT}-${UNIQUE_ID})
+else()
+    # Output name appends unique id in case multiple tests with same output name
+    string(RANDOM LENGTH 6 UNIQUE_ID)
+    set(OUTPUT ${OUTPUT}-${UNIQUE_ID})
+endif()
+string(REPLACE ".gz" "" OUTPUT "${OUTPUT}")
+
+# Remove every temporary file this script may have created for the current OUTPUT
+# prefix. Called before each fatal error and once at the end of the script.
+macro(cleanup)
+    # Cleanup temporary minigzip files
+    file(REMOVE ${OUTPUT}.gz ${OUTPUT}.out)
+    # Cleanup temporary gzip files
+    file(REMOVE ${OUTPUT}.gzip.gz ${OUTPUT}.gzip.out)
+endmacro()
+
+# Compress input file
+if(NOT EXISTS ${INPUT})
+    message(FATAL_ERROR "Cannot find compress input: ${INPUT}")
+endif()
+
+set(COMPRESS_COMMAND ${COMPRESS_TARGET} ${COMPRESS_ARGS})
+
+execute_process(COMMAND ${CMAKE_COMMAND}
+    "-DCOMMAND=${COMPRESS_COMMAND}"
+    -DINPUT=${INPUT}
+    -DOUTPUT=${OUTPUT}.gz
+    "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+    -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+    RESULT_VARIABLE CMD_RESULT)
+
+if(CMD_RESULT)
+    cleanup()
+    message(FATAL_ERROR "Compress failed: ${CMD_RESULT}")
+endif()
+
+# Decompress output
+if(NOT EXISTS ${OUTPUT}.gz)
+    cleanup()
+    message(FATAL_ERROR "Cannot find decompress input: ${OUTPUT}.gz")
+endif()
+
+set(DECOMPRESS_COMMAND ${DECOMPRESS_TARGET} ${DECOMPRESS_ARGS})
+
+execute_process(COMMAND ${CMAKE_COMMAND}
+    "-DCOMMAND=${DECOMPRESS_COMMAND}"
+    -DINPUT=${OUTPUT}.gz
+    -DOUTPUT=${OUTPUT}.out
+    "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+    -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+    RESULT_VARIABLE CMD_RESULT)
+
+if(CMD_RESULT)
+    cleanup()
+    message(FATAL_ERROR "Decompress failed: ${CMD_RESULT}")
+endif()
+
+if(COMPARE)
+    # Compare decompressed output with original input file
+    execute_process(COMMAND ${CMAKE_COMMAND}
+        -E compare_files ${INPUT} ${OUTPUT}.out
+        RESULT_VARIABLE CMD_RESULT)
+
+    if(CMD_RESULT)
+        cleanup()
+        message(FATAL_ERROR "Compare minigzip decompress failed: ${CMD_RESULT}")
+    endif()
+endif()
+
+if(GZIP_VERIFY AND NOT "${COMPRESS_ARGS}" MATCHES "-T")
+    # Transparent writing does not use gzip format
+    find_program(GZIP gzip)
+    if(GZIP)
+        if(NOT EXISTS ${OUTPUT}.gz)
+            cleanup()
+            message(FATAL_ERROR "Cannot find gzip decompress input: ${OUTPUT}.gz")
+        endif()
+
+        # Check gzip can decompress our compressed output
+        set(GZ_DECOMPRESS_COMMAND ${GZIP} --decompress)
+
+        execute_process(COMMAND ${CMAKE_COMMAND}
+            "-DCOMMAND=${GZ_DECOMPRESS_COMMAND}"
+            -DINPUT=${OUTPUT}.gz
+            -DOUTPUT=${OUTPUT}.gzip.out
+            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+            RESULT_VARIABLE CMD_RESULT)
+
+        if(CMD_RESULT)
+            cleanup()
+            message(FATAL_ERROR "Gzip decompress failed: ${CMD_RESULT}")
+        endif()
+
+        # Compare gzip output with original input file
+        execute_process(COMMAND ${CMAKE_COMMAND}
+            -E compare_files ${INPUT} ${OUTPUT}.gzip.out
+            RESULT_VARIABLE CMD_RESULT)
+
+        if(CMD_RESULT)
+            cleanup()
+            message(FATAL_ERROR "Compare gzip decompress failed: ${CMD_RESULT}")
+        endif()
+
+        # The next step compresses INPUT with gzip, so INPUT is the file that must
+        # exist (the message already names it).
+        if(NOT EXISTS ${INPUT})
+            cleanup()
+            message(FATAL_ERROR "Cannot find gzip compress input: ${INPUT}")
+        endif()
+
+        # Compress input file with gzip
+        set(GZ_COMPRESS_COMMAND ${GZIP} --stdout)
+
+        execute_process(COMMAND ${CMAKE_COMMAND}
+            "-DCOMMAND=${GZ_COMPRESS_COMMAND}"
+            -DINPUT=${INPUT}
+            -DOUTPUT=${OUTPUT}.gzip.gz
+            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+            RESULT_VARIABLE CMD_RESULT)
+
+        if(CMD_RESULT)
+            cleanup()
+            message(FATAL_ERROR "Gzip compress failed: ${CMD_RESULT}")
+        endif()
+
+        # The next step decompresses the archive gzip just produced, so check that
+        # file rather than the minigzip archive created earlier.
+        if(NOT EXISTS ${OUTPUT}.gzip.gz)
+            cleanup()
+            message(FATAL_ERROR "Cannot find minigzip decompress input: ${OUTPUT}.gzip.gz")
+        endif()
+
+        # Check minigzip can decompress gzip compressed output
+        execute_process(COMMAND ${CMAKE_COMMAND}
+            "-DCOMMAND=${DECOMPRESS_COMMAND}"
+            -DINPUT=${OUTPUT}.gzip.gz
+            -DOUTPUT=${OUTPUT}.gzip.out
+            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
+            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
+            RESULT_VARIABLE CMD_RESULT)
+
+        if(CMD_RESULT)
+            cleanup()
+            message(FATAL_ERROR "Minigzip decompress gzip failed: ${CMD_RESULT}")
+        endif()
+
+        if(COMPARE)
+            # Compare original input file with gzip decompressed output
+            execute_process(COMMAND ${CMAKE_COMMAND}
+                -E compare_files ${INPUT} ${OUTPUT}.gzip.out
+                RESULT_VARIABLE CMD_RESULT)
+
+            if(CMD_RESULT)
+                cleanup()
+                message(FATAL_ERROR "Compare minigzip decompress gzip failed: ${CMD_RESULT}")
+            endif()
+        endif()
+    endif()
+endif()
+
+cleanup()
--- a/libs/zlibng/cmake/toolchain-aarch64.cmake
+++ b/libs/zlibng/cmake/toolchain-aarch64.cmake
@ -0,0 +1,26 @@
+# Cross-compile toolchain for aarch64-linux-gnu; built binaries run under qemu-aarch64.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+set(CMAKE_SYSTEM_VERSION 1)
+
+set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
+set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
+
+# Report the target triple. It is printed only after being set above, and the
+# same variable is used as in the other toolchain files.
+message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}")
+
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-aarch64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# The C cross-compiler is mandatory
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(NOT C_COMPILER_FULL_PATH)
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+
+# The C++ cross-compiler is optional
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/cmake/toolchain-arm.cmake
+++ b/libs/zlibng/cmake/toolchain-arm.cmake
@ -0,0 +1,24 @@
+# Cross-compile toolchain for 32-bit ARM Linux; built binaries run under qemu-arm.
+# CMAKE_C_COMPILER_TARGET is not set in this file -- presumably the invoker
+# supplies the target triple (e.g. on the cmake command line); verify against callers.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR arm)
+set(CMAKE_SYSTEM_VERSION 1)
+
+message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}")
+
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+# Host programs only; target libraries, headers and packages only
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# The C cross-compiler is mandatory
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(C_COMPILER_FULL_PATH)
+    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+else()
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+
+# The C++ cross-compiler is optional
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/cmake/toolchain-mingw-i686.cmake
+++ b/libs/zlibng/cmake/toolchain-mingw-i686.cmake
@ -0,0 +1,16 @@
+# Cross-compile toolchain for 32-bit Windows using MinGW-w64; binaries run under wine.
+set(CMAKE_SYSTEM_NAME Windows)
+
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR wine)
+
+set(CMAKE_C_COMPILER_TARGET i686)
+set(CMAKE_CXX_COMPILER_TARGET i686)
+
+# Compilers and resource compiler of the i686 MinGW-w64 tool set
+set(CMAKE_C_COMPILER i686-w64-mingw32-gcc)
+set(CMAKE_CXX_COMPILER i686-w64-mingw32-g++)
+set(CMAKE_RC_COMPILER i686-w64-mingw32-windres)
+
+# Host programs only; libraries and headers only from the MinGW root
+set(CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
--- a/libs/zlibng/cmake/toolchain-mingw-x86_64.cmake
+++ b/libs/zlibng/cmake/toolchain-mingw-x86_64.cmake
@ -0,0 +1,16 @@
+# Cross-compile toolchain for 64-bit Windows using MinGW-w64; binaries run under wine.
+set(CMAKE_SYSTEM_NAME Windows)
+
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR wine)
+
+set(CMAKE_C_COMPILER_TARGET x86_64)
+set(CMAKE_CXX_COMPILER_TARGET x86_64)
+
+# Compilers and resource compiler of the x86_64 MinGW-w64 tool set
+set(CMAKE_C_COMPILER x86_64-w64-mingw32-gcc)
+set(CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++)
+set(CMAKE_RC_COMPILER x86_64-w64-mingw32-windres)
+
+# Host programs only; libraries and headers only from the MinGW root
+set(CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32)
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
--- a/libs/zlibng/cmake/toolchain-powerpc.cmake
+++ b/libs/zlibng/cmake/toolchain-powerpc.cmake
@ -0,0 +1,25 @@
+# Cross-compilation toolchain: Linux/powerpc target using the powerpc-linux-gnu GCC tools.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR powerpc)
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Target triple; reused below for the emulator prefix and the compiler names.
+set(CMAKE_C_COMPILER_TARGET "powerpc-linux-gnu")
+set(CMAKE_CXX_COMPILER_TARGET "powerpc-linux-gnu")
+
+# Target executables are run through qemu-ppc, with -L pointing at /usr/<triple>/.
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+# Programs come from the host; libraries, headers and packages only from the target root.
+# NOTE(review): no CMAKE_FIND_ROOT_PATH or CMAKE_SYSROOT is set in this file -- confirm it is supplied elsewhere.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# The C cross-compiler is mandatory: configuration stops if <triple>-gcc is not found.
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(NOT C_COMPILER_FULL_PATH)
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+
+# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when <triple>-g++ exists.
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/cmake/toolchain-powerpc64.cmake
+++ b/libs/zlibng/cmake/toolchain-powerpc64.cmake
@ -0,0 +1,25 @@
+# Cross-compilation toolchain: Linux/ppc64 target using the powerpc64-linux-gnu GCC tools.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR ppc64)
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Target triple; reused below for the emulator prefix and the compiler names.
+set(CMAKE_C_COMPILER_TARGET "powerpc64-linux-gnu")
+set(CMAKE_CXX_COMPILER_TARGET "powerpc64-linux-gnu")
+
+# Target executables are run through qemu-ppc64, with -L pointing at /usr/<triple>/.
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+# Programs come from the host; libraries, headers and packages only from the target root.
+# NOTE(review): no CMAKE_FIND_ROOT_PATH or CMAKE_SYSROOT is set in this file -- confirm it is supplied elsewhere.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# The C cross-compiler is mandatory: configuration stops if <triple>-gcc is not found.
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(NOT C_COMPILER_FULL_PATH)
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+
+# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when <triple>-g++ exists.
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/cmake/toolchain-powerpc64le.cmake
+++ b/libs/zlibng/cmake/toolchain-powerpc64le.cmake
@ -0,0 +1,25 @@
+# Cross-compilation toolchain: Linux/ppc64le target using the powerpc64le-linux-gnu GCC tools.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR ppc64le)
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Target triple; reused below for the emulator prefix and the compiler names.
+set(CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu")
+set(CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu")
+
+# Target executables are run through qemu-ppc64le, with -L pointing at /usr/<triple>/.
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+# Programs come from the host; libraries, headers and packages only from the target root.
+# NOTE(review): no CMAKE_FIND_ROOT_PATH or CMAKE_SYSROOT is set in this file -- confirm it is supplied elsewhere.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# The C cross-compiler is mandatory: configuration stops if <triple>-gcc is not found.
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(NOT C_COMPILER_FULL_PATH)
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+
+# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when <triple>-g++ exists.
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/cmake/toolchain-s390x.cmake
+++ b/libs/zlibng/cmake/toolchain-s390x.cmake
@ -0,0 +1,25 @@
+# Cross-compilation toolchain: Linux/s390x target using the s390x-linux-gnu GCC tools.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR s390x)
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Target triple; reused below for the emulator prefix and the compiler names.
+set(CMAKE_C_COMPILER_TARGET "s390x-linux-gnu")
+set(CMAKE_CXX_COMPILER_TARGET "s390x-linux-gnu")
+
+# Target executables are run through qemu-s390x, with -L pointing at /usr/<triple>/.
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-s390x -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+# Programs come from the host; libraries, headers and packages only from the target root.
+# NOTE(review): no CMAKE_FIND_ROOT_PATH or CMAKE_SYSROOT is set in this file -- confirm it is supplied elsewhere.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# The C cross-compiler is mandatory: configuration stops if <triple>-gcc is not found.
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(NOT C_COMPILER_FULL_PATH)
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+
+# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when <triple>-g++ exists.
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/cmake/toolchain-sparc64.cmake
+++ b/libs/zlibng/cmake/toolchain-sparc64.cmake
@ -0,0 +1,25 @@
+# Cross-compilation toolchain: Linux/sparc64 target using the sparc64-linux-gnu GCC tools.
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR sparc64)
+set(CMAKE_SYSTEM_VERSION 1)
+
+# Target triple; reused below for the emulator prefix and the compiler names.
+set(CMAKE_C_COMPILER_TARGET "sparc64-linux-gnu")
+set(CMAKE_CXX_COMPILER_TARGET "sparc64-linux-gnu")
+
+# Target executables are run through qemu-sparc64, with -L pointing at /usr/<triple>/.
+set(CMAKE_CROSSCOMPILING TRUE)
+set(CMAKE_CROSSCOMPILING_EMULATOR qemu-sparc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)
+
+# Programs come from the host; libraries, headers and packages only from the target root.
+# NOTE(review): no CMAKE_FIND_ROOT_PATH or CMAKE_SYSROOT is set in this file -- confirm it is supplied elsewhere.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+# The C cross-compiler is mandatory: configuration stops if <triple>-gcc is not found.
+find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
+if(NOT C_COMPILER_FULL_PATH)
+    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
+endif()
+set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
+
+# The C++ cross-compiler is optional: CMAKE_CXX_COMPILER is only set when <triple>-g++ exists.
+find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
+if(CXX_COMPILER_FULL_PATH)
+    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
+endif()
--- a/libs/zlibng/compare258.c
+++ b/libs/zlibng/compare258.c
@ -0,0 +1,186 @@
+/* compare258.c -- aligned and unaligned versions of compare258
+ * Copyright (C) 2020 Nathan Moinvaziri
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+#include "zutil.h"
+
+#include "fallback_builtins.h"
+
+/* ALIGNED, byte comparison.
+ * Returns the number of leading bytes (0..256) that are equal in src0 and src1.
+ * Bytes are read one at a time and reading stops at the first mismatch. */
+static inline uint32_t compare256_c_static(const unsigned char *src0, const unsigned char *src1) {
+    uint32_t pos;
+
+    /* Eight comparisons per pass, written out by hand; 32 passes cover 256 bytes. */
+    for (pos = 0; pos < 256; pos += 8) {
+        if (src0[pos] != src1[pos])
+            return pos;
+        if (src0[pos + 1] != src1[pos + 1])
+            return pos + 1;
+        if (src0[pos + 2] != src1[pos + 2])
+            return pos + 2;
+        if (src0[pos + 3] != src1[pos + 3])
+            return pos + 3;
+        if (src0[pos + 4] != src1[pos + 4])
+            return pos + 4;
+        if (src0[pos + 5] != src1[pos + 5])
+            return pos + 5;
+        if (src0[pos + 6] != src1[pos + 6])
+            return pos + 6;
+        if (src0[pos + 7] != src1[pos + 7])
+            return pos + 7;
+    }
+
+    return 256;
+}
+
+/* Returns the number of leading equal bytes (0..258): the first two bytes are
+ * checked individually, the remainder is delegated to compare256_c_static. */
+static inline uint32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
+    if (src0[0] != src1[0])
+        return 0;
+    if (src0[1] != src1[1])
+        return 1;
+
+    return 2 + compare256_c_static(src0 + 2, src1 + 2);
+}
+
+/* Non-inline entry point that forwards to compare258_c_static. */
+Z_INTERNAL uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
+    return compare258_c_static(src0, src1);
+}
+
+#define LONGEST_MATCH   longest_match_c
+#define COMPARE256      compare256_c_static
+#define COMPARE258      compare258_c_static
+
+#include "match_tpl.h"
+
+#ifdef UNALIGNED_OK
+/* UNALIGNED_OK, 16-bit integer comparison.
+ * Returns the number of leading equal bytes (0..256), comparing two bytes per
+ * load. When a pair differs, the first byte of that pair decides whether the
+ * mismatch is at the pair's first or second byte. */
+static inline uint32_t compare256_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
+    uint32_t pos;
+
+    /* Four 16-bit comparisons per pass, written out by hand. */
+    for (pos = 0; pos < 256; pos += 8) {
+        if (*(const uint16_t *)(src0 + pos) != *(const uint16_t *)(src1 + pos))
+            return pos + (src0[pos] == src1[pos]);
+        if (*(const uint16_t *)(src0 + pos + 2) != *(const uint16_t *)(src1 + pos + 2))
+            return pos + 2 + (src0[pos + 2] == src1[pos + 2]);
+        if (*(const uint16_t *)(src0 + pos + 4) != *(const uint16_t *)(src1 + pos + 4))
+            return pos + 4 + (src0[pos + 4] == src1[pos + 4]);
+        if (*(const uint16_t *)(src0 + pos + 6) != *(const uint16_t *)(src1 + pos + 6))
+            return pos + 6 + (src0[pos + 6] == src1[pos + 6]);
+    }
+
+    return 256;
+}
+
+/* Returns the number of leading equal bytes (0..258): the first two bytes are
+ * compared with one 16-bit load, the rest by compare256_unaligned_16_static. */
+static inline uint32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
+    const uint16_t head0 = *(const uint16_t *)src0;
+    const uint16_t head1 = *(const uint16_t *)src1;
+
+    if (head0 == head1)
+        return 2 + compare256_unaligned_16_static(src0 + 2, src1 + 2);
+
+    /* The first pair differs: 1 when only its second byte differs, otherwise 0. */
+    return (src0[0] == src1[0]) ? 1 : 0;
+}
+
+/* Non-inline entry point that forwards to compare258_unaligned_16_static. */
+Z_INTERNAL uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
+    return compare258_unaligned_16_static(src0, src1);
+}
+
+#define LONGEST_MATCH   longest_match_unaligned_16
+#define COMPARE256      compare256_unaligned_16_static
+#define COMPARE258      compare258_unaligned_16_static
+
+#include "match_tpl.h"
+
+#ifdef HAVE_BUILTIN_CTZ
+/* UNALIGNED_OK, 32-bit integer comparison.
+ * Returns the number of leading equal bytes (0..256), comparing four bytes per
+ * load. On a mismatch, the index of the lowest set bit of the XOR, divided by
+ * eight, is added to the running offset.
+ * NOTE(review): that bit index matches the first differing byte in memory
+ * order only on little-endian targets -- confirm this path is never built for
+ * big-endian. */
+static inline uint32_t compare256_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
+    uint32_t pos;
+
+    for (pos = 0; pos < 256; pos += 4) {
+        const uint32_t delta = *(const uint32_t *)(src0 + pos) ^ *(const uint32_t *)(src1 + pos);
+
+        if (delta != 0)
+            return pos + (uint32_t)(__builtin_ctz(delta) / 8);
+    }
+
+    return 256;
+}
+
+/* Returns the number of leading equal bytes (0..258): the first two bytes are
+ * compared with one 16-bit load, the rest by compare256_unaligned_32_static. */
+static inline uint32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
+    const uint16_t head0 = *(const uint16_t *)src0;
+    const uint16_t head1 = *(const uint16_t *)src1;
+
+    if (head0 == head1)
+        return 2 + compare256_unaligned_32_static(src0 + 2, src1 + 2);
+
+    /* The first pair differs: 1 when only its second byte differs, otherwise 0. */
+    return (src0[0] == src1[0]) ? 1 : 0;
+}
+
+/* Non-inline entry point that forwards to compare258_unaligned_32_static. */
+Z_INTERNAL uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
+    return compare258_unaligned_32_static(src0, src1);
+}
+
+#define LONGEST_MATCH   longest_match_unaligned_32
+#define COMPARE256      compare256_unaligned_32_static
+#define COMPARE258      compare258_unaligned_32_static
+
+#include "match_tpl.h"
+
+#endif
+
+#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+/* UNALIGNED64_OK, 64-bit integer comparison.
+ * Returns the number of leading equal bytes (0..256), comparing eight bytes
+ * per load. On a mismatch, the index of the lowest set bit of the XOR, divided
+ * by eight, is added to the running offset.
+ * NOTE(review): that bit index matches the first differing byte in memory
+ * order only on little-endian targets -- confirm this path is never built for
+ * big-endian. */
+static inline uint32_t compare256_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
+    uint32_t pos;
+
+    for (pos = 0; pos < 256; pos += 8) {
+        const uint64_t delta = *(const uint64_t *)(src0 + pos) ^ *(const uint64_t *)(src1 + pos);
+
+        if (delta != 0)
+            return pos + (uint32_t)(__builtin_ctzll(delta) / 8);
+    }
+
+    return 256;
+}
+
+/* Returns the number of leading equal bytes (0..258): the first two bytes are
+ * compared with one 16-bit load, the rest by compare256_unaligned_64_static. */
+static inline uint32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
+    const uint16_t head0 = *(const uint16_t *)src0;
+    const uint16_t head1 = *(const uint16_t *)src1;
+
+    if (head0 == head1)
+        return 2 + compare256_unaligned_64_static(src0 + 2, src1 + 2);
+
+    /* The first pair differs: 1 when only its second byte differs, otherwise 0. */
+    return (src0[0] == src1[0]) ? 1 : 0;
+}
+
+/* Non-inline entry point that forwards to compare258_unaligned_64_static. */
+Z_INTERNAL uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
+    return compare258_unaligned_64_static(src0, src1);
+}
+
+#define LONGEST_MATCH   longest_match_unaligned_64
+#define COMPARE256      compare256_unaligned_64_static
+#define COMPARE258      compare258_unaligned_64_static
+
+#include "match_tpl.h"
+
+#endif
+
+#endif
--- a/libs/zlibng/compress.c
+++ b/libs/zlibng/compress.c
@ -3,14 +3,12 @@
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

-/* @(#) $Id$ */
-
 #define ZLIB_INTERNAL
 #include "zbuild.h"
 #if defined(ZLIB_COMPAT)
-# include "zlib.h"
+#  include "zlib.h"
 #else
-# include "zlib-ng.h"
+#  include "zlib-ng.h"
 #endif

 /* ===========================================================================
@ -24,7 +22,7 @@
   memory, Z_BUF_ERROR if there was not enough room in the output buffer,
   Z_STREAM_ERROR if the level parameter is invalid.
 */
-int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source,
+int Z_EXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source,
                        z_size_t sourceLen, int level) {
    PREFIX3(stream) stream;
    int err;
@ -44,7 +42,7 @@ int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsi

    stream.next_out = dest;
    stream.avail_out = 0;
-    stream.next_in = (const unsigned char *)source;
+    stream.next_in = (z_const unsigned char *)source;
    stream.avail_in = 0;

    do {
@ -66,7 +64,7 @@ int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsi

 /* ===========================================================================
 */
-int ZEXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) {
+int Z_EXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) {
    return PREFIX(compress2)(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
 }

@ -74,6 +72,12 @@ int ZEXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsig
   If the default memLevel or windowBits for deflateInit() is changed, then
   this function needs to be updated.
 */
-z_size_t ZEXPORT PREFIX(compressBound)(z_size_t sourceLen) {
+z_size_t Z_EXPORT PREFIX(compressBound)(z_size_t sourceLen) {
+#ifndef NO_QUICK_STRATEGY
+    /* Quick deflate strategy worst case is 9 bits per literal, rounded to nearest byte,
+       plus the size of block & gzip headers and footers */
+    return sourceLen + ((sourceLen + 13 + 7) >> 3) + 18;
+#else
    return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + (sourceLen >> 25) + 13;
+#endif
 }
--- a/libs/zlibng/configure
+++ b/libs/zlibng/configure
--- a/libs/zlibng/crc32.c
+++ b/libs/zlibng/crc32.c
@ -9,251 +9,40 @@
 * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
 */

-/* @(#) $Id$ */
-
-# include "zbuild.h"
-# include "gzendian.h"
-# include <inttypes.h>
-
-/*
-  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
-  protection on the static variables used to control the first-use generation
-  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
-  first call get_crc_table() to initialize the tables before allowing more than
-  one thread to use crc32().
-
-  DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. A main()
-  routine is also produced, so that this one source file can be compiled to an
-  executable.
- */
-
-#ifdef MAKECRCH
-#  include <stdio.h>
-#  ifndef DYNAMIC_CRC_TABLE
-#    define DYNAMIC_CRC_TABLE
-#  endif /* !DYNAMIC_CRC_TABLE */
-#endif /* MAKECRCH */
-
+#include "zbuild.h"
+#include "zendian.h"
+#include <inttypes.h>
 #include "deflate.h"
 #include "functable.h"
-
-
-/* Local functions for crc concatenation */
-#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
-static uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec);
-static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
-static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
-
-/* ========================================================================= */
-static uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec) {
-    uint32_t sum = 0;
-    while (vec) {
-        if (vec & 1)
-            sum ^= *mat;
-        vec >>= 1;
-        mat++;
-    }
-    return sum;
-}
-
-#ifdef DYNAMIC_CRC_TABLE
-volatile int crc_table_empty = 1;
-static uint32_t crc_table[8][256];
-static uint32_t crc_comb[GF2_DIM][GF2_DIM];
-void make_crc_table(void);
-static void gf2_matrix_square(uint32_t *square, const uint32_t *mat);
-#ifdef MAKECRCH
-static void write_table(FILE *, const uint32_t *, int);
-#endif /* MAKECRCH */
-
-/* ========================================================================= */
-static void gf2_matrix_square(uint32_t *square, const uint32_t *mat) {
-    int n;
-
-    for (n = 0; n < GF2_DIM; n++)
-        square[n] = gf2_matrix_times(mat, mat[n]);
-}
-
-/*
-  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
-  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
-
-  Polynomials over GF(2) are represented in binary, one bit per coefficient,
-  with the lowest powers in the most significant bit.  Then adding polynomials
-  is just exclusive-or, and multiplying a polynomial by x is a right shift by
-  one.  If we call the above polynomial p, and represent a byte as the
-  polynomial q, also with the lowest power in the most significant bit (so the
-  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
-  where a mod b means the remainder after dividing a by b.
-
-  This calculation is done using the shift-register method of multiplying and
-  taking the remainder.  The register is initialized to zero, and for each
-  incoming bit, x^32 is added mod p to the register if the bit is a one (where
-  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
-  x (which is shifting right by one and adding x^32 mod p if the bit shifted
-  out is a one).  We start with the highest power (least significant bit) of
-  q and repeat for all eight bits of q.
-
-  The first table is simply the CRC of all possible eight bit values.  This is
-  all the information needed to generate CRCs on data a byte at a time for all
-  combinations of CRC register values and incoming bytes.  The remaining tables
-  allow for word-at-a-time CRC calculation for both big-endian and little-
-  endian machines, where a word is four bytes.
-*/
-void make_crc_table() {
-    uint32_t c;
-    int n, k;
-    uint32_t poly;                       /* polynomial exclusive-or pattern */
-    /* terms of polynomial defining this crc (except x^32): */
-    static volatile int first = 1;      /* flag to limit concurrent making */
-    static const unsigned char p[] = {0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26};
-
-    /* See if another task is already doing this (not thread-safe, but better
-       than nothing -- significantly reduces duration of vulnerability in
-       case the advice about DYNAMIC_CRC_TABLE is ignored) */
-    if (first) {
-        first = 0;
-
-        /* make exclusive-or pattern from polynomial (0xedb88320) */
-        poly = 0;
-        for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
-            poly |= (uint32_t)1 << (31 - p[n]);
-
-        /* generate a crc for every 8-bit value */
-        for (n = 0; n < 256; n++) {
-            c = (uint32_t)n;
-            for (k = 0; k < 8; k++)
-                c = c & 1 ? poly ^ (c >> 1) : c >> 1;
-            crc_table[0][n] = c;
-        }
-
-        /* generate crc for each value followed by one, two, and three zeros,
-           and then the byte reversal of those as well as the first table */
-        for (n = 0; n < 256; n++) {
-            c = crc_table[0][n];
-            crc_table[4][n] = ZSWAP32(c);
-            for (k = 1; k < 4; k++) {
-                c = crc_table[0][c & 0xff] ^ (c >> 8);
-                crc_table[k][n] = c;
-                crc_table[k + 4][n] = ZSWAP32(c);
-            }
-        }
-
-        /* generate zero operators table for crc32_combine() */
-
-        /* generate the operator to apply a single zero bit to a CRC -- the
-           first row adds the polynomial if the low bit is a 1, and the
-           remaining rows shift the CRC right one bit */
-        k = GF2_DIM - 3;
-        crc_comb[k][0] = 0xedb88320UL;      /* CRC-32 polynomial */
-        uint32_t row = 1;
-        for (n = 1; n < GF2_DIM; n++) {
-            crc_comb[k][n] = row;
-            row <<= 1;
-        }
-
-        /* generate operators that apply 2, 4, and 8 zeros to a CRC, putting
-           the last one, the operator for one zero byte, at the 0 position */
-        gf2_matrix_square(crc_comb[k + 1], crc_comb[k]);
-        gf2_matrix_square(crc_comb[k + 2], crc_comb[k + 1]);
-        gf2_matrix_square(crc_comb[0], crc_comb[k + 2]);
-
-        /* generate operators for applying 2^n zero bytes to a CRC, filling out
-           the remainder of the table -- the operators repeat after GF2_DIM
-           values of n, so the table only needs GF2_DIM entries, regardless of
-           the size of the length being processed */
-        for (n = 1; n < k; n++)
-            gf2_matrix_square(crc_comb[n], crc_comb[n - 1]);
-
-        /* mark tables as complete, in case someone else is waiting */
-        crc_table_empty = 0;
-    } else {      /* not first */
-        /* wait for the other guy to finish (not efficient, but rare) */
-        while (crc_table_empty)
-            {}
-    }
-#ifdef MAKECRCH
-    {
-        FILE *out;
-
-        out = fopen("crc32.h", "w");
-        if (out == NULL) return;
-
-        /* write out CRC table to crc32.h */
-        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
-        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
-        fprintf(out, "static const uint32_t ");
-        fprintf(out, "crc_table[8][256] =\n{\n  {\n");
-        write_table(out, crc_table[0], 256);
-        for (k = 1; k < 8; k++) {
-            fprintf(out, "  },\n  {\n");
-            write_table(out, crc_table[k], 256);
-        }
-        fprintf(out, "  }\n};\n");
-
-        /* write out zero operator table to crc32.h */
-        fprintf(out, "\nstatic const uint32_t ");
-        fprintf(out, "crc_comb[%d][%d] =\n{\n  {\n", GF2_DIM, GF2_DIM);
-        write_table(out, crc_comb[0], GF2_DIM);
-        for (k = 1; k < GF2_DIM; k++) {
-            fprintf(out, "  },\n  {\n");
-            write_table(out, crc_comb[k], GF2_DIM);
-        }
-        fprintf(out, "  }\n};\n");
-        fclose(out);
-    }
-#endif /* MAKECRCH */
-}
-
-#ifdef MAKECRCH
-static void write_table(FILE *out, const uint32_t *table, int k) {
-    int n;
-
-    for (n = 0; n < k; n++)
-        fprintf(out, "%s0x%08" PRIx32 "%s", n % 5 ? "" : "    ",
-                (uint32_t)(table[n]),
-                n == k - 1 ? "\n" : (n % 5 == 4 ? ",\n" : ", "));
-}
-
-int main()
-{
-    make_crc_table();
-    return 0;
-}
-#endif /* MAKECRCH */
-
-#else /* !DYNAMIC_CRC_TABLE */
-/* ========================================================================
- * Tables of CRC-32s of all single-byte values, made by make_crc_table(),
- * and tables of zero operator matrices for crc32_combine().
- */
-#include "crc32.h"
-#endif /* DYNAMIC_CRC_TABLE */
+#include "crc32_tbl.h"

 /* =========================================================================
 * This function can be used by asm versions of crc32()
 */
-const uint32_t * ZEXPORT PREFIX(get_crc_table)(void) {
-#ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
+const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) {
    return (const uint32_t *)crc_table;
 }

-uint32_t ZEXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
+#ifdef ZLIB_COMPAT
+unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) {
+    if (buf == NULL) return 0;
+
+    return (unsigned long)functable.crc32((uint32_t)crc, buf, len);
+}
+#else
+uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
    if (buf == NULL) return 0;

    return functable.crc32(crc, buf, len);
 }
+#endif
 /* ========================================================================= */
 #define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
 #define DO4 DO1; DO1; DO1; DO1

 /* ========================================================================= */
-ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len)
-{
+Z_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len) {
    crc = crc ^ 0xffffffff;

 #ifdef UNROLL_MORE
@ -274,9 +63,15 @@ ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uin
    return crc ^ 0xffffffff;
 }

-uint32_t ZEXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
+#ifdef ZLIB_COMPAT
+unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) {
+    return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len);
+}
+#else
+uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
    return PREFIX(crc32_z)(crc, buf, len);
 }
+#endif

 /*
   This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
@ -298,9 +93,9 @@ uint32_t ZEXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t
 #define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4

 /* ========================================================================= */
-ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
-    register uint32_t c;
-    register const uint32_t *buf4;
+Z_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
+    Z_REGISTER uint32_t c;
+    Z_REGISTER const uint32_t *buf4;

    c = crc;
    c = ~c;
@ -340,9 +135,9 @@ ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint
 #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4

 /* ========================================================================= */
-ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
-    register uint32_t c;
-    register const uint32_t *buf4;
+Z_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
+    Z_REGISTER uint32_t c;
+    Z_REGISTER const uint32_t *buf4;

    c = ZSWAP32(crc);
    c = ~c;
@ -374,45 +169,19 @@ ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_
 }
 #endif /* BYTE_ORDER == BIG_ENDIAN */

-
-/* ========================================================================= */
-static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
-    int n;
-
-#ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
-
-    if (len2 > 0)
-        /* operator for 2^n zeros repeats every GF2_DIM n values */
-        for (n = 0; len2; n = (n + 1) % GF2_DIM, len2 >>= 1)
-            if (len2 & 1)
-                crc1 = gf2_matrix_times(crc_comb[n], crc1);
-    return crc1 ^ crc2;
-}
-
-/* ========================================================================= */
-uint32_t ZEXPORT PREFIX(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off_t len2) {
-    return crc32_combine_(crc1, crc2, len2);
-}
-
-uint32_t ZEXPORT PREFIX(crc32_combine64)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
-    return crc32_combine_(crc1, crc2, len2);
-}
-
 #ifdef X86_PCLMULQDQ_CRC
 #include "arch/x86/x86.h"
 #include "arch/x86/crc_folding.h"

-ZLIB_INTERNAL void crc_finalize(deflate_state *const s) {
+Z_INTERNAL void crc_finalize(deflate_state *const s) {
    if (x86_cpu_has_pclmulqdq)
        s->strm->adler = crc_fold_512to32(s);
 }
 #endif

-ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
+Z_INTERNAL void crc_reset(deflate_state *const s) {
 #ifdef X86_PCLMULQDQ_CRC
+    x86_check_features();
    if (x86_cpu_has_pclmulqdq) {
        crc_fold_init(s);
        return;
@ -421,7 +190,7 @@ ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
    s->strm->adler = PREFIX(crc32)(0L, NULL, 0);
 }

-ZLIB_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
+Z_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
 #ifdef X86_PCLMULQDQ_CRC
    if (x86_cpu_has_pclmulqdq) {
        crc_fold_copy(strm->state, dst, strm->next_in, size);
@ -431,68 +200,3 @@ ZLIB_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsi
    memcpy(dst, strm->next_in, size);
    strm->adler = PREFIX(crc32)(strm->adler, dst, size);
 }
-
-/* ========================================================================= */
-static void crc32_combine_gen_(uint32_t *op, z_off64_t len2)
-{
-    uint32_t row;
-    int j;
-    unsigned i;
-
-#ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
-
-    /* if len2 is zero or negative, return the identity matrix */
-    if (len2 <= 0) {
-        row = 1;
-        for (j = 0; j < GF2_DIM; j++) {
-            op[j] = row;
-            row <<= 1;
-        }
-        return;
-    }
-
-    /* at least one bit in len2 is set -- find it, and copy the operator
-       corresponding to that position into op */
-    i = 0;
-    for (;;) {
-        if (len2 & 1) {
-            for (j = 0; j < GF2_DIM; j++)
-                op[j] = crc_comb[i][j];
-            break;
-        }
-        len2 >>= 1;
-        i = (i + 1) % GF2_DIM;
-    }
-
-    /* for each remaining bit set in len2 (if any), multiply op by the operator
-       corresponding to that position */
-    for (;;) {
-        len2 >>= 1;
-        i = (i + 1) % GF2_DIM;
-        if (len2 == 0)
-            break;
-        if (len2 & 1)
-            for (j = 0; j < GF2_DIM; j++)
-                op[j] = gf2_matrix_times(crc_comb[i], op[j]);
-    }
-}
-
-/* ========================================================================= */
-void ZEXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2)
-{
-    crc32_combine_gen_(op, len2);
-}
-
-void ZEXPORT PREFIX(crc32_combine_gen64)(uint32_t *op, z_off64_t len2)
-{
-    crc32_combine_gen_(op, len2);
-}
-
-/* ========================================================================= */
-uint32_t ZEXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op)
-{
-    return gf2_matrix_times(op, crc1) ^ crc2;
-}
--- a/libs/zlibng/crc32_comb.c
+++ b/libs/zlibng/crc32_comb.c
@ -0,0 +1,108 @@
+/* crc32_comb.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
+ * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
+ * tables for updating the shift register in one step with three exclusive-ors
+ * instead of four steps with four exclusive-ors.  This results in about a
+ * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
+ */
+
+#include "zbuild.h"
+#include <inttypes.h>
+#include "deflate.h"
+#include "crc32_p.h"
+#include "crc32_comb_tbl.h"
+
+
+/* Local functions for crc concatenation */
+static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
+static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
+
+/* ========================================================================= */
+/* Combines crc1 and crc2 for a second block of len2 bytes: for every set bit
+ * n of len2 (n taken modulo GF2_DIM) crc1 is multiplied by crc_comb[n], then
+ * the result is XORed with crc2. A len2 of zero or less leaves crc1 as is. */
+static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
+    int idx = 0;
+
+    if (len2 <= 0)
+        return crc1 ^ crc2;
+
+    /* operator for 2^n zeros repeats every GF2_DIM n values */
+    while (len2 != 0) {
+        if (len2 & 1)
+            crc1 = gf2_matrix_times(crc_comb[idx], crc1);
+        idx = (idx + 1) % GF2_DIM;
+        len2 >>= 1;
+    }
+
+    return crc1 ^ crc2;
+}
+
+/* ========================================================================= */
+/* Exported combine entry points; every variant forwards to crc32_combine_.
+ * With ZLIB_COMPAT both the z_off_t and the z_off64_t variants are built; they
+ * take and return unsigned long and narrow the CRC arguments to uint32_t.
+ * Without ZLIB_COMPAT only the PREFIX4 (z_off64_t) variant exists. */
+#ifdef ZLIB_COMPAT
+unsigned long Z_EXPORT PREFIX(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off_t len2) {
+    return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2);
+}
+
+unsigned long Z_EXPORT PREFIX4(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off64_t len2) {
+    return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2);
+}
+#else
+uint32_t Z_EXPORT PREFIX4(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
+    return crc32_combine_(crc1, crc2, len2);
+}
+#endif
+
+/* ========================================================================= */
+
+/* Fills op[0..GF2_DIM-1] with the operator for a block of len2 bytes.
+ * For len2 <= 0 the identity matrix is written. Otherwise op starts as the
+ * crc_comb entry of the lowest set bit of len2 and is then multiplied by the
+ * entry of every further set bit (bit positions taken modulo GF2_DIM). */
+static void crc32_combine_gen_(uint32_t *op, z_off64_t len2) {
+    uint32_t mask;
+    unsigned bit = 0;
+    int col;
+
+    /* if len2 is zero or negative, return the identity matrix */
+    if (len2 <= 0) {
+        mask = 1;
+        for (col = 0; col < GF2_DIM; col++, mask <<= 1)
+            op[col] = mask;
+        return;
+    }
+
+    /* len2 > 0, so a set bit exists: skip the clear low bits to reach it */
+    while ((len2 & 1) == 0) {
+        len2 >>= 1;
+        bit = (bit + 1) % GF2_DIM;
+    }
+
+    /* start from the operator that belongs to that lowest set bit */
+    for (col = 0; col < GF2_DIM; col++)
+        op[col] = crc_comb[bit][col];
+
+    /* fold in the operator of every remaining set bit, lowest first */
+    len2 >>= 1;
+    while (len2 != 0) {
+        bit = (bit + 1) % GF2_DIM;
+        if (len2 & 1) {
+            for (col = 0; col < GF2_DIM; col++)
+                op[col] = gf2_matrix_times(crc_comb[bit], op[col]);
+        }
+        len2 >>= 1;
+    }
+}
+
+/* ========================================================================= */
+
+/* Exported operator generators; both forward to crc32_combine_gen_.
+ * The z_off_t variant is only built with ZLIB_COMPAT, the PREFIX4 (z_off64_t)
+ * variant is always built. */
+#ifdef ZLIB_COMPAT
+void Z_EXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2) {
+    crc32_combine_gen_(op, len2);
+}
+#endif
+
+void Z_EXPORT PREFIX4(crc32_combine_gen)(uint32_t *op, z_off64_t len2) {
+    crc32_combine_gen_(op, len2);
+}
+
+/* ========================================================================= */
+/* Applies the operator op to crc1 with gf2_matrix_times and XORs the result
+ * with crc2. Presumably op is a matrix produced by crc32_combine_gen --
+ * confirm against callers. */
+uint32_t Z_EXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op) {
+    return gf2_matrix_times(op, crc1) ^ crc2;
+}
--- a/libs/zlibng/crc32_comb_tbl.h
+++ b/libs/zlibng/crc32_comb_tbl.h
@ -0,0 +1,300 @@
+#ifndef CRC32_COMB_TBL_H_
+#define CRC32_COMB_TBL_H_
+
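+/* crc32_comb_tbl.h -- table of "zero operators" used to combine CRC-32 values.
+ * Row n of crc_comb is the GF(2) operator matrix for appending 2^n zero bytes
+ * (row 0 advances a CRC by one byte, row 1 by two bytes, and so on).
+ * Generated automatically by makecrct.c
+ */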
+
+static const uint32_t crc_comb[32][32] =
+{
+  {
+    0x77073096, 0xee0e612c, 0x076dc419, 0x0edb8832, 0x1db71064,
+    0x3b6e20c8, 0x76dc4190, 0xedb88320, 0x00000001, 0x00000002,
+    0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040,
+    0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
+    0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000,
+    0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000,
+    0x00400000, 0x00800000
+  },
+  {
+    0x191b3141, 0x32366282, 0x646cc504, 0xc8d98a08, 0x4ac21251,
+    0x958424a2, 0xf0794f05, 0x3b83984b, 0x77073096, 0xee0e612c,
+    0x076dc419, 0x0edb8832, 0x1db71064, 0x3b6e20c8, 0x76dc4190,
+    0xedb88320, 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+    0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100,
+    0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000,
+    0x00004000, 0x00008000
+  },
+  {
+    0xb8bc6765, 0xaa09c88b, 0x8f629757, 0xc5b428ef, 0x5019579f,
+    0xa032af3e, 0x9b14583d, 0xed59b63b, 0x01c26a37, 0x0384d46e,
+    0x0709a8dc, 0x0e1351b8, 0x1c26a370, 0x384d46e0, 0x709a8dc0,
+    0xe1351b80, 0x191b3141, 0x32366282, 0x646cc504, 0xc8d98a08,
+    0x4ac21251, 0x958424a2, 0xf0794f05, 0x3b83984b, 0x77073096,
+    0xee0e612c, 0x076dc419, 0x0edb8832, 0x1db71064, 0x3b6e20c8,
+    0x76dc4190, 0xedb88320
+  },
+  {
+    0xccaa009e, 0x4225077d, 0x844a0efa, 0xd3e51bb5, 0x7cbb312b,
+    0xf9766256, 0x299dc2ed, 0x533b85da, 0xa6770bb4, 0x979f1129,
+    0xf44f2413, 0x33ef4e67, 0x67de9cce, 0xcfbd399c, 0x440b7579,
+    0x8816eaf2, 0xcb5cd3a5, 0x4dc8a10b, 0x9b914216, 0xec53826d,
+    0x03d6029b, 0x07ac0536, 0x0f580a6c, 0x1eb014d8, 0x3d6029b0,
+    0x7ac05360, 0xf580a6c0, 0x30704bc1, 0x60e09782, 0xc1c12f04,
+    0x58f35849, 0xb1e6b092
+  },
+  {
+    0xae689191, 0x87a02563, 0xd4314c87, 0x73139f4f, 0xe6273e9e,
+    0x173f7b7d, 0x2e7ef6fa, 0x5cfdedf4, 0xb9fbdbe8, 0xa886b191,
+    0x8a7c6563, 0xcf89cc87, 0x44629f4f, 0x88c53e9e, 0xcafb7b7d,
+    0x4e87f0bb, 0x9d0fe176, 0xe16ec4ad, 0x19ac8f1b, 0x33591e36,
+    0x66b23c6c, 0xcd6478d8, 0x41b9f7f1, 0x8373efe2, 0xdd96d985,
+    0x605cb54b, 0xc0b96a96, 0x5a03d36d, 0xb407a6da, 0xb37e4bf5,
+    0xbd8d91ab, 0xa06a2517
+  },
+  {
+    0xf1da05aa, 0x38c50d15, 0x718a1a2a, 0xe3143454, 0x1d596ee9,
+    0x3ab2ddd2, 0x7565bba4, 0xeacb7748, 0x0ee7e8d1, 0x1dcfd1a2,
+    0x3b9fa344, 0x773f4688, 0xee7e8d10, 0x078c1c61, 0x0f1838c2,
+    0x1e307184, 0x3c60e308, 0x78c1c610, 0xf1838c20, 0x38761e01,
+    0x70ec3c02, 0xe1d87804, 0x18c1f649, 0x3183ec92, 0x6307d924,
+    0xc60fb248, 0x576e62d1, 0xaedcc5a2, 0x86c88d05, 0xd6e01c4b,
+    0x76b13ed7, 0xed627dae
+  },
+  {
+    0x8f352d95, 0xc51b5d6b, 0x5147bc97, 0xa28f792e, 0x9e6ff41d,
+    0xe7aeee7b, 0x142cdab7, 0x2859b56e, 0x50b36adc, 0xa166d5b8,
+    0x99bcad31, 0xe8085c23, 0x0b61be07, 0x16c37c0e, 0x2d86f81c,
+    0x5b0df038, 0xb61be070, 0xb746c6a1, 0xb5fc8b03, 0xb0881047,
+    0xba6126cf, 0xafb34bdf, 0x841791ff, 0xd35e25bf, 0x7dcd4d3f,
+    0xfb9a9a7e, 0x2c4432bd, 0x5888657a, 0xb110caf4, 0xb95093a9,
+    0xa9d02113, 0x88d14467
+  },
+  {
+    0x33fff533, 0x67ffea66, 0xcfffd4cc, 0x448eafd9, 0x891d5fb2,
+    0xc94bb925, 0x49e6740b, 0x93cce816, 0xfce8d66d, 0x22a0aa9b,
+    0x45415536, 0x8a82aa6c, 0xce745299, 0x4799a373, 0x8f3346e6,
+    0xc5178b8d, 0x515e115b, 0xa2bc22b6, 0x9e09432d, 0xe763801b,
+    0x15b60677, 0x2b6c0cee, 0x56d819dc, 0xadb033b8, 0x80116131,
+    0xdb53c423, 0x6dd68e07, 0xdbad1c0e, 0x6c2b3e5d, 0xd8567cba,
+    0x6bddff35, 0xd7bbfe6a
+  },
+  {
+    0xce3371cb, 0x4717e5d7, 0x8e2fcbae, 0xc72e911d, 0x552c247b,
+    0xaa5848f6, 0x8fc197ad, 0xc4f2291b, 0x52955477, 0xa52aa8ee,
+    0x9124579d, 0xf939a97b, 0x290254b7, 0x5204a96e, 0xa40952dc,
+    0x9363a3f9, 0xfdb641b3, 0x201d8527, 0x403b0a4e, 0x8076149c,
+    0xdb9d2f79, 0x6c4b58b3, 0xd896b166, 0x6a5c648d, 0xd4b8c91a,
+    0x72009475, 0xe40128ea, 0x13735795, 0x26e6af2a, 0x4dcd5e54,
+    0x9b9abca8, 0xec447f11
+  },
+  {
+    0x1072db28, 0x20e5b650, 0x41cb6ca0, 0x8396d940, 0xdc5cb4c1,
+    0x63c86fc3, 0xc790df86, 0x5450b94d, 0xa8a1729a, 0x8a33e375,
+    0xcf16c0ab, 0x455c8717, 0x8ab90e2e, 0xce031a1d, 0x4777327b,
+    0x8eee64f6, 0xc6adcfad, 0x562a991b, 0xac553236, 0x83db622d,
+    0xdcc7c21b, 0x62fe8277, 0xc5fd04ee, 0x508b0f9d, 0xa1161f3a,
+    0x995d3835, 0xe9cb762b, 0x08e7ea17, 0x11cfd42e, 0x239fa85c,
+    0x473f50b8, 0x8e7ea170
+  },
+  {
+    0xf891f16f, 0x2a52e49f, 0x54a5c93e, 0xa94b927c, 0x89e622b9,
+    0xc8bd4333, 0x4a0b8027, 0x9417004e, 0xf35f06dd, 0x3dcf0bfb,
+    0x7b9e17f6, 0xf73c2fec, 0x35095999, 0x6a12b332, 0xd4256664,
+    0x733bca89, 0xe6779512, 0x179e2c65, 0x2f3c58ca, 0x5e78b194,
+    0xbcf16328, 0xa293c011, 0x9e568663, 0xe7dc0a87, 0x14c9134f,
+    0x2992269e, 0x53244d3c, 0xa6489a78, 0x97e032b1, 0xf4b16323,
+    0x3213c007, 0x6427800e
+  },
+  {
+    0x88b6ba63, 0xca1c7287, 0x4f49e34f, 0x9e93c69e, 0xe6568b7d,
+    0x17dc10bb, 0x2fb82176, 0x5f7042ec, 0xbee085d8, 0xa6b00df1,
+    0x96111da3, 0xf7533d07, 0x35d77c4f, 0x6baef89e, 0xd75df13c,
+    0x75cae439, 0xeb95c872, 0x0c5a96a5, 0x18b52d4a, 0x316a5a94,
+    0x62d4b528, 0xc5a96a50, 0x5023d2e1, 0xa047a5c2, 0x9bfe4dc5,
+    0xec8d9dcb, 0x026a3dd7, 0x04d47bae, 0x09a8f75c, 0x1351eeb8,
+    0x26a3dd70, 0x4d47bae0
+  },
+  {
+    0x5ad8a92c, 0xb5b15258, 0xb013a2f1, 0xbb5643a3, 0xaddd8107,
+    0x80ca044f, 0xdae50edf, 0x6ebb1bff, 0xdd7637fe, 0x619d69bd,
+    0xc33ad37a, 0x5d04a0b5, 0xba09416a, 0xaf638495, 0x85b60f6b,
+    0xd01d1897, 0x7b4b376f, 0xf6966ede, 0x365ddbfd, 0x6cbbb7fa,
+    0xd9776ff4, 0x699fd9a9, 0xd33fb352, 0x7d0e60e5, 0xfa1cc1ca,
+    0x2f4885d5, 0x5e910baa, 0xbd221754, 0xa13528e9, 0x991b5793,
+    0xe947a967, 0x09fe548f
+  },
+  {
+    0xb566f6e2, 0xb1bceb85, 0xb808d14b, 0xab60a4d7, 0x8db04fef,
+    0xc011999f, 0x5b52357f, 0xb6a46afe, 0xb639d3bd, 0xb702a13b,
+    0xb5744437, 0xb1998e2f, 0xb8421a1f, 0xabf5327f, 0x8c9b62bf,
+    0xc247c33f, 0x5ffe803f, 0xbffd007e, 0xa48b06bd, 0x92670b3b,
+    0xffbf1037, 0x240f262f, 0x481e4c5e, 0x903c98bc, 0xfb083739,
+    0x2d616833, 0x5ac2d066, 0xb585a0cc, 0xb07a47d9, 0xbb8589f3,
+    0xac7a15a7, 0x83852d0f
+  },
+  {
+    0x9d9129bf, 0xe053553f, 0x1bd7ac3f, 0x37af587e, 0x6f5eb0fc,
+    0xdebd61f8, 0x660bc5b1, 0xcc178b62, 0x435e1085, 0x86bc210a,
+    0xd6094455, 0x77638eeb, 0xeec71dd6, 0x06ff3ded, 0x0dfe7bda,
+    0x1bfcf7b4, 0x37f9ef68, 0x6ff3ded0, 0xdfe7bda0, 0x64be7d01,
+    0xc97cfa02, 0x4988f245, 0x9311e48a, 0xfd52cf55, 0x21d498eb,
+    0x43a931d6, 0x875263ac, 0xd5d5c119, 0x70da8473, 0xe1b508e6,
+    0x181b178d, 0x30362f1a
+  },
+  {
+    0x2ee43a2c, 0x5dc87458, 0xbb90e8b0, 0xac50d721, 0x83d0a803,
+    0xdcd05647, 0x62d1aacf, 0xc5a3559e, 0x5037ad7d, 0xa06f5afa,
+    0x9bafb3b5, 0xec2e612b, 0x032dc417, 0x065b882e, 0x0cb7105c,
+    0x196e20b8, 0x32dc4170, 0x65b882e0, 0xcb7105c0, 0x4d930dc1,
+    0x9b261b82, 0xed3d3145, 0x010b64cb, 0x0216c996, 0x042d932c,
+    0x085b2658, 0x10b64cb0, 0x216c9960, 0x42d932c0, 0x85b26580,
+    0xd015cd41, 0x7b5a9cc3
+  },
+  {
+    0x1b4511ee, 0x368a23dc, 0x6d1447b8, 0xda288f70, 0x6f2018a1,
+    0xde403142, 0x67f164c5, 0xcfe2c98a, 0x44b49555, 0x89692aaa,
+    0xc9a35315, 0x4837a06b, 0x906f40d6, 0xfbaf87ed, 0x2c2e099b,
+    0x585c1336, 0xb0b8266c, 0xba014a99, 0xaf739373, 0x859620a7,
+    0xd05d470f, 0x7bcb885f, 0xf79710be, 0x345f273d, 0x68be4e7a,
+    0xd17c9cf4, 0x79883fa9, 0xf3107f52, 0x3d51f8e5, 0x7aa3f1ca,
+    0xf547e394, 0x31fec169
+  },
+  {
+    0xbce15202, 0xa2b3a245, 0x9e1642cb, 0xe75d83d7, 0x15ca01ef,
+    0x2b9403de, 0x572807bc, 0xae500f78, 0x87d118b1, 0xd4d33723,
+    0x72d76807, 0xe5aed00e, 0x102ca65d, 0x20594cba, 0x40b29974,
+    0x816532e8, 0xd9bb6391, 0x6807c163, 0xd00f82c6, 0x7b6e03cd,
+    0xf6dc079a, 0x36c90975, 0x6d9212ea, 0xdb2425d4, 0x6d394de9,
+    0xda729bd2, 0x6f9431e5, 0xdf2863ca, 0x6521c1d5, 0xca4383aa,
+    0x4ff60115, 0x9fec022a
+  },
+  {
+    0xff08e5ef, 0x2560cd9f, 0x4ac19b3e, 0x9583367c, 0xf0776ab9,
+    0x3b9fd333, 0x773fa666, 0xee7f4ccc, 0x078f9fd9, 0x0f1f3fb2,
+    0x1e3e7f64, 0x3c7cfec8, 0x78f9fd90, 0xf1f3fb20, 0x3896f001,
+    0x712de002, 0xe25bc004, 0x1fc68649, 0x3f8d0c92, 0x7f1a1924,
+    0xfe343248, 0x271962d1, 0x4e32c5a2, 0x9c658b44, 0xe3ba10c9,
+    0x1c0527d3, 0x380a4fa6, 0x70149f4c, 0xe0293e98, 0x1b237b71,
+    0x3646f6e2, 0x6c8dedc4
+  },
+  {
+    0x6f76172e, 0xdeec2e5c, 0x66a95af9, 0xcd52b5f2, 0x41d46da5,
+    0x83a8db4a, 0xdc20b0d5, 0x633067eb, 0xc660cfd6, 0x57b099ed,
+    0xaf6133da, 0x85b361f5, 0xd017c5ab, 0x7b5e8d17, 0xf6bd1a2e,
+    0x360b321d, 0x6c16643a, 0xd82cc874, 0x6b2896a9, 0xd6512d52,
+    0x77d35ce5, 0xefa6b9ca, 0x043c75d5, 0x0878ebaa, 0x10f1d754,
+    0x21e3aea8, 0x43c75d50, 0x878ebaa0, 0xd46c7301, 0x73a9e043,
+    0xe753c086, 0x15d6874d
+  },
+  {
+    0x56f5cab9, 0xadeb9572, 0x80a62ca5, 0xda3d5f0b, 0x6f0bb857,
+    0xde1770ae, 0x675fe71d, 0xcebfce3a, 0x460e9a35, 0x8c1d346a,
+    0xc34b6e95, 0x5de7db6b, 0xbbcfb6d6, 0xacee6bed, 0x82add19b,
+    0xde2aa577, 0x67244caf, 0xce48995e, 0x47e034fd, 0x8fc069fa,
+    0xc4f1d5b5, 0x5292ad2b, 0xa5255a56, 0x913bb2ed, 0xf906639b,
+    0x297dc177, 0x52fb82ee, 0xa5f705dc, 0x909f0df9, 0xfa4f1db3,
+    0x2fef3d27, 0x5fde7a4e
+  },
+  {
+    0x385993ac, 0x70b32758, 0xe1664eb0, 0x19bd9b21, 0x337b3642,
+    0x66f66c84, 0xcdecd908, 0x40a8b451, 0x815168a2, 0xd9d3d705,
+    0x68d6a84b, 0xd1ad5096, 0x782ba76d, 0xf0574eda, 0x3bdf9bf5,
+    0x77bf37ea, 0xef7e6fd4, 0x058dd9e9, 0x0b1bb3d2, 0x163767a4,
+    0x2c6ecf48, 0x58dd9e90, 0xb1bb3d20, 0xb8077c01, 0xab7ffe43,
+    0x8d8efac7, 0xc06cf3cf, 0x5ba8e1df, 0xb751c3be, 0xb5d2813d,
+    0xb0d4043b, 0xbad90e37
+  },
+  {
+    0xb4247b20, 0xb339f001, 0xbd02e643, 0xa174cac7, 0x999893cf,
+    0xe84021df, 0x0bf145ff, 0x17e28bfe, 0x2fc517fc, 0x5f8a2ff8,
+    0xbf145ff0, 0xa559b9a1, 0x91c27503, 0xf8f5ec47, 0x2a9adecf,
+    0x5535bd9e, 0xaa6b7b3c, 0x8fa7f039, 0xc43ee633, 0x530cca27,
+    0xa619944e, 0x97422edd, 0xf5f55bfb, 0x309bb1b7, 0x6137636e,
+    0xc26ec6dc, 0x5fac8bf9, 0xbf5917f2, 0xa5c329a5, 0x90f7550b,
+    0xfa9fac57, 0x2e4e5eef
+  },
+  {
+    0x695186a7, 0xd2a30d4e, 0x7e371cdd, 0xfc6e39ba, 0x23ad7535,
+    0x475aea6a, 0x8eb5d4d4, 0xc61aafe9, 0x57445993, 0xae88b326,
+    0x8660600d, 0xd7b1c65b, 0x74128af7, 0xe82515ee, 0x0b3b2d9d,
+    0x16765b3a, 0x2cecb674, 0x59d96ce8, 0xb3b2d9d0, 0xbc14b5e1,
+    0xa3586d83, 0x9dc1dd47, 0xe0f2bccf, 0x1a947fdf, 0x3528ffbe,
+    0x6a51ff7c, 0xd4a3fef8, 0x7236fbb1, 0xe46df762, 0x13aae885,
+    0x2755d10a, 0x4eaba214
+  },
+  {
+    0x66bc001e, 0xcd78003c, 0x41810639, 0x83020c72, 0xdd751ea5,
+    0x619b3b0b, 0xc3367616, 0x5d1dea6d, 0xba3bd4da, 0xaf06aff5,
+    0x857c59ab, 0xd189b517, 0x78626c6f, 0xf0c4d8de, 0x3af8b7fd,
+    0x75f16ffa, 0xebe2dff4, 0x0cb4b9a9, 0x19697352, 0x32d2e6a4,
+    0x65a5cd48, 0xcb4b9a90, 0x4de63361, 0x9bcc66c2, 0xece9cbc5,
+    0x02a291cb, 0x05452396, 0x0a8a472c, 0x15148e58, 0x2a291cb0,
+    0x54523960, 0xa8a472c0
+  },
+  {
+    0xb58b27b3, 0xb0674927, 0xbbbf940f, 0xac0e2e5f, 0x836d5aff,
+    0xddabb3bf, 0x6026613f, 0xc04cc27e, 0x5be882bd, 0xb7d1057a,
+    0xb4d30cb5, 0xb2d71f2b, 0xbedf3817, 0xa6cf766f, 0x96efea9f,
+    0xf6aed37f, 0x362ca0bf, 0x6c59417e, 0xd8b282fc, 0x6a1403b9,
+    0xd4280772, 0x732108a5, 0xe642114a, 0x17f524d5, 0x2fea49aa,
+    0x5fd49354, 0xbfa926a8, 0xa4234b11, 0x93379063, 0xfd1e2687,
+    0x214d4b4f, 0x429a969e
+  },
+  {
+    0xfe273162, 0x273f6485, 0x4e7ec90a, 0x9cfd9214, 0xe28a2269,
+    0x1e654293, 0x3cca8526, 0x79950a4c, 0xf32a1498, 0x3d252f71,
+    0x7a4a5ee2, 0xf494bdc4, 0x32587dc9, 0x64b0fb92, 0xc961f724,
+    0x49b2e809, 0x9365d012, 0xfdbaa665, 0x20044a8b, 0x40089516,
+    0x80112a2c, 0xdb535219, 0x6dd7a273, 0xdbaf44e6, 0x6c2f8f8d,
+    0xd85f1f1a, 0x6bcf3875, 0xd79e70ea, 0x744de795, 0xe89bcf2a,
+    0x0a469815, 0x148d302a
+  },
+  {
+    0xd3c98813, 0x7ce21667, 0xf9c42cce, 0x28f95fdd, 0x51f2bfba,
+    0xa3e57f74, 0x9cbbf8a9, 0xe206f713, 0x1f7ce867, 0x3ef9d0ce,
+    0x7df3a19c, 0xfbe74338, 0x2cbf8031, 0x597f0062, 0xb2fe00c4,
+    0xbe8d07c9, 0xa66b09d3, 0x97a715e7, 0xf43f2d8f, 0x330f5d5f,
+    0x661ebabe, 0xcc3d757c, 0x430becb9, 0x8617d972, 0xd75eb4a5,
+    0x75cc6f0b, 0xeb98de16, 0x0c40ba6d, 0x188174da, 0x3102e9b4,
+    0x6205d368, 0xc40ba6d0
+  },
+  {
+    0xf7d6deb4, 0x34dcbb29, 0x69b97652, 0xd372eca4, 0x7d94df09,
+    0xfb29be12, 0x2d227a65, 0x5a44f4ca, 0xb489e994, 0xb262d569,
+    0xbfb4ac93, 0xa4185f67, 0x9341b88f, 0xfdf2775f, 0x2095e8ff,
+    0x412bd1fe, 0x8257a3fc, 0xdfde41b9, 0x64cd8533, 0xc99b0a66,
+    0x4847128d, 0x908e251a, 0xfa6d4c75, 0x2fab9eab, 0x5f573d56,
+    0xbeae7aac, 0xa62df319, 0x972ae073, 0xf524c6a7, 0x31388b0f,
+    0x6271161e, 0xc4e22c3c
+  },
+  {
+    0xedb88320, 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+    0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100,
+    0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000,
+    0x00004000, 0x00008000, 0x00010000, 0x00020000, 0x00040000,
+    0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
+    0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
+    0x20000000, 0x40000000
+  },
+  {
+    0x76dc4190, 0xedb88320, 0x00000001, 0x00000002, 0x00000004,
+    0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080,
+    0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000,
+    0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
+    0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000,
+    0x00800000, 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+    0x10000000, 0x20000000
+  },
+  {
+    0x1db71064, 0x3b6e20c8, 0x76dc4190, 0xedb88320, 0x00000001,
+    0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
+    0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400,
+    0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000,
+    0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000,
+    0x00200000, 0x00400000, 0x00800000, 0x01000000, 0x02000000,
+    0x04000000, 0x08000000
+  }
+};
+
+#endif /* CRC32_COMB_TBL_H_ */
--- a/libs/zlibng/crc32_p.h
+++ b/libs/zlibng/crc32_p.h
@ -0,0 +1,19 @@
+#ifndef CRC32_P_H_
+#define CRC32_P_H_
+
+#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */
+
+
+/* Multiply the GF(2) matrix mat by the bit vector vec.  Entry k of mat is
+ * XORed into the result for every bit k that is set in vec; entries above
+ * the highest set bit of vec are never read. */
+static inline uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec) {
+    uint32_t acc = 0;
+    const uint32_t *entry;
+
+    /* walk vec from its lowest bit upward, advancing through mat in step */
+    for (entry = mat; vec != 0; vec >>= 1, entry++) {
+        if (vec & 1)
+            acc ^= *entry;
+    }
+    return acc;
+}
+
+
+#endif /* CRC32_P_H_ */
--- a/libs/zlibng/crc32_tbl.h
+++ b/libs/zlibng/crc32_tbl.h
@ -1,8 +1,8 @@
-#ifndef CRC32_H_
-#define CRC32_H_
+#ifndef CRC32_TBL_H_
+#define CRC32_TBL_H_

-/* crc32.h -- tables for rapid CRC calculation
- * Generated automatically by crc32.c
+/* crc32_tbl.h -- tables for rapid CRC calculation
+ * Generated automatically by makecrct.c
 */

 static const uint32_t crc_table[8][256] =
@ -441,295 +441,4 @@ static const uint32_t crc_table[8][256] =
  }
 };

-static const uint32_t crc_comb[32][32] =
-{
-  {
-    0x77073096UL, 0xee0e612cUL, 0x076dc419UL, 0x0edb8832UL, 0x1db71064UL,
-    0x3b6e20c8UL, 0x76dc4190UL, 0xedb88320UL, 0x00000001UL, 0x00000002UL,
-    0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL,
-    0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
-    0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL,
-    0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL,
-    0x00400000UL, 0x00800000UL
-  },
-  {
-    0x191b3141UL, 0x32366282UL, 0x646cc504UL, 0xc8d98a08UL, 0x4ac21251UL,
-    0x958424a2UL, 0xf0794f05UL, 0x3b83984bUL, 0x77073096UL, 0xee0e612cUL,
-    0x076dc419UL, 0x0edb8832UL, 0x1db71064UL, 0x3b6e20c8UL, 0x76dc4190UL,
-    0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
-    0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL,
-    0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL,
-    0x00004000UL, 0x00008000UL
-  },
-  {
-    0xb8bc6765UL, 0xaa09c88bUL, 0x8f629757UL, 0xc5b428efUL, 0x5019579fUL,
-    0xa032af3eUL, 0x9b14583dUL, 0xed59b63bUL, 0x01c26a37UL, 0x0384d46eUL,
-    0x0709a8dcUL, 0x0e1351b8UL, 0x1c26a370UL, 0x384d46e0UL, 0x709a8dc0UL,
-    0xe1351b80UL, 0x191b3141UL, 0x32366282UL, 0x646cc504UL, 0xc8d98a08UL,
-    0x4ac21251UL, 0x958424a2UL, 0xf0794f05UL, 0x3b83984bUL, 0x77073096UL,
-    0xee0e612cUL, 0x076dc419UL, 0x0edb8832UL, 0x1db71064UL, 0x3b6e20c8UL,
-    0x76dc4190UL, 0xedb88320UL
-  },
-  {
-    0xccaa009eUL, 0x4225077dUL, 0x844a0efaUL, 0xd3e51bb5UL, 0x7cbb312bUL,
-    0xf9766256UL, 0x299dc2edUL, 0x533b85daUL, 0xa6770bb4UL, 0x979f1129UL,
-    0xf44f2413UL, 0x33ef4e67UL, 0x67de9cceUL, 0xcfbd399cUL, 0x440b7579UL,
-    0x8816eaf2UL, 0xcb5cd3a5UL, 0x4dc8a10bUL, 0x9b914216UL, 0xec53826dUL,
-    0x03d6029bUL, 0x07ac0536UL, 0x0f580a6cUL, 0x1eb014d8UL, 0x3d6029b0UL,
-    0x7ac05360UL, 0xf580a6c0UL, 0x30704bc1UL, 0x60e09782UL, 0xc1c12f04UL,
-    0x58f35849UL, 0xb1e6b092UL
-  },
-  {
-    0xae689191UL, 0x87a02563UL, 0xd4314c87UL, 0x73139f4fUL, 0xe6273e9eUL,
-    0x173f7b7dUL, 0x2e7ef6faUL, 0x5cfdedf4UL, 0xb9fbdbe8UL, 0xa886b191UL,
-    0x8a7c6563UL, 0xcf89cc87UL, 0x44629f4fUL, 0x88c53e9eUL, 0xcafb7b7dUL,
-    0x4e87f0bbUL, 0x9d0fe176UL, 0xe16ec4adUL, 0x19ac8f1bUL, 0x33591e36UL,
-    0x66b23c6cUL, 0xcd6478d8UL, 0x41b9f7f1UL, 0x8373efe2UL, 0xdd96d985UL,
-    0x605cb54bUL, 0xc0b96a96UL, 0x5a03d36dUL, 0xb407a6daUL, 0xb37e4bf5UL,
-    0xbd8d91abUL, 0xa06a2517UL
-  },
-  {
-    0xf1da05aaUL, 0x38c50d15UL, 0x718a1a2aUL, 0xe3143454UL, 0x1d596ee9UL,
-    0x3ab2ddd2UL, 0x7565bba4UL, 0xeacb7748UL, 0x0ee7e8d1UL, 0x1dcfd1a2UL,
-    0x3b9fa344UL, 0x773f4688UL, 0xee7e8d10UL, 0x078c1c61UL, 0x0f1838c2UL,
-    0x1e307184UL, 0x3c60e308UL, 0x78c1c610UL, 0xf1838c20UL, 0x38761e01UL,
-    0x70ec3c02UL, 0xe1d87804UL, 0x18c1f649UL, 0x3183ec92UL, 0x6307d924UL,
-    0xc60fb248UL, 0x576e62d1UL, 0xaedcc5a2UL, 0x86c88d05UL, 0xd6e01c4bUL,
-    0x76b13ed7UL, 0xed627daeUL
-  },
-  {
-    0x8f352d95UL, 0xc51b5d6bUL, 0x5147bc97UL, 0xa28f792eUL, 0x9e6ff41dUL,
-    0xe7aeee7bUL, 0x142cdab7UL, 0x2859b56eUL, 0x50b36adcUL, 0xa166d5b8UL,
-    0x99bcad31UL, 0xe8085c23UL, 0x0b61be07UL, 0x16c37c0eUL, 0x2d86f81cUL,
-    0x5b0df038UL, 0xb61be070UL, 0xb746c6a1UL, 0xb5fc8b03UL, 0xb0881047UL,
-    0xba6126cfUL, 0xafb34bdfUL, 0x841791ffUL, 0xd35e25bfUL, 0x7dcd4d3fUL,
-    0xfb9a9a7eUL, 0x2c4432bdUL, 0x5888657aUL, 0xb110caf4UL, 0xb95093a9UL,
-    0xa9d02113UL, 0x88d14467UL
-  },
-  {
-    0x33fff533UL, 0x67ffea66UL, 0xcfffd4ccUL, 0x448eafd9UL, 0x891d5fb2UL,
-    0xc94bb925UL, 0x49e6740bUL, 0x93cce816UL, 0xfce8d66dUL, 0x22a0aa9bUL,
-    0x45415536UL, 0x8a82aa6cUL, 0xce745299UL, 0x4799a373UL, 0x8f3346e6UL,
-    0xc5178b8dUL, 0x515e115bUL, 0xa2bc22b6UL, 0x9e09432dUL, 0xe763801bUL,
-    0x15b60677UL, 0x2b6c0ceeUL, 0x56d819dcUL, 0xadb033b8UL, 0x80116131UL,
-    0xdb53c423UL, 0x6dd68e07UL, 0xdbad1c0eUL, 0x6c2b3e5dUL, 0xd8567cbaUL,
-    0x6bddff35UL, 0xd7bbfe6aUL
-  },
-  {
-    0xce3371cbUL, 0x4717e5d7UL, 0x8e2fcbaeUL, 0xc72e911dUL, 0x552c247bUL,
-    0xaa5848f6UL, 0x8fc197adUL, 0xc4f2291bUL, 0x52955477UL, 0xa52aa8eeUL,
-    0x9124579dUL, 0xf939a97bUL, 0x290254b7UL, 0x5204a96eUL, 0xa40952dcUL,
-    0x9363a3f9UL, 0xfdb641b3UL, 0x201d8527UL, 0x403b0a4eUL, 0x8076149cUL,
-    0xdb9d2f79UL, 0x6c4b58b3UL, 0xd896b166UL, 0x6a5c648dUL, 0xd4b8c91aUL,
-    0x72009475UL, 0xe40128eaUL, 0x13735795UL, 0x26e6af2aUL, 0x4dcd5e54UL,
-    0x9b9abca8UL, 0xec447f11UL
-  },
-  {
-    0x1072db28UL, 0x20e5b650UL, 0x41cb6ca0UL, 0x8396d940UL, 0xdc5cb4c1UL,
-    0x63c86fc3UL, 0xc790df86UL, 0x5450b94dUL, 0xa8a1729aUL, 0x8a33e375UL,
-    0xcf16c0abUL, 0x455c8717UL, 0x8ab90e2eUL, 0xce031a1dUL, 0x4777327bUL,
-    0x8eee64f6UL, 0xc6adcfadUL, 0x562a991bUL, 0xac553236UL, 0x83db622dUL,
-    0xdcc7c21bUL, 0x62fe8277UL, 0xc5fd04eeUL, 0x508b0f9dUL, 0xa1161f3aUL,
-    0x995d3835UL, 0xe9cb762bUL, 0x08e7ea17UL, 0x11cfd42eUL, 0x239fa85cUL,
-    0x473f50b8UL, 0x8e7ea170UL
-  },
-  {
-    0xf891f16fUL, 0x2a52e49fUL, 0x54a5c93eUL, 0xa94b927cUL, 0x89e622b9UL,
-    0xc8bd4333UL, 0x4a0b8027UL, 0x9417004eUL, 0xf35f06ddUL, 0x3dcf0bfbUL,
-    0x7b9e17f6UL, 0xf73c2fecUL, 0x35095999UL, 0x6a12b332UL, 0xd4256664UL,
-    0x733bca89UL, 0xe6779512UL, 0x179e2c65UL, 0x2f3c58caUL, 0x5e78b194UL,
-    0xbcf16328UL, 0xa293c011UL, 0x9e568663UL, 0xe7dc0a87UL, 0x14c9134fUL,
-    0x2992269eUL, 0x53244d3cUL, 0xa6489a78UL, 0x97e032b1UL, 0xf4b16323UL,
-    0x3213c007UL, 0x6427800eUL
-  },
-  {
-    0x88b6ba63UL, 0xca1c7287UL, 0x4f49e34fUL, 0x9e93c69eUL, 0xe6568b7dUL,
-    0x17dc10bbUL, 0x2fb82176UL, 0x5f7042ecUL, 0xbee085d8UL, 0xa6b00df1UL,
-    0x96111da3UL, 0xf7533d07UL, 0x35d77c4fUL, 0x6baef89eUL, 0xd75df13cUL,
-    0x75cae439UL, 0xeb95c872UL, 0x0c5a96a5UL, 0x18b52d4aUL, 0x316a5a94UL,
-    0x62d4b528UL, 0xc5a96a50UL, 0x5023d2e1UL, 0xa047a5c2UL, 0x9bfe4dc5UL,
-    0xec8d9dcbUL, 0x026a3dd7UL, 0x04d47baeUL, 0x09a8f75cUL, 0x1351eeb8UL,
-    0x26a3dd70UL, 0x4d47bae0UL
-  },
-  {
-    0x5ad8a92cUL, 0xb5b15258UL, 0xb013a2f1UL, 0xbb5643a3UL, 0xaddd8107UL,
-    0x80ca044fUL, 0xdae50edfUL, 0x6ebb1bffUL, 0xdd7637feUL, 0x619d69bdUL,
-    0xc33ad37aUL, 0x5d04a0b5UL, 0xba09416aUL, 0xaf638495UL, 0x85b60f6bUL,
-    0xd01d1897UL, 0x7b4b376fUL, 0xf6966edeUL, 0x365ddbfdUL, 0x6cbbb7faUL,
-    0xd9776ff4UL, 0x699fd9a9UL, 0xd33fb352UL, 0x7d0e60e5UL, 0xfa1cc1caUL,
-    0x2f4885d5UL, 0x5e910baaUL, 0xbd221754UL, 0xa13528e9UL, 0x991b5793UL,
-    0xe947a967UL, 0x09fe548fUL
-  },
-  {
-    0xb566f6e2UL, 0xb1bceb85UL, 0xb808d14bUL, 0xab60a4d7UL, 0x8db04fefUL,
-    0xc011999fUL, 0x5b52357fUL, 0xb6a46afeUL, 0xb639d3bdUL, 0xb702a13bUL,
-    0xb5744437UL, 0xb1998e2fUL, 0xb8421a1fUL, 0xabf5327fUL, 0x8c9b62bfUL,
-    0xc247c33fUL, 0x5ffe803fUL, 0xbffd007eUL, 0xa48b06bdUL, 0x92670b3bUL,
-    0xffbf1037UL, 0x240f262fUL, 0x481e4c5eUL, 0x903c98bcUL, 0xfb083739UL,
-    0x2d616833UL, 0x5ac2d066UL, 0xb585a0ccUL, 0xb07a47d9UL, 0xbb8589f3UL,
-    0xac7a15a7UL, 0x83852d0fUL
-  },
-  {
-    0x9d9129bfUL, 0xe053553fUL, 0x1bd7ac3fUL, 0x37af587eUL, 0x6f5eb0fcUL,
-    0xdebd61f8UL, 0x660bc5b1UL, 0xcc178b62UL, 0x435e1085UL, 0x86bc210aUL,
-    0xd6094455UL, 0x77638eebUL, 0xeec71dd6UL, 0x06ff3dedUL, 0x0dfe7bdaUL,
-    0x1bfcf7b4UL, 0x37f9ef68UL, 0x6ff3ded0UL, 0xdfe7bda0UL, 0x64be7d01UL,
-    0xc97cfa02UL, 0x4988f245UL, 0x9311e48aUL, 0xfd52cf55UL, 0x21d498ebUL,
-    0x43a931d6UL, 0x875263acUL, 0xd5d5c119UL, 0x70da8473UL, 0xe1b508e6UL,
-    0x181b178dUL, 0x30362f1aUL
-  },
-  {
-    0x2ee43a2cUL, 0x5dc87458UL, 0xbb90e8b0UL, 0xac50d721UL, 0x83d0a803UL,
-    0xdcd05647UL, 0x62d1aacfUL, 0xc5a3559eUL, 0x5037ad7dUL, 0xa06f5afaUL,
-    0x9bafb3b5UL, 0xec2e612bUL, 0x032dc417UL, 0x065b882eUL, 0x0cb7105cUL,
-    0x196e20b8UL, 0x32dc4170UL, 0x65b882e0UL, 0xcb7105c0UL, 0x4d930dc1UL,
-    0x9b261b82UL, 0xed3d3145UL, 0x010b64cbUL, 0x0216c996UL, 0x042d932cUL,
-    0x085b2658UL, 0x10b64cb0UL, 0x216c9960UL, 0x42d932c0UL, 0x85b26580UL,
-    0xd015cd41UL, 0x7b5a9cc3UL
-  },
-  {
-    0x1b4511eeUL, 0x368a23dcUL, 0x6d1447b8UL, 0xda288f70UL, 0x6f2018a1UL,
-    0xde403142UL, 0x67f164c5UL, 0xcfe2c98aUL, 0x44b49555UL, 0x89692aaaUL,
-    0xc9a35315UL, 0x4837a06bUL, 0x906f40d6UL, 0xfbaf87edUL, 0x2c2e099bUL,
-    0x585c1336UL, 0xb0b8266cUL, 0xba014a99UL, 0xaf739373UL, 0x859620a7UL,
-    0xd05d470fUL, 0x7bcb885fUL, 0xf79710beUL, 0x345f273dUL, 0x68be4e7aUL,
-    0xd17c9cf4UL, 0x79883fa9UL, 0xf3107f52UL, 0x3d51f8e5UL, 0x7aa3f1caUL,
-    0xf547e394UL, 0x31fec169UL
-  },
-  {
-    0xbce15202UL, 0xa2b3a245UL, 0x9e1642cbUL, 0xe75d83d7UL, 0x15ca01efUL,
-    0x2b9403deUL, 0x572807bcUL, 0xae500f78UL, 0x87d118b1UL, 0xd4d33723UL,
-    0x72d76807UL, 0xe5aed00eUL, 0x102ca65dUL, 0x20594cbaUL, 0x40b29974UL,
-    0x816532e8UL, 0xd9bb6391UL, 0x6807c163UL, 0xd00f82c6UL, 0x7b6e03cdUL,
-    0xf6dc079aUL, 0x36c90975UL, 0x6d9212eaUL, 0xdb2425d4UL, 0x6d394de9UL,
-    0xda729bd2UL, 0x6f9431e5UL, 0xdf2863caUL, 0x6521c1d5UL, 0xca4383aaUL,
-    0x4ff60115UL, 0x9fec022aUL
-  },
-  {
-    0xff08e5efUL, 0x2560cd9fUL, 0x4ac19b3eUL, 0x9583367cUL, 0xf0776ab9UL,
-    0x3b9fd333UL, 0x773fa666UL, 0xee7f4cccUL, 0x078f9fd9UL, 0x0f1f3fb2UL,
-    0x1e3e7f64UL, 0x3c7cfec8UL, 0x78f9fd90UL, 0xf1f3fb20UL, 0x3896f001UL,
-    0x712de002UL, 0xe25bc004UL, 0x1fc68649UL, 0x3f8d0c92UL, 0x7f1a1924UL,
-    0xfe343248UL, 0x271962d1UL, 0x4e32c5a2UL, 0x9c658b44UL, 0xe3ba10c9UL,
-    0x1c0527d3UL, 0x380a4fa6UL, 0x70149f4cUL, 0xe0293e98UL, 0x1b237b71UL,
-    0x3646f6e2UL, 0x6c8dedc4UL
-  },
-  {
-    0x6f76172eUL, 0xdeec2e5cUL, 0x66a95af9UL, 0xcd52b5f2UL, 0x41d46da5UL,
-    0x83a8db4aUL, 0xdc20b0d5UL, 0x633067ebUL, 0xc660cfd6UL, 0x57b099edUL,
-    0xaf6133daUL, 0x85b361f5UL, 0xd017c5abUL, 0x7b5e8d17UL, 0xf6bd1a2eUL,
-    0x360b321dUL, 0x6c16643aUL, 0xd82cc874UL, 0x6b2896a9UL, 0xd6512d52UL,
-    0x77d35ce5UL, 0xefa6b9caUL, 0x043c75d5UL, 0x0878ebaaUL, 0x10f1d754UL,
-    0x21e3aea8UL, 0x43c75d50UL, 0x878ebaa0UL, 0xd46c7301UL, 0x73a9e043UL,
-    0xe753c086UL, 0x15d6874dUL
-  },
-  {
-    0x56f5cab9UL, 0xadeb9572UL, 0x80a62ca5UL, 0xda3d5f0bUL, 0x6f0bb857UL,
-    0xde1770aeUL, 0x675fe71dUL, 0xcebfce3aUL, 0x460e9a35UL, 0x8c1d346aUL,
-    0xc34b6e95UL, 0x5de7db6bUL, 0xbbcfb6d6UL, 0xacee6bedUL, 0x82add19bUL,
-    0xde2aa577UL, 0x67244cafUL, 0xce48995eUL, 0x47e034fdUL, 0x8fc069faUL,
-    0xc4f1d5b5UL, 0x5292ad2bUL, 0xa5255a56UL, 0x913bb2edUL, 0xf906639bUL,
-    0x297dc177UL, 0x52fb82eeUL, 0xa5f705dcUL, 0x909f0df9UL, 0xfa4f1db3UL,
-    0x2fef3d27UL, 0x5fde7a4eUL
-  },
-  {
-    0x385993acUL, 0x70b32758UL, 0xe1664eb0UL, 0x19bd9b21UL, 0x337b3642UL,
-    0x66f66c84UL, 0xcdecd908UL, 0x40a8b451UL, 0x815168a2UL, 0xd9d3d705UL,
-    0x68d6a84bUL, 0xd1ad5096UL, 0x782ba76dUL, 0xf0574edaUL, 0x3bdf9bf5UL,
-    0x77bf37eaUL, 0xef7e6fd4UL, 0x058dd9e9UL, 0x0b1bb3d2UL, 0x163767a4UL,
-    0x2c6ecf48UL, 0x58dd9e90UL, 0xb1bb3d20UL, 0xb8077c01UL, 0xab7ffe43UL,
-    0x8d8efac7UL, 0xc06cf3cfUL, 0x5ba8e1dfUL, 0xb751c3beUL, 0xb5d2813dUL,
-    0xb0d4043bUL, 0xbad90e37UL
-  },
-  {
-    0xb4247b20UL, 0xb339f001UL, 0xbd02e643UL, 0xa174cac7UL, 0x999893cfUL,
-    0xe84021dfUL, 0x0bf145ffUL, 0x17e28bfeUL, 0x2fc517fcUL, 0x5f8a2ff8UL,
-    0xbf145ff0UL, 0xa559b9a1UL, 0x91c27503UL, 0xf8f5ec47UL, 0x2a9adecfUL,
-    0x5535bd9eUL, 0xaa6b7b3cUL, 0x8fa7f039UL, 0xc43ee633UL, 0x530cca27UL,
-    0xa619944eUL, 0x97422eddUL, 0xf5f55bfbUL, 0x309bb1b7UL, 0x6137636eUL,
-    0xc26ec6dcUL, 0x5fac8bf9UL, 0xbf5917f2UL, 0xa5c329a5UL, 0x90f7550bUL,
-    0xfa9fac57UL, 0x2e4e5eefUL
-  },
-  {
-    0x695186a7UL, 0xd2a30d4eUL, 0x7e371cddUL, 0xfc6e39baUL, 0x23ad7535UL,
-    0x475aea6aUL, 0x8eb5d4d4UL, 0xc61aafe9UL, 0x57445993UL, 0xae88b326UL,
-    0x8660600dUL, 0xd7b1c65bUL, 0x74128af7UL, 0xe82515eeUL, 0x0b3b2d9dUL,
-    0x16765b3aUL, 0x2cecb674UL, 0x59d96ce8UL, 0xb3b2d9d0UL, 0xbc14b5e1UL,
-    0xa3586d83UL, 0x9dc1dd47UL, 0xe0f2bccfUL, 0x1a947fdfUL, 0x3528ffbeUL,
-    0x6a51ff7cUL, 0xd4a3fef8UL, 0x7236fbb1UL, 0xe46df762UL, 0x13aae885UL,
-    0x2755d10aUL, 0x4eaba214UL
-  },
-  {
-    0x66bc001eUL, 0xcd78003cUL, 0x41810639UL, 0x83020c72UL, 0xdd751ea5UL,
-    0x619b3b0bUL, 0xc3367616UL, 0x5d1dea6dUL, 0xba3bd4daUL, 0xaf06aff5UL,
-    0x857c59abUL, 0xd189b517UL, 0x78626c6fUL, 0xf0c4d8deUL, 0x3af8b7fdUL,
-    0x75f16ffaUL, 0xebe2dff4UL, 0x0cb4b9a9UL, 0x19697352UL, 0x32d2e6a4UL,
-    0x65a5cd48UL, 0xcb4b9a90UL, 0x4de63361UL, 0x9bcc66c2UL, 0xece9cbc5UL,
-    0x02a291cbUL, 0x05452396UL, 0x0a8a472cUL, 0x15148e58UL, 0x2a291cb0UL,
-    0x54523960UL, 0xa8a472c0UL
-  },
-  {
-    0xb58b27b3UL, 0xb0674927UL, 0xbbbf940fUL, 0xac0e2e5fUL, 0x836d5affUL,
-    0xddabb3bfUL, 0x6026613fUL, 0xc04cc27eUL, 0x5be882bdUL, 0xb7d1057aUL,
-    0xb4d30cb5UL, 0xb2d71f2bUL, 0xbedf3817UL, 0xa6cf766fUL, 0x96efea9fUL,
-    0xf6aed37fUL, 0x362ca0bfUL, 0x6c59417eUL, 0xd8b282fcUL, 0x6a1403b9UL,
-    0xd4280772UL, 0x732108a5UL, 0xe642114aUL, 0x17f524d5UL, 0x2fea49aaUL,
-    0x5fd49354UL, 0xbfa926a8UL, 0xa4234b11UL, 0x93379063UL, 0xfd1e2687UL,
-    0x214d4b4fUL, 0x429a969eUL
-  },
-  {
-    0xfe273162UL, 0x273f6485UL, 0x4e7ec90aUL, 0x9cfd9214UL, 0xe28a2269UL,
-    0x1e654293UL, 0x3cca8526UL, 0x79950a4cUL, 0xf32a1498UL, 0x3d252f71UL,
-    0x7a4a5ee2UL, 0xf494bdc4UL, 0x32587dc9UL, 0x64b0fb92UL, 0xc961f724UL,
-    0x49b2e809UL, 0x9365d012UL, 0xfdbaa665UL, 0x20044a8bUL, 0x40089516UL,
-    0x80112a2cUL, 0xdb535219UL, 0x6dd7a273UL, 0xdbaf44e6UL, 0x6c2f8f8dUL,
-    0xd85f1f1aUL, 0x6bcf3875UL, 0xd79e70eaUL, 0x744de795UL, 0xe89bcf2aUL,
-    0x0a469815UL, 0x148d302aUL
-  },
-  {
-    0xd3c98813UL, 0x7ce21667UL, 0xf9c42cceUL, 0x28f95fddUL, 0x51f2bfbaUL,
-    0xa3e57f74UL, 0x9cbbf8a9UL, 0xe206f713UL, 0x1f7ce867UL, 0x3ef9d0ceUL,
-    0x7df3a19cUL, 0xfbe74338UL, 0x2cbf8031UL, 0x597f0062UL, 0xb2fe00c4UL,
-    0xbe8d07c9UL, 0xa66b09d3UL, 0x97a715e7UL, 0xf43f2d8fUL, 0x330f5d5fUL,
-    0x661ebabeUL, 0xcc3d757cUL, 0x430becb9UL, 0x8617d972UL, 0xd75eb4a5UL,
-    0x75cc6f0bUL, 0xeb98de16UL, 0x0c40ba6dUL, 0x188174daUL, 0x3102e9b4UL,
-    0x6205d368UL, 0xc40ba6d0UL
-  },
-  {
-    0xf7d6deb4UL, 0x34dcbb29UL, 0x69b97652UL, 0xd372eca4UL, 0x7d94df09UL,
-    0xfb29be12UL, 0x2d227a65UL, 0x5a44f4caUL, 0xb489e994UL, 0xb262d569UL,
-    0xbfb4ac93UL, 0xa4185f67UL, 0x9341b88fUL, 0xfdf2775fUL, 0x2095e8ffUL,
-    0x412bd1feUL, 0x8257a3fcUL, 0xdfde41b9UL, 0x64cd8533UL, 0xc99b0a66UL,
-    0x4847128dUL, 0x908e251aUL, 0xfa6d4c75UL, 0x2fab9eabUL, 0x5f573d56UL,
-    0xbeae7aacUL, 0xa62df319UL, 0x972ae073UL, 0xf524c6a7UL, 0x31388b0fUL,
-    0x6271161eUL, 0xc4e22c3cUL
-  },
-  {
-    0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
-    0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL,
-    0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL,
-    0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL, 0x00040000UL,
-    0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
-    0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, 0x10000000UL,
-    0x20000000UL, 0x40000000UL
-  },
-  {
-    0x76dc4190UL, 0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL,
-    0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
-    0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL,
-    0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL,
-    0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL,
-    0x00800000UL, 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
-    0x10000000UL, 0x20000000UL
-  },
-  {
-    0x1db71064UL, 0x3b6e20c8UL, 0x76dc4190UL, 0xedb88320UL, 0x00000001UL,
-    0x00000002UL, 0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL,
-    0x00000040UL, 0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL,
-    0x00000800UL, 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
-    0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL,
-    0x00200000UL, 0x00400000UL, 0x00800000UL, 0x01000000UL, 0x02000000UL,
-    0x04000000UL, 0x08000000UL
-  }
-};
-#endif /* CRC32_H_ */
+#endif /* CRC32_TBL_H_ */
--- a/libs/zlibng/deflate.c
+++ b/libs/zlibng/deflate.c
--- a/libs/zlibng/deflate.h
+++ b/libs/zlibng/deflate.h
@ -10,10 +10,8 @@
   subject to change. Applications should only use zlib.h.
 */

-/* @(#) $Id$ */
-
 #include "zutil.h"
-#include "gzendian.h"
+#include "zendian.h"

 /* define NO_GZIP when compiling if you want to disable gzip header and
   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
@ -23,10 +21,6 @@
 #  define GZIP
 #endif

-#define NIL 0
-/* Tail of hash chains */
-
-
 /* ===========================================================================
 * Internal compression state.
 */
@ -52,7 +46,7 @@
 #define MAX_BITS 15
 /* All codes must not exceed MAX_BITS bits */

-#define Buf_size 16
+#define BIT_BUF_SIZE 64
 /* size of bit buffer in bi_buf */

 #define END_BLOCK 256
@ -70,6 +64,10 @@
 #define FINISH_STATE 666    /* stream complete */
 /* Stream status */

+#define HASH_BITS    16u           /* log2(HASH_SIZE) */
+#define HASH_SIZE 65536u           /* number of elements in hash table */
+#define HASH_MASK (HASH_SIZE - 1u) /* HASH_SIZE-1 */
+

 /* Data structure describing a single value and its code string. */
 typedef struct ct_data_s {
@ -97,34 +95,47 @@ typedef struct tree_desc_s {
 } tree_desc;

 typedef uint16_t Pos;
-typedef unsigned IPos;

 /* A Pos is an index in the character window. We use short instead of int to
- * save space in the various tables. IPos is used only for parameter passing.
+ * save space in the various tables.
 */

 typedef struct internal_state {
    PREFIX3(stream)      *strm;            /* pointer back to this zlib stream */
-    int                  status;           /* as the name implies */
    unsigned char        *pending_buf;     /* output still pending */
-    unsigned long        pending_buf_size; /* size of pending_buf */
    unsigned char        *pending_out;     /* next pending byte to output to the stream */
+    uint32_t             pending_buf_size; /* size of pending_buf */
    uint32_t             pending;          /* nb of bytes in the pending buffer */
    int                  wrap;             /* bit 0 true for zlib, bit 1 true for gzip */
-    PREFIX(gz_headerp)   gzhead;           /* gzip header information to write */
    uint32_t             gzindex;          /* where in extra, name, or comment */
-    unsigned char        method;           /* can only be DEFLATED */
+    PREFIX(gz_headerp)   gzhead;           /* gzip header information to write */
+    int                  status;           /* as the name implies */
    int                  last_flush;       /* value of flush param for previous deflate call */
+    int                  reproducible;     /* Whether reproducible compression results are required. */

-#ifdef X86_PCLMULQDQ_CRC
-    unsigned crc0[4 * 5];
-#endif
+    int block_open;
+    /* Whether or not a block is currently open for the QUICK deflation scheme.
+     * This is set to 1 if there is an active block, or 0 if the block was just closed.
+     */

                /* used by deflate.c: */

    unsigned int  w_size;            /* LZ77 window size (32K by default) */
    unsigned int  w_bits;            /* log2(w_size)  (8..16) */
    unsigned int  w_mask;            /* w_size - 1 */
+    unsigned int  lookahead;         /* number of valid bytes ahead in window */
+
+    unsigned int high_water;
+    /* High water mark offset in window for initialized bytes -- bytes above
+     * this are set to zero in order to avoid memory check warnings when
+     * longest match routines access bytes past the input.  This is then
+     * updated to the new high water mark.
+     */
+
+    unsigned int window_size;
+    /* Actual size of window: 2*wSize, except when the user input buffer
+     * is directly used as sliding window.
+     */

    unsigned char *window;
    /* Sliding window. Input bytes are read into the second half of the window,
@ -136,44 +147,24 @@ typedef struct internal_state {
     * To do: use the user input buffer as sliding window.
     */

-    unsigned long window_size;
-    /* Actual size of window: 2*wSize, except when the user input buffer
-     * is directly used as sliding window.
-     */
-
    Pos *prev;
    /* Link to older string with same hash index. To limit the size of this
     * array to 64K, this link is maintained only for the last 32K strings.
     * An index in this array is thus a window index modulo 32K.
     */

-    Pos *head; /* Heads of the hash chains or NIL. */
+    Pos *head; /* Heads of the hash chains or 0. */

-    unsigned int  ins_h;             /* hash index of string to be inserted */
-    unsigned int  hash_size;         /* number of elements in hash table */
-    unsigned int  hash_bits;         /* log2(hash_size) */
-    unsigned int  hash_mask;         /* hash_size-1 */
-
-    #if !defined(__x86_64__) && !defined(_M_X64) && !defined(__i386) && !defined(_M_IX86)
-    unsigned int  hash_shift;
-    #endif
-    /* Number of bits by which ins_h must be shifted at each input
-     * step. It must be such that after MIN_MATCH steps, the oldest
-     * byte no longer takes part in the hash key, that is:
-     *   hash_shift * MIN_MATCH >= hash_bits
-     */
-
-    long block_start;
+    int block_start;
    /* Window position at the beginning of the current output block. Gets
     * negative when the window is moved backwards.
     */

    unsigned int match_length;       /* length of best match */
-    IPos         prev_match;         /* previous match */
+    Pos          prev_match;         /* previous match */
    int          match_available;    /* set if previous match exists */
    unsigned int strstart;           /* start of string to insert */
    unsigned int match_start;        /* start of matching string */
-    unsigned int lookahead;          /* number of valid bytes ahead in window */

    unsigned int prev_length;
    /* Length of the best match at previous step. Matches not greater than this
@ -181,15 +172,13 @@ typedef struct internal_state {
     */

    unsigned int max_chain_length;
-    /* To speed up deflation, hash chains are never searched beyond this
-     * length.  A higher limit improves compression ratio but degrades the
-     * speed.
+    /* To speed up deflation, hash chains are never searched beyond this length.
+     * A higher limit improves compression ratio but degrades the speed.
     */

    unsigned int max_lazy_match;
-    /* Attempt to find a better match only when the current match is strictly
-     * smaller than this value. This mechanism is used only for compression
-     * levels >= 4.
+    /* Attempt to find a better match only when the current match is strictly smaller
+     * than this value. This mechanism is used only for compression levels >= 4.
     */
 #   define max_insert_length  max_lazy_match
    /* Insert new strings in the hash table only if the match length is not
@ -205,6 +194,11 @@ typedef struct internal_state {

    int nice_match; /* Stop searching when current match exceeds this */

+#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
+    /* Only used if X86_PCLMULQDQ_CRC is defined */
+    unsigned crc0[4 * 5];
+#endif
+
                /* used by trees.c: */
    /* Didn't use ct_data typedef below to suppress compiler warning */
    struct ct_data_s dyn_ltree[HEAP_SIZE];   /* literal and length tree */
@ -229,8 +223,6 @@ typedef struct internal_state {
    /* Depth of each subtree used as tie breaker for trees of equal frequency
     */

-    unsigned char *sym_buf;       /* buffer for distances and literals/lengths */
-
    unsigned int  lit_bufsize;
    /* Size of match buffer for literals/lengths.  There are 4 reasons for
     * limiting lit_bufsize to 64K:
@ -251,41 +243,31 @@ typedef struct internal_state {
     *   - I can't count above 4
     */

-    unsigned int sym_next;      /* running index in sym_buf */
-    unsigned int sym_end;       /* symbol table full when sym_next reaches this */
+    unsigned char *sym_buf;       /* buffer for distances and literals/lengths */
+    unsigned int sym_next;        /* running index in sym_buf */
+    unsigned int sym_end;         /* symbol table full when sym_next reaches this */

    unsigned long opt_len;        /* bit length of current block with optimal trees */
    unsigned long static_len;     /* bit length of current block with static trees */
    unsigned int matches;         /* number of string matches in current block */
    unsigned int insert;          /* bytes at end of window left to insert */

-#ifdef ZLIB_DEBUG
+    /* compressed_len and bits_sent are only used if ZLIB_DEBUG is defined */
    unsigned long compressed_len; /* total bit length of compressed file mod 2^32 */
    unsigned long bits_sent;      /* bit length of compressed data sent mod 2^32 */
-#endif

-    uint16_t bi_buf;
-    /* Output buffer. bits are inserted starting at the bottom (least
-     * significant bits).
-     */
-    int bi_valid;
-    /* Number of valid bits in bi_buf.  All bits above the last valid bit
-     * are always zero.
-     */
+    /* Reserved for future use and alignment purposes */
+    char *reserved_p;

-    unsigned long high_water;
-    /* High water mark offset in window for initialized bytes -- bytes above
-     * this are set to zero in order to avoid memory check warnings when
-     * longest match routines access bytes past the input.  This is then
-     * updated to the new high water mark.
-     */
-    int block_open;
-    /* Whether or not a block is currently open for the QUICK deflation scheme.
-     * This is set to 1 if there is an active block, or 0 if the block was just
-     * closed.
-     */
+    uint64_t bi_buf;
+    /* Output buffer. bits are inserted starting at the bottom (least significant bits). */

-} deflate_state;
+    int32_t bi_valid;
+    /* Number of valid bits in bi_buf.  All bits above the last valid bit are always zero. */
+
+    /* Reserved for future use and alignment purposes */
+    int32_t reserved[11];
+} ALIGNED_(8) deflate_state;

 typedef enum {
    need_more,      /* block not completed, need more input or more output */
@ -297,18 +279,88 @@ typedef enum {
 /* Output a byte on the stream.
 * IN assertion: there is enough room in pending_buf.
+ * Stores (unsigned char)(c) at pending_buf[pending], then increments pending.
+ * The argument s is expanded twice, so it must not have side effects.
 */
-#define put_byte(s, c) {s->pending_buf[s->pending++] = (unsigned char)(c);}
+#define put_byte(s, c) { \
+    s->pending_buf[s->pending++] = (unsigned char)(c); \
+}

 /* ===========================================================================
 * Output a short LSB first on the stream.
- * IN assertion: there is enough room in pendingBuf.
+ * IN assertion: there is enough room in pending_buf.
+ * With UNALIGNED_OK the value is written by one 16-bit store in host byte
+ * order and pending advances by 2; otherwise it is emitted with put_byte,
+ * low byte first.
+ * NOTE(review): the single-store path is LSB first only on a little-endian
+ * host -- confirm UNALIGNED_OK is never defined for big-endian targets.
 */
 static inline void put_short(deflate_state *s, uint16_t w) {
-#if BYTE_ORDER == BIG_ENDIAN
-  w = ZSWAP16(w);
+#if defined(UNALIGNED_OK)
+    *(uint16_t *)(&s->pending_buf[s->pending]) = w;
+    s->pending += 2;
+#else
+    put_byte(s, (w & 0xff));
+    put_byte(s, ((w >> 8) & 0xff));
+#endif
+}
+
+/* ===========================================================================
+ * Output a short MSB first on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ * The two bytes are always written one at a time with put_byte, high byte
+ * first; there is no UNALIGNED_OK fast path here.
+ */
+static inline void put_short_msb(deflate_state *s, uint16_t w) {
+    put_byte(s, ((w >> 8) & 0xff));
+    put_byte(s, (w & 0xff));
+}
+
+/* ===========================================================================
+ * Output a 32-bit unsigned int LSB first on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ * With UNALIGNED_OK the value is written by one 32-bit store in host byte
+ * order and pending advances by 4; otherwise it is emitted with put_byte,
+ * lowest byte first.
+ * NOTE(review): the single-store path is LSB first only on a little-endian
+ * host -- confirm UNALIGNED_OK is never defined for big-endian targets.
+ */
+static inline void put_uint32(deflate_state *s, uint32_t dw) {
+#if defined(UNALIGNED_OK)
+    *(uint32_t *)(&s->pending_buf[s->pending]) = dw;
+    s->pending += 4;
+#else
+    put_byte(s, (dw & 0xff));
+    put_byte(s, ((dw >> 8) & 0xff));
+    put_byte(s, ((dw >> 16) & 0xff));
+    put_byte(s, ((dw >> 24) & 0xff));
+#endif
+}
+
+/* ===========================================================================
+ * Output a 32-bit unsigned int MSB first on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ * With UNALIGNED_OK the value is passed through ZSWAP32 and written by one
+ * 32-bit store in host byte order; otherwise it is emitted with put_byte,
+ * highest byte first.
+ * NOTE(review): swap-then-store yields MSB first only on a little-endian
+ * host -- confirm UNALIGNED_OK is never defined for big-endian targets.
+ */
+static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
+#if defined(UNALIGNED_OK)
+    *(uint32_t *)(&s->pending_buf[s->pending]) = ZSWAP32(dw);
+    s->pending += 4;
+#else
+    put_byte(s, ((dw >> 24) & 0xff));
+    put_byte(s, ((dw >> 16) & 0xff));
+    put_byte(s, ((dw >> 8) & 0xff));
+    put_byte(s, (dw & 0xff));
+#endif
+}
+
+/* ===========================================================================
+ * Output a 64-bit unsigned int LSB first on the stream.
+ * IN assertion: there is enough room in pending_buf.
+ * Three paths, all advancing pending by 8 in total:
+ *   - UNALIGNED64_OK: one 64-bit store in host byte order;
+ *   - UNALIGNED_OK only: two 32-bit stores, low half first, each in host
+ *     byte order;
+ *   - otherwise: eight put_byte calls, lowest byte first.
+ * NOTE(review): the store-based paths are LSB first only on a little-endian
+ * host -- confirm neither macro is ever defined for big-endian targets.
+ */
+static inline void put_uint64(deflate_state *s, uint64_t lld) {
+#if defined(UNALIGNED64_OK)
+    *(uint64_t *)(&s->pending_buf[s->pending]) = lld;
+    s->pending += 8;
+#elif defined(UNALIGNED_OK)
+    *(uint32_t *)(&s->pending_buf[s->pending]) = lld & 0xffffffff;
+    s->pending += 4;
+    *(uint32_t *)(&s->pending_buf[s->pending]) = (lld >> 32) & 0xffffffff;
+    s->pending += 4;
+#else
+    put_byte(s, (lld & 0xff));
+    put_byte(s, ((lld >> 8) & 0xff));
+    put_byte(s, ((lld >> 16) & 0xff));
+    put_byte(s, ((lld >> 24) & 0xff));
+    put_byte(s, ((lld >> 32) & 0xff));
+    put_byte(s, ((lld >> 40) & 0xff));
+    put_byte(s, ((lld >> 48) & 0xff));
+    put_byte(s, ((lld >> 56) & 0xff));
 #endif
-  memcpy(&(s->pending_buf[s->pending]), &w, sizeof(uint16_t));
-  s->pending += 2;
 }

 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
@ -326,120 +378,34 @@ static inline void put_short(deflate_state *s, uint16_t w) {
   memory checker errors from longest match routines */


-void ZLIB_INTERNAL fill_window_c(deflate_state *s);
+void Z_INTERNAL fill_window(deflate_state *s);
+void Z_INTERNAL slide_hash_c(deflate_state *s);

        /* in trees.c */
-void ZLIB_INTERNAL _tr_init(deflate_state *s);
-int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
-void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, char *buf, unsigned long stored_len, int last);
-void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
-void ZLIB_INTERNAL _tr_align(deflate_state *s);
-void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, char *buf, unsigned long stored_len, int last);
-void ZLIB_INTERNAL bi_windup(deflate_state *s);
-unsigned ZLIB_INTERNAL bi_reverse(unsigned code, int len);
-void ZLIB_INTERNAL flush_pending(PREFIX3(streamp) strm);
-
-#define d_code(dist) ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
+void Z_INTERNAL zng_tr_init(deflate_state *s);
+void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_len, int last);
+void Z_INTERNAL zng_tr_flush_bits(deflate_state *s);
+void Z_INTERNAL zng_tr_align(deflate_state *s);
+void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored_len, int last);
+unsigned Z_INTERNAL bi_reverse(unsigned code, int len);
+void Z_INTERNAL flush_pending(PREFIX3(streamp) strm);
+#define d_code(dist) ((dist) < 256 ? zng_dist_code[dist] : zng_dist_code[256+((dist)>>7)])
 /* Mapping from a distance to a distance code. dist is the distance - 1 and
- * must not have side effects. _dist_code[256] and _dist_code[257] are never
+ * must not have side effects. zng_dist_code[256] and zng_dist_code[257] are never
 * used.
 */

-#ifndef ZLIB_DEBUG
-/* Inline versions of _tr_tally for speed: */
-
-# if defined(GEN_TREES_H)
-    extern unsigned char ZLIB_INTERNAL _length_code[];
-    extern unsigned char ZLIB_INTERNAL _dist_code[];
-# else
-    extern const unsigned char ZLIB_INTERNAL _length_code[];
-    extern const unsigned char ZLIB_INTERNAL _dist_code[];
-# endif
-
-# define _tr_tally_lit(s, c, flush) \
-  { unsigned char cc = (c); \
-    s->sym_buf[s->sym_next++] = 0; \
-    s->sym_buf[s->sym_next++] = 0; \
-    s->sym_buf[s->sym_next++] = cc; \
-    s->dyn_ltree[cc].Freq++; \
-    flush = (s->sym_next == s->sym_end); \
-  }
-# define _tr_tally_dist(s, distance, length, flush) \
-  { unsigned char len = (unsigned char)(length); \
-    uint16_t dist = (uint16_t)(distance); \
-    s->sym_buf[s->sym_next++] = dist; \
-    s->sym_buf[s->sym_next++] = dist >> 8; \
-    s->sym_buf[s->sym_next++] = len; \
-    dist--; \
-    s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
-    s->dyn_dtree[d_code(dist)].Freq++; \
-    flush = (s->sym_next == s->sym_end); \
-  }
-#else
-#   define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
-#   define _tr_tally_dist(s, distance, length, flush) \
-              flush = _tr_tally(s, (unsigned)(distance), (unsigned)(length))
-#endif
-
-/* ===========================================================================
- * Update a hash value with the given input byte
- * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
- *    input characters, so that a running hash key can be computed from the
- *    previous key instead of complete recalculation each time.
- */
-
-#ifdef NOT_TWEAK_COMPILER
-#define TRIGGER_LEVEL 6
-#else
-#define TRIGGER_LEVEL 5
-#endif
-
-#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
-#define UPDATE_HASH(s, h, i) \
-    do {\
-        if (s->level < TRIGGER_LEVEL) \
-            h = (3483 * (s->window[i]) +\
-                 23081* (s->window[i+1]) +\
-                 6954 * (s->window[i+2]) +\
-                 20947* (s->window[i+3])) & s->hash_mask;\
-        else\
-            h = (25881* (s->window[i]) +\
-                 24674* (s->window[i+1]) +\
-                 25811* (s->window[i+2])) & s->hash_mask;\
-    } while (0)
-#else
-#   define UPDATE_HASH(s, h, i) (h = (((h) << s->hash_shift) ^ (s->window[i + (MIN_MATCH-1)])) & s->hash_mask)
-#endif
-
-#ifndef ZLIB_DEBUG
-#  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
-/* Send a code of the given tree. c and tree must not have side effects */
-
-#else /* ZLIB_DEBUG */
-#  define send_code(s, c, tree) \
-    {  if (z_verbose > 2) { \
-           fprintf(stderr, "\ncd %3d ", (c)); \
-       } \
-       send_bits(s, tree[c].Code, tree[c].Len); \
-     }
-#endif
-
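+/* Bit buffer and compressed bits accounting, for debugging only. With ZLIB_DEBUG
+ * the macros below update s->compressed_len and s->bits_sent; otherwise they
+ * expand to nothing, or to a (void) cast of their argument.
+ */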
 #ifdef ZLIB_DEBUG
-void send_bits(deflate_state *s, int value, int length);
+#  define cmpr_bits_add(s, len)     s->compressed_len += (len)
+#  define cmpr_bits_align(s)        s->compressed_len = (s->compressed_len + 7) & ~7L
+#  define sent_bits_add(s, bits)    s->bits_sent += (bits)
+#  define sent_bits_align(s)        s->bits_sent = (s->bits_sent + 7) & ~7L
 #else
-#define send_bits(s, value, length) \
-{ int len = length;\
-  if (s->bi_valid > (int)Buf_size - len) {\
-    int val = (int)value;\
-    s->bi_buf |= (uint16_t)val << s->bi_valid;\
-    put_short(s, s->bi_buf);\
-    s->bi_buf = (uint16_t)val >> (Buf_size - s->bi_valid);\
-    s->bi_valid += len - Buf_size;\
-  } else {\
-    s->bi_buf |= (uint16_t)(value) << s->bi_valid;\
-    s->bi_valid += len;\
-  }\
-}
+#  define cmpr_bits_add(s, len)     (void)(len)
+#  define cmpr_bits_align(s)
+#  define sent_bits_add(s, bits)    (void)(bits)
+#  define sent_bits_align(s)
 #endif

 #endif /* DEFLATE_H_ */
--- a/libs/zlibng/deflate_fast.c
+++ b/libs/zlibng/deflate_fast.c
@ -7,7 +7,6 @@
 #include "zbuild.h"
 #include "deflate.h"
 #include "deflate_p.h"
-#include "match_p.h"
 #include "functable.h"

 /* ===========================================================================
@ -17,9 +16,11 @@
 * new strings in the dictionary only for unmatched strings or for short
 * matches. It is used only for the fast compression options.
 */
-ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
-    IPos hash_head;       /* head of the hash chain */
-    int bflush;           /* set if current block must be flushed */
+Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
+    Pos hash_head;        /* head of the hash chain */
+    int bflush = 0;       /* set if current block must be flushed */
+    int64_t dist;
+    uint32_t match_len = 0;

    for (;;) {
        /* Make sure that we always have enough lookahead, except
@ -28,93 +29,78 @@ ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
         * string following the next match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
-            functable.fill_window(s);
-            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+            fill_window(s);
+            if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
                return need_more;
            }
-            if (s->lookahead == 0)
+            if (UNLIKELY(s->lookahead == 0))
                break; /* flush the current block */
        }

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */
-        hash_head = NIL;
        if (s->lookahead >= MIN_MATCH) {
-            hash_head = functable.insert_string(s, s->strstart, 1);
-        }
+            hash_head = functable.quick_insert_string(s, s->strstart);
+            dist = (int64_t)s->strstart - hash_head;

-        /* Find the longest match, discarding those <= prev_length.
-         * At this point we have always match_length < MIN_MATCH
-         */
-        if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
-            /* To simplify the code, we prevent matches with the string
-             * of window index 0 (in particular we have to avoid a match
-             * of the string with itself at the start of the input file).
+            /* Find the longest match, discarding those <= prev_length.
+             * At this point we have always match length < MIN_MATCH
             */
-            s->match_length = longest_match(s, hash_head);
-            /* longest_match() sets match_start */
+            
+            if (dist <= MAX_DIST(s) && dist > 0) {
+                /* To simplify the code, we prevent matches with the string
+                 * of window index 0 (in particular we have to avoid a match
+                 * of the string with itself at the start of the input file).
+                 */
+                match_len = functable.longest_match(s, hash_head);
+                /* longest_match() sets match_start */
+            }
        }
-        if (s->match_length >= MIN_MATCH) {
-            check_match(s, s->strstart, s->match_start, s->match_length);

-            _tr_tally_dist(s, s->strstart - s->match_start, s->match_length - MIN_MATCH, bflush);
+        if (match_len >= MIN_MATCH) {
+            check_match(s, s->strstart, s->match_start, match_len);

-            s->lookahead -= s->match_length;
+            bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - MIN_MATCH);
+
+            s->lookahead -= match_len;

            /* Insert new strings in the hash table only if the match length
             * is not too large. This saves time but degrades compression.
             */
-            if (s->match_length <= s->max_insert_length && s->lookahead >= MIN_MATCH) {
-                s->match_length--; /* string at strstart already in table */
+            if (match_len <= s->max_insert_length && s->lookahead >= MIN_MATCH) {
+                match_len--; /* string at strstart already in table */
                s->strstart++;
-#ifdef NOT_TWEAK_COMPILER
-                do {
-                    functable.insert_string(s, s->strstart, 1);
-                    s->strstart++;
-                    /* strstart never exceeds WSIZE-MAX_MATCH, so there are
-                     * always MIN_MATCH bytes ahead.
-                     */
-                } while (--s->match_length != 0);
-#else
-                {
-                    functable.insert_string(s, s->strstart, s->match_length);
-                    s->strstart += s->match_length;
-                    s->match_length = 0;
-                }
-#endif
+
+                functable.insert_string(s, s->strstart, match_len);
+                s->strstart += match_len;
            } else {
-                s->strstart += s->match_length;
-                s->match_length = 0;
-                s->ins_h = s->window[s->strstart];
-#ifndef NOT_TWEAK_COMPILER
+                s->strstart += match_len;
+#if MIN_MATCH != 3
                functable.insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
 #else
-                functable.insert_string(s, s->strstart + 2 - MIN_MATCH, 1);
-#if MIN_MATCH != 3
-#warning        Call insert_string() MIN_MATCH-3 more times
-#endif
+                functable.quick_insert_string(s, s->strstart + 2 - MIN_MATCH);
 #endif
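                /* NOTE(review): ins_h no longer exists in deflate_state, so this
                 * comment is stale. It originally read: if lookahead < MIN_MATCH,
                 * ins_h is garbage, but it does not matter since it will be
                 * recomputed at next deflate call.
                 */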
            }
+            match_len = 0;
        } else {
            /* No match, output a literal byte */
-            Tracevv((stderr, "%c", s->window[s->strstart]));
-            _tr_tally_lit(s, s->window[s->strstart], bflush);
+            bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
            s->lookahead--;
            s->strstart++;
        }
-        if (bflush)
+        if (UNLIKELY(bflush))
            FLUSH_BLOCK(s, 0);
    }
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
-    if (flush == Z_FINISH) {
+    if (UNLIKELY(flush == Z_FINISH)) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
-    if (s->sym_next)
+    if (UNLIKELY(s->sym_next))
        FLUSH_BLOCK(s, 0);
    return block_done;
 }
--- a/libs/zlibng/deflate_medium.c
+++ b/libs/zlibng/deflate_medium.c
@ -7,72 +7,50 @@
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
 #ifndef NO_MEDIUM_STRATEGY
+#include <stdint.h>
 #include "zbuild.h"
 #include "deflate.h"
 #include "deflate_p.h"
-#include "match_p.h"
 #include "functable.h"

 struct match { /* candidate match handled by deflate_medium and its helpers */
-    unsigned int match_start;
-    unsigned int match_length;
-    unsigned int strstart;
-    unsigned int orgstart;
+    uint16_t match_start;  /* window index of the earlier string; distance = strstart - match_start */
+    uint16_t match_length; /* match length; below MIN_MATCH the bytes are emitted as literals */
+    uint16_t strstart;     /* window index of the string being matched */
+    uint16_t orgstart;     /* copy of strstart taken when the match is set up -- presumably kept as the
+                            * original start while strstart is adjusted; TODO confirm */
 };

-#define MAX_DIST2  ((1 << MAX_WBITS) - MIN_LOOKAHEAD)
-
-static int tr_tally_dist(deflate_state *s, int distance, int length) {
-    return _tr_tally(s, distance, length);
-}
-
-static int tr_tally_lit(deflate_state *s, int c) {
-    return  _tr_tally(s, 0, c);
-}
-
 static int emit_match(deflate_state *s, struct match match) { /* tally match as literals or as one distance/length pair; match is a by-value copy, so the caller's struct is untouched */
-    int flush = 0;
+    int bflush = 0; /* accumulated return values of the tally calls */

    /* matches that are not long enough we need to emit as literals */
    if (match.match_length < MIN_MATCH) {
        while (match.match_length) {
-            flush += tr_tally_lit(s, s->window[match.strstart]);
+            bflush += zng_tr_tally_lit(s, s->window[match.strstart]); /* one literal per remaining byte */
            s->lookahead--;
            match.strstart++;
            match.match_length--;
        }
-        return flush;
+        return bflush; /* sum of the literal tally results */
    }

    check_match(s, match.strstart, match.match_start, match.match_length);

-    flush += tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
+    bflush += zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH); /* args: distance, then length minus MIN_MATCH */

    s->lookahead -= match.match_length;
-    return flush;
+    return bflush; /* result of the single distance/length tally */
 }

 static void insert_match(deflate_state *s, struct match match) {
-    if (unlikely(s->lookahead <= match.match_length + MIN_MATCH))
+    if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + MIN_MATCH)))
        return;

    /* matches that are not long enough we need to emit as literals */
-    if (match.match_length < MIN_MATCH) {
-#ifdef NOT_TWEAK_COMPILER
-        while (match.match_length) {
-            match.strstart++;
-            match.match_length--;
-
-            if (match.match_length) {
-                if (match.strstart >= match.orgstart) {
-                    functable.insert_string(s, match.strstart, 1);
-                }
-            }
-        }
-#else
+    if (LIKELY(match.match_length < MIN_MATCH)) {
        match.strstart++;
        match.match_length--;
-        if (match.match_length > 0) {
+        if (UNLIKELY(match.match_length > 0)) {
            if (match.strstart >= match.orgstart) {
                if (match.strstart + match.match_length - 1 >= match.orgstart) {
                    functable.insert_string(s, match.strstart, match.match_length);
@ -83,7 +61,6 @@ static void insert_match(deflate_state *s, struct match match) {
                match.match_length = 0;
            }
        }
-#endif
        return;
    }

@ -93,48 +70,35 @@ static void insert_match(deflate_state *s, struct match match) {
    if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) {
        match.match_length--; /* string at strstart already in table */
        match.strstart++;
-#ifdef NOT_TWEAK_COMPILER
-        do {
-            if (likely(match.strstart >= match.orgstart)) {
-                functable.insert_string(s, match.strstart, 1);
-            }
-            match.strstart++;
-            /* strstart never exceeds WSIZE-MAX_MATCH, so there are
-             * always MIN_MATCH bytes ahead.
-             */
-        } while (--match.match_length != 0);
-#else
-        if (likely(match.strstart >= match.orgstart)) {
-            if (likely(match.strstart + match.match_length - 1 >= match.orgstart)) {
+
+        if (LIKELY(match.strstart >= match.orgstart)) {
+            if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
                functable.insert_string(s, match.strstart, match.match_length);
            } else {
                functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
            }
+        } else if (match.orgstart < match.strstart + match.match_length) {
+            functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
        }
        match.strstart += match.match_length;
        match.match_length = 0;
-#endif
    } else {
        match.strstart += match.match_length;
        match.match_length = 0;
-        s->ins_h = s->window[match.strstart];
        if (match.strstart >= (MIN_MATCH - 2))
-#ifndef NOT_TWEAK_COMPILER
+#if MIN_MATCH != 3
            functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
 #else
-            functable.insert_string(s, match.strstart + 2 - MIN_MATCH, 1);
-#if MIN_MATCH != 3
-#warning    Call insert_string() MIN_MATCH-3 more times
+            functable.quick_insert_string(s, match.strstart + 2 - MIN_MATCH);
 #endif
-#endif
-    /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
-     * matter since it will be recomputed at next deflate call.
-     */
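+        /* NOTE(review): ins_h no longer exists in deflate_state, so this
+         * comment is stale. It originally read: if lookahead < MIN_MATCH,
+         * ins_h is garbage, but it does not matter since it will be
+         * recomputed at next deflate call.
+         */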
    }
 }

 static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
-    IPos limit;
+    Pos limit;
    unsigned char *match, *orig;
    int changed = 0;
    struct match c, n;
@ -143,36 +107,36 @@ static void fizzle_matches(deflate_state *s, struct match *current, struct match
    if (current->match_length <= 1)
        return;

-    if (unlikely(current->match_length > 1 + next->match_start))
+    if (UNLIKELY(current->match_length > 1 + next->match_start))
        return;

-    if (unlikely(current->match_length > 1 + next->strstart))
+    if (UNLIKELY(current->match_length > 1 + next->strstart))
        return;

    match = s->window - current->match_length + 1 + next->match_start;
    orig  = s->window - current->match_length + 1 + next->strstart;

    /* quick exit check.. if this fails then don't bother with anything else */
-    if (likely(*match != *orig))
+    if (LIKELY(*match != *orig))
        return;

    c = *current;
    n = *next;

    /* step one: try to move the "next" match to the left as much as possible */
-    limit = next->strstart > MAX_DIST2 ? next->strstart - MAX_DIST2 : 0;
+    limit = next->strstart > MAX_DIST(s) ? next->strstart - (Pos)MAX_DIST(s) : 0;

    match = s->window + n.match_start - 1;
    orig = s->window + n.strstart - 1;

    while (*match == *orig) {
-        if (c.match_length < 1)
+        if (UNLIKELY(c.match_length < 1))
            break;
-        if (n.strstart <= limit)
+        if (UNLIKELY(n.strstart <= limit))
            break;
-        if (n.match_length >= 256)
+        if (UNLIKELY(n.match_length >= 256))
            break;
-        if (n.match_start <= 1)
+        if (UNLIKELY(n.match_start <= 1))
            break;

        n.strstart--;
@ -196,15 +160,18 @@ static void fizzle_matches(deflate_state *s, struct match *current, struct match
    }
 }

-ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
-    struct match current_match, next_match;
+Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
+    /* Align the first struct so that it and the struct declared after it can fit in one cacheline.
+     * NOTE(review): ALIGNED_(16) presumably requests 16-byte alignment, not a whole cacheline -- confirm. */
+    ALIGNED_(16) struct match current_match;
+                 struct match next_match;

    memset(&current_match, 0, sizeof(struct match));
    memset(&next_match, 0, sizeof(struct match));

    for (;;) {
-        IPos hash_head = 0;   /* head of the hash chain */
-        int bflush;           /* set if current block must be flushed */
+        Pos hash_head = 0;    /* head of the hash chain */
+        int bflush = 0;       /* set if current block must be flushed */
+        int64_t dist;

        /* Make sure that we always have enough lookahead, except
         * at the end of the input file. We need MAX_MATCH bytes
@ -212,15 +179,14 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
         * string following the next current_match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
-            functable.fill_window(s);
+            fill_window(s);
            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
                return need_more;
            }
-            if (s->lookahead == 0)
+            if (UNLIKELY(s->lookahead == 0))
                break; /* flush the current block */
            next_match.match_length = 0;
        }
-        s->prev_length = 2;

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
@ -230,63 +196,63 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
        if (next_match.match_length > 0) {
            current_match = next_match;
            next_match.match_length = 0;
-
        } else {
            hash_head = 0;
            if (s->lookahead >= MIN_MATCH) {
-                hash_head = functable.insert_string(s, s->strstart, 1);
+                hash_head = functable.quick_insert_string(s, s->strstart);
            }

-            /* set up the initial match to be a 1 byte literal */
-            current_match.match_start = 0;
-            current_match.match_length = 1;
-            current_match.strstart = s->strstart;
+            current_match.strstart = (uint16_t)s->strstart;
            current_match.orgstart = current_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */

-            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
+            dist = (int64_t)s->strstart - hash_head;
+            if (dist <= MAX_DIST(s) && dist > 0) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
-                current_match.match_length = longest_match(s, hash_head);
-                current_match.match_start = s->match_start;
-                if (current_match.match_length < MIN_MATCH)
+                current_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
+                current_match.match_start = (uint16_t)s->match_start;
+                if (UNLIKELY(current_match.match_length < MIN_MATCH))
                    current_match.match_length = 1;
-                if (current_match.match_start >= current_match.strstart) {
+                if (UNLIKELY(current_match.match_start >= current_match.strstart)) {
                    /* this can happen due to some restarts */
                    current_match.match_length = 1;
                }
+            } else {
+                /* Set up the match to be a 1 byte literal */
+                current_match.match_start = 0;
+                current_match.match_length = 1;
            }
        }

        insert_match(s, current_match);

        /* now, look ahead one */
-        if (s->lookahead > MIN_LOOKAHEAD && (current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD)) {
+        if (LIKELY(s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
            s->strstart = current_match.strstart + current_match.match_length;
-            hash_head = functable.insert_string(s, s->strstart, 1);
+            hash_head = functable.quick_insert_string(s, s->strstart);

-            /* set up the initial match to be a 1 byte literal */
-            next_match.match_start = 0;
-            next_match.match_length = 1;
-            next_match.strstart = s->strstart;
+            next_match.strstart = (uint16_t)s->strstart;
            next_match.orgstart = next_match.strstart;

            /* Find the longest match, discarding those <= prev_length.
             * At this point we have always match_length < MIN_MATCH
             */
-            if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
+
+            dist = (int64_t)s->strstart - hash_head;
+            if (dist <= MAX_DIST(s) && dist > 0) {
                /* To simplify the code, we prevent matches with the string
                 * of window index 0 (in particular we have to avoid a match
                 * of the string with itself at the start of the input file).
                 */
-                next_match.match_length = longest_match(s, hash_head);
-                next_match.match_start = s->match_start;
-                if (next_match.match_start >= next_match.strstart) {
+                next_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
+                next_match.match_start = (uint16_t)s->match_start;
+                if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
                    /* this can happen due to some restarts */
                    next_match.match_length = 1;
                }
@ -294,13 +260,13 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
                    next_match.match_length = 1;
                else
                    fizzle_matches(s, &current_match, &next_match);
+            } else {
+                /* Set up the match to be a 1 byte literal */
+                next_match.match_start = 0;
+                next_match.match_length = 1;
            }

-            /* short matches with a very long distance are rarely a good idea encoding wise */
-            if (next_match.match_length == 3 && (next_match.strstart - next_match.match_start) > 12000)
-                    next_match.match_length = 1;
            s->strstart = current_match.strstart;
-
        } else {
            next_match.match_length = 0;
        }
@ -311,7 +277,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
        /* move the "cursor" forward */
        s->strstart += current_match.match_length;

-        if (bflush)
+        if (UNLIKELY(bflush))
            FLUSH_BLOCK(s, 0);
    }
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
@ -319,7 +285,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
-    if (s->sym_next)
+    if (UNLIKELY(s->sym_next))
        FLUSH_BLOCK(s, 0);

    return block_done;
--- a/libs/zlibng/deflate_p.h
+++ b/libs/zlibng/deflate_p.h
@ -12,39 +12,45 @@
 /* Forward declare common non-inlined functions declared in deflate.c */

 #ifdef ZLIB_DEBUG
-void check_match(deflate_state *s, IPos start, IPos match, int length);
+void check_match(deflate_state *s, Pos start, Pos match, int length);
 #else
 #define check_match(s, start, match, length)
 #endif
 void flush_pending(PREFIX3(stream) *strm);

 /* ===========================================================================
- * Insert string str in the dictionary and set match_head to the previous head
- * of the hash chain (the most recent string with same hash key). Return
- * the previous length of the hash chain.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
- *    input characters and the first MIN_MATCH bytes of str are valid
- *    (except for the last MIN_MATCH-1 bytes of the input file).
+ * Save the match info and tally the frequency counts. Return true if
+ * the current block must be flushed.
 */

-static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigned int count) {
-    Pos ret = 0;
-    unsigned int idx;
+extern const unsigned char Z_INTERNAL zng_length_code[];
+extern const unsigned char Z_INTERNAL zng_dist_code[];

-    for (idx = 0; idx < count; idx++) {
-        UPDATE_HASH(s, s->ins_h, str+idx);
+static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
+    /* c is the unmatched char */
+    s->sym_buf[s->sym_next++] = 0;
+    s->sym_buf[s->sym_next++] = 0;
+    s->sym_buf[s->sym_next++] = c;
+    s->dyn_ltree[c].Freq++;
+    Tracevv((stderr, "%c", c));
+    Assert(c <= (MAX_MATCH-MIN_MATCH), "zng_tr_tally: bad literal");
+    return (s->sym_next == s->sym_end);
+}

-        Pos head = s->head[s->ins_h];
-        if (head != str+idx) {
-          s->prev[(str+idx) & s->w_mask] = head;
-          s->head[s->ins_h] = str+idx;
-          if (idx == count - 1)
-            ret = head;
-        } else if (idx == count - 1) {
-          ret = str + idx;
-        }
-    }
-    return ret;
+static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) {
+    /* dist: distance of matched string */
+    /* len: match length-MIN_MATCH */
+    s->sym_buf[s->sym_next++] = (uint8_t)(dist);
+    s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8);
+    s->sym_buf[s->sym_next++] = (uint8_t)len;
+    s->matches++;
+    dist--;
+    Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES, 
+        "zng_tr_tally: bad match");
+
+    s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++;
+    s->dyn_dtree[d_code(dist)].Freq++;
+    return (s->sym_next == s->sym_end);
 }

 /* ===========================================================================
@ -52,14 +58,13 @@ static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigne
 * IN assertion: strstart is set to the end of the current match.
 */
 #define FLUSH_BLOCK_ONLY(s, last) { \
-    _tr_flush_block(s, (s->block_start >= 0L ? \
+    zng_tr_flush_block(s, (s->block_start >= 0 ? \
                   (char *)&s->window[(unsigned)s->block_start] : \
                   NULL), \
-                   (unsigned long)((long)s->strstart - s->block_start), \
+                   (uint32_t)((int)s->strstart - s->block_start), \
                   (last)); \
-    s->block_start = s->strstart; \
+    s->block_start = (int)s->strstart; \
    flush_pending(s->strm); \
-    Tracev((stderr, "[FLUSH]")); \
 }

 /* Same but force premature exit if necessary. */
--- a/libs/zlibng/deflate_quick.c
+++ b/libs/zlibng/deflate_quick.c
@ -0,0 +1,121 @@
+/*
+ * The deflate_quick deflate strategy, designed to be used when cycles are
+ * at a premium.
+ *
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ * Authors:
+ *  Wajdi Feghali   <wajdi.k.feghali@intel.com>
+ *  Jim Guilford    <james.guilford@intel.com>
+ *  Vinodh Gopal    <vinodh.gopal@intel.com>
+ *  Erdinc Ozturk   <erdinc.ozturk@intel.com>
+ *  Jim Kukunas     <james.t.kukunas@linux.intel.com>
+ *
+ * Portions are Copyright (C) 2016 12Sided Technology, LLC.
+ * Author:
+ *  Phil Vachon     <pvachon@12sidedtech.com>
+ *
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "zbuild.h"
+#include "deflate.h"
+#include "deflate_p.h"
+#include "functable.h"
+#include "trees_emit.h"
+
+extern const ct_data static_ltree[L_CODES+2];
+extern const ct_data static_dtree[D_CODES];
+
+#define QUICK_START_BLOCK(s, last) { \
+    zng_tr_emit_tree(s, STATIC_TREES, last); \
+    s->block_open = 1 + (int)last; \
+    s->block_start = (int)s->strstart; \
+}
+
+#define QUICK_END_BLOCK(s, last) { \
+    if (s->block_open) { \
+        zng_tr_emit_end_block(s, static_ltree, last); \
+        s->block_open = 0; \
+        s->block_start = (int)s->strstart; \
+        flush_pending(s->strm); \
+        if (s->strm->avail_out == 0) \
+            return (last) ? finish_started : need_more; \
+    } \
+}
+
+Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
+    Pos hash_head;
+    int64_t dist;
+    unsigned match_len, last;
+
+
+    last = (flush == Z_FINISH) ? 1 : 0;
+    if (UNLIKELY(last && s->block_open != 2)) {
+        /* Emit end of previous block */
+        QUICK_END_BLOCK(s, 0);
+        /* Emit start of last block */
+        QUICK_START_BLOCK(s, last);
+    } else if (UNLIKELY(s->block_open == 0 && s->lookahead > 0)) {
+        /* Start new block only when we have lookahead data, so that if no
+           input data is given an empty block will not be written */
+        QUICK_START_BLOCK(s, last);
+    }
+
+    for (;;) {
+        if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) {
+            flush_pending(s->strm);
+            if (s->strm->avail_out == 0) {
+                return (last && s->strm->avail_in == 0) ? finish_started : need_more;
+            }
+        }
+
+        if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD)) {
+            fill_window(s);
+            if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
+                return need_more;
+            }
+            if (UNLIKELY(s->lookahead == 0))
+                break;
+
+            if (UNLIKELY(s->block_open == 0)) {
+                /* Start new block when we have lookahead data, so that if no
+                   input data is given an empty block will not be written */
+                QUICK_START_BLOCK(s, last);
+            }
+        }
+
+        if (LIKELY(s->lookahead >= MIN_MATCH)) {
+            hash_head = functable.quick_insert_string(s, s->strstart);
+            dist = (int64_t)s->strstart - hash_head;
+
+            if (dist <= MAX_DIST(s) && dist > 0) {
+                match_len = functable.compare258(s->window + s->strstart, s->window + hash_head);
+
+                if (match_len >= MIN_MATCH) {
+                    if (UNLIKELY(match_len > s->lookahead))
+                        match_len = s->lookahead;
+
+                    check_match(s, s->strstart, hash_head, match_len);
+
+                    zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - MIN_MATCH, (uint32_t)dist);
+                    s->lookahead -= match_len;
+                    s->strstart += match_len;
+                    continue;
+                }
+            }
+        }
+
+        zng_tr_emit_lit(s, static_ltree, s->window[s->strstart]);
+        s->strstart++;
+        s->lookahead--;
+    }
+
+    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
+    if (UNLIKELY(last)) {
+        QUICK_END_BLOCK(s, 1);
+        return finish_done;
+    }
+
+    QUICK_END_BLOCK(s, 0);
+    return block_done;
+}
--- a/libs/zlibng/deflate_slow.c
+++ b/libs/zlibng/deflate_slow.c
@ -7,26 +7,18 @@
 #include "zbuild.h"
 #include "deflate.h"
 #include "deflate_p.h"
-#include "match_p.h"
 #include "functable.h"

-/* ===========================================================================
- * Local data
- */
-
-#ifndef TOO_FAR
-#  define TOO_FAR 4096
-#endif
-/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-
 /* ===========================================================================
 * Same as deflate_medium, but achieves better compression. We use a lazy
 * evaluation for matches: a match is finally adopted only if there is
 * no better match at the next window position.
 */
-ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
-    IPos hash_head;          /* head of hash chain */
+Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
+    Pos hash_head;           /* head of hash chain */
    int bflush;              /* set if current block must be flushed */
+    int64_t dist;
+    uint32_t match_len;

    /* Process the input block. */
    for (;;) {
@ -36,57 +28,53 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
         * string following the next match.
         */
        if (s->lookahead < MIN_LOOKAHEAD) {
-            functable.fill_window(s);
-            if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
+            fill_window(s);
+            if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
                return need_more;
            }
-            if (s->lookahead == 0)
+            if (UNLIKELY(s->lookahead == 0))
                break; /* flush the current block */
        }

        /* Insert the string window[strstart .. strstart+2] in the
         * dictionary, and set hash_head to the head of the hash chain:
         */
-        hash_head = NIL;
-        if (s->lookahead >= MIN_MATCH) {
-            hash_head = functable.insert_string(s, s->strstart, 1);
+        hash_head = 0;
+        if (LIKELY(s->lookahead >= MIN_MATCH)) {
+            hash_head = functable.quick_insert_string(s, s->strstart);
        }

        /* Find the longest match, discarding those <= prev_length.
         */
-        s->prev_length = s->match_length, s->prev_match = s->match_start;
-        s->match_length = MIN_MATCH-1;
+        s->prev_match = (Pos)s->match_start;
+        match_len = MIN_MATCH-1;
+        dist = (int64_t)s->strstart - hash_head;

-        if (hash_head != NIL && s->prev_length < s->max_lazy_match && s->strstart - hash_head <= MAX_DIST(s)) {
+        if (dist <= MAX_DIST(s) && dist > 0 && s->prev_length < s->max_lazy_match) {
            /* To simplify the code, we prevent matches with the string
             * of window index 0 (in particular we have to avoid a match
             * of the string with itself at the start of the input file).
             */
-            s->match_length = longest_match(s, hash_head);
+            match_len = functable.longest_match(s, hash_head);
            /* longest_match() sets match_start */

-            if (s->match_length <= 5 && (s->strategy == Z_FILTERED
-#if TOO_FAR <= 32767
-                || (s->match_length == MIN_MATCH && s->strstart - s->match_start > TOO_FAR)
-#endif
-                )) {
-
+            if (match_len <= 5 && (s->strategy == Z_FILTERED)) {
                /* If prev_match is also MIN_MATCH, match_start is garbage
                 * but we will ignore the current match anyway.
                 */
-                s->match_length = MIN_MATCH-1;
+                match_len = MIN_MATCH-1;
            }
        }
        /* If there was a match at the previous step and the current
         * match is not better, output the previous match:
         */
-        if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
+        if (s->prev_length >= MIN_MATCH && match_len <= s->prev_length) {
            unsigned int max_insert = s->strstart + s->lookahead - MIN_MATCH;
            /* Do not insert strings in hash table beyond this. */

            check_match(s, s->strstart-1, s->prev_match, s->prev_length);

-            _tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH, bflush);
+            bflush = zng_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH);

            /* Insert in hash table all strings up to the end of the match.
             * strstart-1 and strstart are already inserted. If there is not
@ -95,70 +83,55 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
             */
            s->lookahead -= s->prev_length-1;

-#ifdef NOT_TWEAK_COMPILER
-            s->prev_length -= 2;
-            do {
-                if (++s->strstart <= max_insert) {
-                    functable.insert_string(s, s->strstart, 1);
-                }
-            } while (--s->prev_length != 0);
-            s->match_available = 0;
-            s->match_length = MIN_MATCH-1;
-            s->strstart++;
-#else
-            {
-                unsigned int mov_fwd = s->prev_length - 2;
-                if (max_insert > s->strstart) {
-                    unsigned int insert_cnt = mov_fwd;
-                    if (unlikely(insert_cnt > max_insert - s->strstart))
-                        insert_cnt = max_insert - s->strstart;
+            unsigned int mov_fwd = s->prev_length - 2;
+            if (max_insert > s->strstart) {
+                unsigned int insert_cnt = mov_fwd;
+                if (UNLIKELY(insert_cnt > max_insert - s->strstart))
+                    insert_cnt = max_insert - s->strstart;

-                    functable.insert_string(s, s->strstart + 1, insert_cnt);
-                }
-                s->prev_length = 0;
-                s->match_available = 0;
-                s->match_length = MIN_MATCH-1;
-                s->strstart += mov_fwd + 1;
+                functable.insert_string(s, s->strstart + 1, insert_cnt);
            }
-#endif /*NOT_TWEAK_COMPILER*/
+            s->prev_length = 0;
+            s->match_available = 0;
+            s->strstart += mov_fwd + 1;

-            if (bflush) FLUSH_BLOCK(s, 0);
+            if (UNLIKELY(bflush))
+                FLUSH_BLOCK(s, 0);

        } else if (s->match_available) {
            /* If there was no match at the previous position, output a
             * single literal. If there was a match but the current match
             * is longer, truncate the previous match to a single literal.
             */
-            Tracevv((stderr, "%c", s->window[s->strstart-1]));
-            _tr_tally_lit(s, s->window[s->strstart-1], bflush);
-            if (bflush) {
+            bflush = zng_tr_tally_lit(s, s->window[s->strstart-1]);
+            if (UNLIKELY(bflush))
                FLUSH_BLOCK_ONLY(s, 0);
-            }
+            s->prev_length = match_len;
            s->strstart++;
            s->lookahead--;
-            if (s->strm->avail_out == 0)
+            if (UNLIKELY(s->strm->avail_out == 0))
                return need_more;
        } else {
            /* There is no previous match to compare with, wait for
             * the next step to decide.
             */
+            s->prev_length = match_len;
            s->match_available = 1;
            s->strstart++;
            s->lookahead--;
        }
    }
    Assert(flush != Z_NO_FLUSH, "no flush?");
-    if (s->match_available) {
-        Tracevv((stderr, "%c", s->window[s->strstart-1]));
-        _tr_tally_lit(s, s->window[s->strstart-1], bflush);
+    if (UNLIKELY(s->match_available)) {
+        (void) zng_tr_tally_lit(s, s->window[s->strstart-1]);
        s->match_available = 0;
    }
    s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
-    if (flush == Z_FINISH) {
+    if (UNLIKELY(flush == Z_FINISH)) {
        FLUSH_BLOCK(s, 1);
        return finish_done;
    }
-    if (s->sym_next)
+    if (UNLIKELY(s->sym_next))
        FLUSH_BLOCK(s, 0);
    return block_done;
 }
--- a/libs/zlibng/fallback_builtins.h
+++ b/libs/zlibng/fallback_builtins.h
@ -0,0 +1,44 @@
+#ifndef X86_BUILTIN_CTZ_H
+#define X86_BUILTIN_CTZ_H
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) ||  defined(_M_ARM) || defined(_M_ARM64)
+
+#include <intrin.h>
+#ifdef X86_FEATURES
+#  include "arch/x86/x86.h"
+#endif
+
+/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0.
+ * Because of that assumption, trailing_zero is not initialized and the return value of _BitScanForward is not checked.
+ */
+static __forceinline unsigned long __builtin_ctz(uint32_t value) {
+#ifdef X86_FEATURES
+    if (x86_cpu_has_tzcnt)
+        return _tzcnt_u32(value);
+#endif
+    unsigned long trailing_zero;
+    _BitScanForward(&trailing_zero, value);
+    return trailing_zero;
+}
+#define HAVE_BUILTIN_CTZ
+
+#ifdef _M_AMD64
+/* This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0.
+ * Because of that assumption, trailing_zero is not initialized and the return value of _BitScanForward64 is not checked.
+ */
+static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
+#ifdef X86_FEATURES
+    if (x86_cpu_has_tzcnt)
+        return _tzcnt_u64(value);
+#endif
+    unsigned long trailing_zero;
+    _BitScanForward64(&trailing_zero, value);
+    return trailing_zero;
+}
+#define HAVE_BUILTIN_CTZLL
+#endif
+
+#endif
+#endif
+#endif
--- a/libs/zlibng/functable.c
+++ b/libs/zlibng/functable.c
@ -4,40 +4,95 @@
 */

 #include "zbuild.h"
-#include "functable.h"
+#include "zendian.h"
 #include "deflate.h"
 #include "deflate_p.h"

-#include "gzendian.h"
+#include "functable.h"

-/* insert_string */
-#ifdef X86_SSE4_2_CRC_HASH
-extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count);
-#elif defined(ARM_ACLE_CRC_HASH)
-extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count);
+#ifdef X86_FEATURES
+#  include "fallback_builtins.h"
 #endif

-/* fill_window */
+/* insert_string */
+extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
+#ifdef X86_SSE42_CRC_HASH
+extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
+#elif defined(ARM_ACLE_CRC_HASH)
+extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
+#endif
+
+/* quick_insert_string */
+extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
+#ifdef X86_SSE42_CRC_HASH
+extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
+#elif defined(ARM_ACLE_CRC_HASH)
+extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
+#endif
+
+/* slide_hash */
 #ifdef X86_SSE2
-extern void fill_window_sse(deflate_state *s);
-#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
-extern void fill_window_arm(deflate_state *s);
+void slide_hash_sse2(deflate_state *s);
+#elif defined(ARM_NEON_SLIDEHASH)
+void slide_hash_neon(deflate_state *s);
+#elif defined(POWER8_VSX_SLIDEHASH)
+void slide_hash_power8(deflate_state *s);
+#endif
+#ifdef X86_AVX2
+void slide_hash_avx2(deflate_state *s);
 #endif

 /* adler32 */
 extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
-#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
+#ifdef ARM_NEON_ADLER32
 extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
 #endif
-
-ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
-
-#ifdef DYNAMIC_CRC_TABLE
-extern volatile int crc_table_empty;
-extern void make_crc_table(void);
+#ifdef X86_SSSE3_ADLER32
+extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len);
+#endif
+#ifdef X86_AVX2_ADLER32
+extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len);
+#endif
+#ifdef POWER8_VSX_ADLER32
+extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len);
 #endif

-#ifdef __ARM_FEATURE_CRC32
+/* memory chunking */
+extern uint32_t chunksize_c(void);
+extern uint8_t* chunkcopy_c(uint8_t *out, uint8_t const *from, unsigned len);
+extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len);
+extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len);
+extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#ifdef X86_SSE2_CHUNKSET
+extern uint32_t chunksize_sse2(void);
+extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
+extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
+extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
+extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#endif
+#ifdef X86_AVX_CHUNKSET
+extern uint32_t chunksize_avx(void);
+extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len);
+extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
+extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
+extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#endif
+#ifdef ARM_NEON_CHUNKSET
+extern uint32_t chunksize_neon(void);
+extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
+extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+extern uint8_t* chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len);
+extern uint8_t* chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len);
+extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
+#endif
+
+/* CRC32 */
+Z_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
+
+#ifdef ARM_ACLE_CRC_HASH
 extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t);
 #endif

@ -47,87 +102,365 @@ extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t);
 extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t);
 #endif

-/* stub definitions */
-ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count);
-ZLIB_INTERNAL void fill_window_stub(deflate_state *s);
-ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len);
-ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len);
+/* compare258 */
+extern uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1);
+#ifdef UNALIGNED_OK
+extern uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1);
+extern uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1);
+#ifdef UNALIGNED64_OK
+extern uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1);
+#endif
+#ifdef X86_SSE42_CMP_STR
+extern uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+extern uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1);
+#endif
+#endif

-/* functable init */
-ZLIB_INTERNAL __thread struct functable_s functable = {fill_window_stub,insert_string_stub,adler32_stub,crc32_stub};
+/* longest_match */
+extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
+#ifdef UNALIGNED_OK
+extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
+extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
+#ifdef UNALIGNED64_OK
+extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
+#endif
+#ifdef X86_SSE42_CMP_STR
+extern uint32_t longest_match_unaligned_sse4(deflate_state *const s, Pos cur_match);
+#endif
+#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_match);
+#endif
+#endif

+Z_INTERNAL Z_TLS struct functable_s functable;
+
+/* Run the CPU feature probe selected at build time (x86, ARM or POWER).
+ *
+ * A function-local static flag records that the probe has been executed, so
+ * later calls do nothing. The flag is a plain int with no synchronization.
+ * When none of X86_FEATURES, ARM_FEATURES or POWER_FEATURES is defined, no
+ * probe is called and only the flag is set.
+ */
+Z_INTERNAL void cpu_check_features(void)
+{
+    static int probe_done = 0;
+
+    if (!probe_done) {
+#if defined(X86_FEATURES)
+        x86_check_features();
+#elif defined(ARM_FEATURES)
+        arm_check_features();
+#elif defined(POWER_FEATURES)
+        power_check_features();
+#endif
+        /* Mark completion only after the probe has returned. */
+        probe_done = 1;
+    }
+}

 /* stub functions */
-ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count) {
+/* First-call resolver for functable.insert_string.
+ *
+ * Installs insert_string_c as the default, runs cpu_check_features(), then
+ * switches to insert_string_sse4 (X86_SSE42_CRC_HASH builds with
+ * x86_cpu_has_sse42 set) or insert_string_acle (ARM_ACLE_CRC_HASH builds with
+ * arm_cpu_has_crc32 set). The call is then forwarded through the updated
+ * pointer, so later calls through functable.insert_string no longer reach
+ * this stub.
+ */
+Z_INTERNAL void insert_string_stub(deflate_state *const s, const uint32_t str, uint32_t count) {
    // Initialize default
-    functable.insert_string=&insert_string_c;

-    #ifdef X86_SSE4_2_CRC_HASH
+    functable.insert_string = &insert_string_c;
+    cpu_check_features();
+
+#ifdef X86_SSE42_CRC_HASH
    if (x86_cpu_has_sse42)
-        functable.insert_string=&insert_string_sse;
-    #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
+        functable.insert_string = &insert_string_sse4;
+#elif defined(ARM_ACLE_CRC_HASH)
    if (arm_cpu_has_crc32)
-        functable.insert_string=&insert_string_acle;
-    #endif
+        functable.insert_string = &insert_string_acle;
+#endif

-    return functable.insert_string(s, str, count);
+    functable.insert_string(s, str, count);
 }

-ZLIB_INTERNAL void fill_window_stub(deflate_state *s) {
-    // Initialize default
-    functable.fill_window=&fill_window_c;
+/* First-call resolver for functable.quick_insert_string.
+ *
+ * Installs quick_insert_string_c as the default, makes sure CPU feature
+ * detection has run, then switches to quick_insert_string_sse4
+ * (X86_SSE42_CRC_HASH builds with x86_cpu_has_sse42 set) or
+ * quick_insert_string_acle (ARM_ACLE_CRC_HASH builds with arm_cpu_has_crc32
+ * set). Returns whatever the selected implementation returns for (s, str).
+ */
+Z_INTERNAL Pos quick_insert_string_stub(deflate_state *const s, const uint32_t str) {
+    functable.quick_insert_string = &quick_insert_string_c;
+    /* The cpu flags tested below are only meaningful once detection has run
+     * (presumably the *_check_features() probes fill them in). Without this
+     * call the stub depends on some other functable entry having been
+     * resolved first. The call is a no-op after the first time. */
+    cpu_check_features();

-    #ifdef X86_SSE2
-    # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+#ifdef X86_SSE42_CRC_HASH
+    if (x86_cpu_has_sse42)
+        functable.quick_insert_string = &quick_insert_string_sse4;
+#elif defined(ARM_ACLE_CRC_HASH)
+    if (arm_cpu_has_crc32)
+        functable.quick_insert_string = &quick_insert_string_acle;
+#endif
+
+    return functable.quick_insert_string(s, str);
+}
+
+/* First-call resolver for functable.slide_hash.
+ *
+ * Installs slide_hash_c as the default, runs cpu_check_features(), then picks
+ * an accelerated variant. The blocks are evaluated in order and a later
+ * assignment overrides an earlier one, so slide_hash_avx2 replaces
+ * slide_hash_sse2 when both apply. The call is finally forwarded through the
+ * updated pointer.
+ */
+Z_INTERNAL void slide_hash_stub(deflate_state *s) {
+
+    functable.slide_hash = &slide_hash_c;
+    cpu_check_features();
+
+#ifdef X86_SSE2
+    /* The run-time SSE2 test is compiled only for non-x86_64 targets without
+     * X86_NOCHECK_SSE2; otherwise the assignment below is unconditional. */
+#  if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
-    # endif
-        functable.fill_window=&fill_window_sse;
-    #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
-        functable.fill_window=&fill_window_arm;
-    #endif
+#  endif
+        functable.slide_hash = &slide_hash_sse2;
+#elif defined(ARM_NEON_SLIDEHASH)
+    /* ARM_NOCHECK_NEON removes the run-time test and selects NEON outright. */
+#  ifndef ARM_NOCHECK_NEON
+    if (arm_cpu_has_neon)
+#  endif
+        functable.slide_hash = &slide_hash_neon;
+#endif
+#ifdef X86_AVX2
+    if (x86_cpu_has_avx2)
+        functable.slide_hash = &slide_hash_avx2;
+#endif
+#ifdef POWER8_VSX_SLIDEHASH
+    if (power_cpu_has_arch_2_07)
+        functable.slide_hash = &slide_hash_power8;
+#endif

-    functable.fill_window(s);
+    functable.slide_hash(s);
 }

-ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
+/* First-call resolver for functable.adler32.
+ *
+ * Installs adler32_c as the default, runs cpu_check_features(), then picks an
+ * accelerated variant (NEON, SSSE3, AVX2 or POWER8) when it was compiled in
+ * and its run-time flag is set. Later blocks override earlier ones, so
+ * adler32_avx2 replaces adler32_ssse3 when both apply. Returns the result of
+ * the selected implementation for (adler, buf, len).
+ */
+Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
    // Initialize default
-    functable.adler32=&adler32_c;
+    functable.adler32 = &adler32_c;
+    cpu_check_features();

-    #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
+#ifdef ARM_NEON_ADLER32
+    /* ARM_NOCHECK_NEON removes the run-time test and selects NEON outright. */
+#  ifndef ARM_NOCHECK_NEON
    if (arm_cpu_has_neon)
-        functable.adler32=&adler32_neon;
-    #endif
+#  endif
+        functable.adler32 = &adler32_neon;
+#endif
+#ifdef X86_SSSE3_ADLER32
+    if (x86_cpu_has_ssse3)
+        functable.adler32 = &adler32_ssse3;
+#endif
+#ifdef X86_AVX2_ADLER32
+    if (x86_cpu_has_avx2)
+        functable.adler32 = &adler32_avx2;
+#endif
+#ifdef POWER8_VSX_ADLER32
+    if (power_cpu_has_arch_2_07)
+        functable.adler32 = &adler32_power8;
+#endif

    return functable.adler32(adler, buf, len);
 }

-ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
+/* First-call resolver for functable.chunksize.
+ *
+ * Installs chunksize_c as the default, makes sure CPU feature detection has
+ * run, then switches to the SSE2, AVX or NEON variant when it was compiled in
+ * and its run-time flag is set. Later blocks override earlier ones. Returns
+ * the value reported by the selected implementation.
+ */
+Z_INTERNAL uint32_t chunksize_stub(void) {
+    // Initialize default
+    functable.chunksize = &chunksize_c;
+    /* The cpu flags tested below are only meaningful once detection has run.
+     * Probing here keeps the chunk size consistent with the chunk* routines,
+     * which are resolved from the same flags. No-op after the first call. */
+    cpu_check_features();

+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+    if (x86_cpu_has_sse2)
+# endif
+        functable.chunksize = &chunksize_sse2;
+#endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunksize = &chunksize_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+    if (arm_cpu_has_neon)
+        functable.chunksize = &chunksize_neon;
+#endif

-   Assert(sizeof(uint64_t) >= sizeof(size_t),
-          "crc32_z takes size_t but internally we have a uint64_t len");
-/* return a function pointer for optimized arches here after a capability test */
+    return functable.chunksize();
+}

-#ifdef DYNAMIC_CRC_TABLE
-    if (crc_table_empty)
-        make_crc_table();
-#endif /* DYNAMIC_CRC_TABLE */
+/* First-call resolver for functable.chunkcopy.
+ *
+ * Installs chunkcopy_c as the default, makes sure CPU feature detection has
+ * run, then switches to the SSE2, AVX or NEON variant when it was compiled in
+ * and its run-time flag is set. Later blocks override earlier ones. The
+ * arguments are forwarded unchanged and the selected implementation's result
+ * is returned.
+ */
+Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned len) {
+    // Initialize default
+    functable.chunkcopy = &chunkcopy_c;
+    /* The cpu flags tested below are only meaningful once detection has run;
+     * this call is a no-op after the first time. */
+    cpu_check_features();

-    if (sizeof(void *) == sizeof(ptrdiff_t)) {
+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+    if (x86_cpu_has_sse2)
+# endif
+        functable.chunkcopy = &chunkcopy_sse2;
+#endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkcopy = &chunkcopy_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+    if (arm_cpu_has_neon)
+        functable.chunkcopy = &chunkcopy_neon;
+#endif
+
+    return functable.chunkcopy(out, from, len);
+}
+
+/* First-call resolver for functable.chunkcopy_safe.
+ *
+ * Installs chunkcopy_safe_c as the default, makes sure CPU feature detection
+ * has run, then switches to the SSE2, AVX or NEON variant when it was
+ * compiled in and its run-time flag is set. Later blocks override earlier
+ * ones. The arguments are forwarded unchanged and the selected
+ * implementation's result is returned.
+ */
+Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
+    // Initialize default
+    functable.chunkcopy_safe = &chunkcopy_safe_c;
+    /* The cpu flags tested below are only meaningful once detection has run;
+     * this call is a no-op after the first time. */
+    cpu_check_features();
+
+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+    if (x86_cpu_has_sse2)
+# endif
+        functable.chunkcopy_safe = &chunkcopy_safe_sse2;
+#endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkcopy_safe = &chunkcopy_safe_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+    if (arm_cpu_has_neon)
+        functable.chunkcopy_safe = &chunkcopy_safe_neon;
+#endif
+
+    return functable.chunkcopy_safe(out, from, len, safe);
+}
+
+/* First-call resolver for functable.chunkunroll.
+ *
+ * Installs chunkunroll_c as the default, makes sure CPU feature detection has
+ * run, then switches to the SSE2, AVX or NEON variant when it was compiled in
+ * and its run-time flag is set. Later blocks override earlier ones. The
+ * arguments (including the dist/len pointers) are forwarded unchanged and the
+ * selected implementation's result is returned.
+ */
+Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len) {
+    // Initialize default
+    functable.chunkunroll = &chunkunroll_c;
+    /* The cpu flags tested below are only meaningful once detection has run;
+     * this call is a no-op after the first time. */
+    cpu_check_features();
+
+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+    if (x86_cpu_has_sse2)
+# endif
+        functable.chunkunroll = &chunkunroll_sse2;
+#endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkunroll = &chunkunroll_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+    if (arm_cpu_has_neon)
+        functable.chunkunroll = &chunkunroll_neon;
+#endif
+
+    return functable.chunkunroll(out, dist, len);
+}
+
+/* First-call resolver for functable.chunkmemset.
+ *
+ * Installs chunkmemset_c as the default, makes sure CPU feature detection has
+ * run, then switches to the SSE2, AVX or NEON variant when it was compiled in
+ * and its run-time flag is set. Later blocks override earlier ones. The
+ * arguments are forwarded unchanged and the selected implementation's result
+ * is returned.
+ */
+Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len) {
+    // Initialize default
+    functable.chunkmemset = &chunkmemset_c;
+    /* The cpu flags tested below are only meaningful once detection has run;
+     * this call is a no-op after the first time. */
+    cpu_check_features();
+
+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+    if (x86_cpu_has_sse2)
+# endif
+        functable.chunkmemset = &chunkmemset_sse2;
+#endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkmemset = &chunkmemset_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+    if (arm_cpu_has_neon)
+        functable.chunkmemset = &chunkmemset_neon;
+#endif
+
+    return functable.chunkmemset(out, dist, len);
+}
+
+/* First-call resolver for functable.chunkmemset_safe.
+ *
+ * Installs chunkmemset_safe_c as the default, makes sure CPU feature
+ * detection has run, then switches to the SSE2, AVX or NEON variant when it
+ * was compiled in and its run-time flag is set. Later blocks override earlier
+ * ones. The arguments are forwarded unchanged and the selected
+ * implementation's result is returned.
+ */
+Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
+    // Initialize default
+    functable.chunkmemset_safe = &chunkmemset_safe_c;
+    /* The cpu flags tested below are only meaningful once detection has run;
+     * this call is a no-op after the first time. */
+    cpu_check_features();
+
+#ifdef X86_SSE2_CHUNKSET
+# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
+    if (x86_cpu_has_sse2)
+# endif
+        functable.chunkmemset_safe = &chunkmemset_safe_sse2;
+#endif
+#ifdef X86_AVX_CHUNKSET
+    if (x86_cpu_has_avx2)
+        functable.chunkmemset_safe = &chunkmemset_safe_avx;
+#endif
+#ifdef ARM_NEON_CHUNKSET
+    if (arm_cpu_has_neon)
+        functable.chunkmemset_safe = &chunkmemset_safe_neon;
+#endif
+
+    return functable.chunkmemset_safe(out, dist, len, left);
+}
+
+/* First-call resolver for functable.crc32.
+ *
+ * Runs cpu_check_features(), then binds functable.crc32 as follows:
+ *   - when sizeof(void *) == sizeof(ptrdiff_t): crc32_little on little-endian
+ *     builds (replaced by crc32_acle when ARM_ACLE_CRC_HASH is defined and
+ *     arm_cpu_has_crc32 is set), or crc32_big on big-endian builds;
+ *   - otherwise: crc32_generic.
+ * Returns the result of the selected implementation for (crc, buf, len).
+ */
+Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
+    /* Non-zero when pointers and ptrdiff_t have the same size; gates the
+     * endian-specific implementations below. */
+    int32_t use_byfour = sizeof(void *) == sizeof(ptrdiff_t);
+
+    Assert(sizeof(uint64_t) >= sizeof(size_t),
+           "crc32_z takes size_t but internally we have a uint64_t len");
+    /* return a function pointer for optimized arches here after a capability test */
+
+    cpu_check_features();
+
+    if (use_byfour) {
 #if BYTE_ORDER == LITTLE_ENDIAN
-      functable.crc32=crc32_little;
-#  if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
-      if (arm_cpu_has_crc32)
-        functable.crc32=crc32_acle;
+        functable.crc32 = crc32_little;
+#  if defined(ARM_ACLE_CRC_HASH)
+        if (arm_cpu_has_crc32)
+            functable.crc32 = crc32_acle;
 #  endif
 #elif BYTE_ORDER == BIG_ENDIAN
-        functable.crc32=crc32_big;
+        functable.crc32 = crc32_big;
 #else
 #  error No endian defined
 #endif
    } else {
-        functable.crc32=crc32_generic;
+        functable.crc32 = crc32_generic;
    }

    return functable.crc32(crc, buf, len);
 }
+
+/* First-call resolver for functable.compare258.
+ *
+ * Installs compare258_c as the default and makes sure CPU feature detection
+ * has run. On UNALIGNED_OK builds it then selects the widest unaligned
+ * variant the build supports (64-, 32- or 16-bit), and finally an SSE4.2 or
+ * AVX2 variant when that was compiled in and its run-time flag is set; AVX2
+ * is tested last and therefore wins. Returns the selected implementation's
+ * result for (src0, src1).
+ */
+Z_INTERNAL uint32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) {
+
+    functable.compare258 = &compare258_c;
+    /* The x86_cpu_has_* flags tested below are only meaningful once detection
+     * has run; this call is a no-op after the first time. */
+    cpu_check_features();
+
+#ifdef UNALIGNED_OK
+#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+    functable.compare258 = &compare258_unaligned_64;
+#  elif defined(HAVE_BUILTIN_CTZ)
+    functable.compare258 = &compare258_unaligned_32;
+#  else
+    functable.compare258 = &compare258_unaligned_16;
+#  endif
+#  ifdef X86_SSE42_CMP_STR
+    if (x86_cpu_has_sse42)
+        functable.compare258 = &compare258_unaligned_sse4;
+#  endif
+#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.compare258 = &compare258_unaligned_avx2;
+#  endif
+#endif
+
+    return functable.compare258(src0, src1);
+}
+
+/* First-call resolver for functable.longest_match.
+ *
+ * Installs longest_match_c as the default and makes sure CPU feature
+ * detection has run. On UNALIGNED_OK builds it then selects the widest
+ * unaligned variant the build supports (64-, 32- or 16-bit), and finally an
+ * SSE4.2 or AVX2 variant when that was compiled in and its run-time flag is
+ * set; AVX2 is tested last and therefore wins. Returns the selected
+ * implementation's result for (s, cur_match).
+ */
+Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
+
+    functable.longest_match = &longest_match_c;
+    /* The x86_cpu_has_* flags tested below are only meaningful once detection
+     * has run; this call is a no-op after the first time. */
+    cpu_check_features();
+
+#ifdef UNALIGNED_OK
+#  if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
+    functable.longest_match = &longest_match_unaligned_64;
+#  elif defined(HAVE_BUILTIN_CTZ)
+    functable.longest_match = &longest_match_unaligned_32;
+#  else
+    functable.longest_match = &longest_match_unaligned_16;
+#  endif
+#  ifdef X86_SSE42_CMP_STR
+    if (x86_cpu_has_sse42)
+        functable.longest_match = &longest_match_unaligned_sse4;
+#  endif
+#  if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
+    if (x86_cpu_has_avx2)
+        functable.longest_match = &longest_match_unaligned_avx2;
+#  endif
+#endif
+
+    return functable.longest_match(s, cur_match);
+}
+
+/* functable init
+ *
+ * Every slot starts out pointing at its *_stub resolver; each stub replaces
+ * its own slot with a concrete implementation the first time it is called.
+ * The initializers are positional, so their order must match the member
+ * order of struct functable_s in functable.h.
+ */
+Z_INTERNAL Z_TLS struct functable_s functable = {
+    insert_string_stub,
+    quick_insert_string_stub,
+    adler32_stub,
+    crc32_stub,
+    slide_hash_stub,
+    compare258_stub,
+    longest_match_stub,
+    chunksize_stub,
+    chunkcopy_stub,
+    chunkcopy_safe_stub,
+    chunkunroll_stub,
+    chunkmemset_stub,
+    chunkmemset_safe_stub
+};
--- a/libs/zlibng/functable.h
+++ b/libs/zlibng/functable.h
@ -9,13 +9,21 @@
 #include "deflate.h"

+/* Table of function pointers, one per routine that has alternative
+ * implementations. functable.c initializes the table positionally, so the
+ * member order here must stay in sync with that initializer. */
 struct functable_s {
-    void     (* fill_window)    (deflate_state *s);
-    Pos      (* insert_string)  (deflate_state *const s, const Pos str, unsigned int count);
-    uint32_t (* adler32)        (uint32_t adler, const unsigned char *buf, size_t len);
-    uint32_t (* crc32)          (uint32_t crc, const unsigned char *buf, uint64_t len);
+    void     (* insert_string)      (deflate_state *const s, const uint32_t str, uint32_t count);
+    Pos      (* quick_insert_string)(deflate_state *const s, const uint32_t str);
+    uint32_t (* adler32)            (uint32_t adler, const unsigned char *buf, size_t len);
+    uint32_t (* crc32)              (uint32_t crc, const unsigned char *buf, uint64_t len);
+    void     (* slide_hash)         (deflate_state *s);
+    uint32_t (* compare258)         (const unsigned char *src0, const unsigned char *src1);
+    uint32_t (* longest_match)      (deflate_state *const s, Pos cur_match);
+    uint32_t (* chunksize)          (void);
+    uint8_t* (* chunkcopy)          (uint8_t *out, uint8_t const *from, unsigned len);
+    uint8_t* (* chunkcopy_safe)     (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
+    uint8_t* (* chunkunroll)        (uint8_t *out, unsigned *dist, unsigned *len);
+    uint8_t* (* chunkmemset)        (uint8_t *out, unsigned dist, unsigned len);
+    uint8_t* (* chunkmemset_safe)   (uint8_t *out, unsigned dist, unsigned len, unsigned left);
 };

-ZLIB_INTERNAL extern __thread struct functable_s functable;
-
+Z_INTERNAL extern Z_TLS struct functable_s functable;

 #endif
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				disable=SC2140,SC2086,SC2046,SC2015,SC1097,SC1035,SC1036,SC1007,SC2154,SC2155,SC2000,SC2034,SC2016,SC1091,SC1090,SC2212,SC2143,SC2129,SC2102,SC2069,SC1041,SC1042,SC1044,SC1046,SC1119,SC1110,SC1111,SC1112,SC1102,SC1105,SC1101,SC1004,SC1003,SC1012,SC2068,SC2065,SC2064,SC2063,SC2059,SC2053,SC2048,SC2044,SC2032,SC2031,SC2030,SC2029,SC2025,SC2024,SC2022,SC2018,SC2019,SC2017,SC2014,SC2013,SC2012,SC2009,SC2001,SC2098,SC2096,SC2094,SC2091,SC2092,SC2088,SC2087,SC2076,SC2072,SC2071,SC2223,SC2221,SC2222,SC2217,SC2207,SC2206,SC2205,SC2190,SC2188,SC2187,SC2185,SC2179,SC2178,SC2174,SC2168,SC2167,SC2163,SC2161,SC2160,SC2153,SC2150,SC2148,SC2147,SC2146,SC2142,SC2139,SC2126,SC2123,SC2120,SC2119,SC2117,SC2114,SC1117,SC2164,SC1083,SC2004,SC2125,SC2128,SC2011,SC1008,SC1019,SC2093,SC1132,SC1129,SC2236,SC2237,SC2231,SC2230,SC2229,SC2106,SC2102,SC2243,SC2244,SC2245,SC2247,SC2248,SC2249,SC2250,SC2251,SC2252,SC2181