[Library] Update zlibng (#1255)

* Update zlibng

* Set cmake path more directly in zlibng to hopefully fix an issue with the build on drone

* I'm dumb, missing / in path

* Mackal helps with a dumb gitignore issue

* Adding all the files, not sure what's ignoring them and I'm tired of looking

* Some tweaks to zlibng build to hopefully get it to build properly. Works on MSVC now.
This commit is contained in:
Alex 2021-02-23 17:00:26 -08:00 committed by GitHub
parent e6dee96266
commit 2957f5084d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
184 changed files with 22029 additions and 11703 deletions

View File

@ -252,6 +252,7 @@ IF(ZLIB_FOUND)
SET(ZLIB_LIBRARY_TYPE "zlib-ng")
SET(ZLIB_LIBRARY_LIBS "zlibstatic")
SET(ZLIB_LIBRARY_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/libs/zlibng")
INCLUDE_DIRECTORIES(SYSTEM "${CMAKE_CURRENT_BINARY_DIR}/libs/zlibng")
ELSE()
SET(ZLIB_LIBRARY_TYPE " zlib")
SET(ZLIB_LIBRARY_LIBS ${ZLIB_LIBRARY})

View File

@ -0,0 +1,39 @@
name: CI Static Analysis
on: [push, pull_request]
jobs:
GCC-10:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Install packages (Ubuntu)
run: |
sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
sudo apt-get update
sudo apt-get install -y gcc-10
- name: Generate project files
run: |
cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=OFF -DWITH_CODE_COVERAGE=OFF -DWITH_MAINTAINER_WARNINGS=OFF
env:
CC: gcc-10
CFLAGS: "-fanalyzer -Werror -Wanalyzer-double-fclose -Wanalyzer-double-free -Wanalyzer-exposure-through-output-file -Wanalyzer-file-leak -Wanalyzer-free-of-non-heap -Wanalyzer-malloc-leak -Wanalyzer-null-argument -Wanalyzer-null-dereference -Wanalyzer-possible-null-argument -Wanalyzer-possible-null-dereference -Wanalyzer-stale-setjmp-buffer -Wanalyzer-tainted-array-index -Wanalyzer-unsafe-call-within-signal-handler -Wanalyzer-use-after-free -Wanalyzer-use-of-pointer-in-stale-stack-frame"
CI: true
- name: Compile source code
run: |
cmake --build . --config Release > /dev/null
Clang-12:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Install packages (Ubuntu)
run: |
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
sudo apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" -y
sudo apt install clang-tools-12 -y
- name: Generate project files
run: |
scan-build-12 --status-bugs cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=OFF -DWITH_CODE_COVERAGE=OFF -DWITH_MAINTAINER_WARNINGS=OFF
env:
CI: true
- name: Compile source code
run: |
scan-build-12 --status-bugs cmake --build . --config Release > /dev/null

381
libs/zlibng/.github/workflows/cmake.yml vendored Normal file
View File

@ -0,0 +1,381 @@
name: CI CMake
on: [push, pull_request]
jobs:
ci-cmake:
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- name: Ubuntu GCC
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_SANITIZER=Address
codecov: ubuntu_gcc
- name: Ubuntu GCC OSB -O1 No Unaligned64
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_UNALIGNED=ON -DUNALIGNED64_OK=OFF -DWITH_SANITIZER=Undefined
build-dir: ../build
build-src-dir: ../zlib-ng
codecov: ubuntu_gcc_osb
cflags: -O1 -g3
- name: Ubuntu GCC -O3 No Unaligned
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_UNALIGNED=OFF
codecov: ubuntu_gcc_o3
cflags: -O3
- name: Ubuntu GCC Link Zlib
os: ubuntu-latest
compiler: gcc
cmake-args: -DZLIB_DUAL_LINK=ON
- name: Ubuntu GCC No AVX2
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_AVX2=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_avx2
- name: Ubuntu GCC No SSE2
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_SSE2=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_sse2
- name: Ubuntu GCC No SSE4
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_SSE4=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_sse4
- name: Ubuntu GCC No PCLMULQDQ
os: ubuntu-latest
compiler: gcc
cmake-args: -DWITH_PCLMULQDQ=OFF -DWITH_SANITIZER=Undefined
codecov: ubuntu_gcc_no_pclmulqdq
- name: Ubuntu GCC Compat No Opt
os: ubuntu-latest
compiler: gcc
cmake-args: -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Address
codecov: ubuntu_gcc_compat_no_opt
cflags: -DNOT_TWEAK_COMPILER
- name: Ubuntu GCC ARM SF
os: ubuntu-latest
compiler: arm-linux-gnueabi-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi -DWITH_SANITIZER=Address
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
codecov: ubuntu_gcc_armsf
- name: Ubuntu GCC ARM SF Compat No Opt
os: ubuntu-latest
compiler: arm-linux-gnueabi-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
codecov: ubuntu_gcc_armsf_compat_no_opt
- name: Ubuntu GCC ARM HF
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_SANITIZER=Address
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
codecov: ubuntu_gcc_armhf
- name: Ubuntu GCC ARM HF No ACLE
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_ACLE=OFF -DWITH_SANITIZER=Address
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
codecov: ubuntu_gcc_armhf_no_acle
- name: Ubuntu GCC ARM HF No NEON
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_NEON=OFF -DWITH_SANITIZER=Address
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
codecov: ubuntu_gcc_armhf_no_neon
- name: Ubuntu GCC ARM HF Compat No Opt
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
codecov: ubuntu_gcc_armhf_compat_no_opt
- name: Ubuntu GCC AARCH64
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_SANITIZER=Address
asan-options: detect_leaks=0
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
codecov: ubuntu_gcc_aarch64
- name: Ubuntu GCC AARCH64 No ACLE
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_ACLE=OFF -DWITH_SANITIZER=Undefined
asan-options: detect_leaks=0
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
codecov: ubuntu_gcc_aarch64_no_acle
- name: Ubuntu GCC AARCH64 No NEON
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_NEON=OFF -DWITH_SANITIZER=Undefined
asan-options: detect_leaks=0
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
codecov: ubuntu_gcc_aarch64_no_neon
- name: Ubuntu GCC AARCH64 Compat No Opt
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
asan-options: detect_leaks=0
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
codecov: ubuntu_gcc_aarch64_compat_no_opt
- name: Ubuntu GCC PPC
os: ubuntu-latest
compiler: powerpc-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc.cmake
packages: qemu gcc-powerpc-linux-gnu libc-dev-powerpc-cross
ldflags: -static
codecov: ubuntu_gcc_ppc
- name: Ubuntu GCC PPC64
os: ubuntu-latest
compiler: powerpc64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64.cmake
packages: qemu gcc-powerpc64-linux-gnu libc-dev-ppc64-cross
ldflags: -static
codecov: ubuntu_gcc_ppc64
- name: Ubuntu GCC PPC64LE
os: ubuntu-latest
compiler: powerpc64le-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64le.cmake
packages: qemu gcc-powerpc64le-linux-gnu libc-dev-ppc64el-cross
codecov: ubuntu_gcc_ppc64le
- name: Ubuntu GCC SPARC64
os: ubuntu-latest
compiler: sparc64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-sparc64.cmake
packages: qemu gcc-sparc64-linux-gnu libc-dev-sparc64-cross
ldflags: -static
codecov: ubuntu_gcc_sparc64
- name: Ubuntu GCC S390X
os: ubuntu-latest
compiler: s390x-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DWITH_SANITIZER=Address
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
ldflags: -static
codecov: ubuntu_gcc_s390x
- name: Ubuntu GCC S390X DFLTCC
os: ubuntu-latest
compiler: s390x-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DWITH_DFLTCC_DEFLATE=ON -DWITH_DFLTCC_INFLATE=ON -DWITH_SANITIZER=Address
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
ldflags: -static
codecov: ubuntu_gcc_s390x
- name: Ubuntu GCC S390X DFLTCC Compat
os: ubuntu-latest
compiler: s390x-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DZLIB_COMPAT=ON -DWITH_DFLTCC_DEFLATE=ON -DWITH_DFLTCC_INFLATE=ON -DWITH_SANITIZER=Undefined
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
ldflags: -static
codecov: ubuntu_gcc_s390x
- name: Ubuntu MinGW i686
os: ubuntu-latest
compiler: i686-w64-mingw32-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-i686.cmake
packages: wine32 gcc-mingw-w64
# Codecov disabled due to gcov locking issue error
- name: Ubuntu MinGW x86_64
os: ubuntu-latest
compiler: x86_64-w64-mingw32-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-x86_64.cmake
packages: wine-stable gcc-mingw-w64
codecov: ubuntu_gcc_mingw_x86_64
- name: Ubuntu Clang
os: ubuntu-latest
compiler: clang
packages: llvm-6.0
gcov-exec: llvm-cov-6.0 gcov
codecov: ubuntu_clang
- name: Ubuntu Clang Inflate Strict
os: ubuntu-latest
compiler: clang
cmake-args: -DWITH_INFLATE_STRICT=ON
packages: llvm-6.0
gcov-exec: llvm-cov-6.0 gcov
codecov: ubuntu_clang_inflate_strict
- name: Ubuntu Clang Inflate Allow Invalid Dist
os: ubuntu-latest
compiler: clang
cmake-args: -DWITH_INFLATE_ALLOW_INVALID_DIST=ON
packages: llvm-6.0
gcov-exec: llvm-cov-6.0 gcov
codecov: ubuntu_clang_inflate_allow_invalid_dist
- name: Ubuntu Clang Memory Map
os: ubuntu-latest
compiler: clang
cflags: -DUSE_MMAP
packages: llvm-6.0
gcov-exec: llvm-cov-6.0 gcov
codecov: ubuntu_clang_mmap
- name: Ubuntu Clang Debug
os: ubuntu-latest
compiler: clang
packages: llvm-6.0
gcov-exec: llvm-cov-6.0 gcov
codecov: ubuntu_clang_debug
build-config: Debug
- name: Ubuntu Clang MSAN
os: ubuntu-latest
compiler: clang
cmake-args: -GNinja -DWITH_SANITIZER=Memory
packages: ninja-build llvm-6.0
gcov-exec: llvm-cov-6.0 gcov
cflags: -g3 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize-memory-track-origins
codecov: ubuntu_clang_msan
- name: Windows MSVC Win32
os: windows-latest
compiler: cl
cmake-args: -A Win32
- name: Windows MSVC Win64
os: windows-latest
compiler: cl
cmake-args: -A x64
- name: Windows MSVC ARM No Test
os: windows-latest
compiler: cl
cmake-args: -A ARM
- name: Windows MSVC ARM64 No Test
os: windows-latest
compiler: cl
cmake-args: -A ARM64
- name: Windows GCC
os: windows-latest
compiler: gcc
cmake-args: -G Ninja
codecov: win64_gcc
- name: Windows GCC Compat No Opt
os: windows-latest
compiler: gcc
cmake-args: -G Ninja -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF
codecov: win64_gcc_compat_no_opt
- name: macOS Clang
os: macos-latest
compiler: clang
cmake-args: -DWITH_SANITIZER=Address
codecov: macos_clang
- name: macOS GCC
os: macos-latest
compiler: gcc-10
cmake-args: -DWITH_SANITIZER=Undefined
packages: gcc@10
gcov-exec: gcov-10
codecov: macos_gcc
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Checkout test corpora
uses: actions/checkout@v2
with:
repository: nmoinvaz/corpora
path: test/data/corpora
- name: Install packages (Ubuntu)
if: runner.os == 'Linux' && matrix.packages
run: |
sudo dpkg --add-architecture i386 # Required for wine32
sudo apt-get update
sudo apt-get install -y ${{ matrix.packages }}
- name: Install packages (Windows)
if: runner.os == 'Windows'
run: |
choco install ninja ${{ matrix.packages }} --no-progress
- name: Install packages (macOS)
if: runner.os == 'macOS'
run: |
brew install ninja ${{ matrix.packages }}
env:
HOMEBREW_NO_INSTALL_CLEANUP: 1
- name: Install codecov.io tools
if: matrix.codecov
run: |
python -u -m pip install codecov
- name: Generate project files
# Shared libraries turned off for qemu ppc* and sparc & reduce code coverage sources
run: |
mkdir ${{ matrix.build-dir || '.not-used' }}
cd ${{ matrix.build-dir || '.' }}
cmake ${{ matrix.build-src-dir || '.' }} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=ON -DWITH_CODE_COVERAGE=ON -DWITH_MAINTAINER_WARNINGS=ON
env:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
LDFLAGS: ${{ matrix.ldflags }}
CI: true
- name: Compile source code
run: |
cd ${{ matrix.build-dir || '.' }}
cmake --build . --config ${{ matrix.build-config || 'Release' }}
- name: Run test cases
# Don't run tests on Windows ARM
if: runner.os != 'Windows' || contains(matrix.name, 'ARM') == false
run: |
cd ${{ matrix.build-dir || '.' }}
ctest --verbose -C Release --output-on-failure --max-width 120 -j 6
env:
ASAN_OPTIONS: ${{ matrix.asan-options || 'verbosity=0' }}:abort_on_error=1
MSAN_OPTIONS: ${{ matrix.msan-options || 'verbosity=0' }}:abort_on_error=1
TSAN_OPTIONS: ${{ matrix.tsan-options || 'verbosity=0' }}:abort_on_error=1
LSAN_OPTIONS: ${{ matrix.lsan-options || 'verbosity=0' }}:abort_on_error=1
- name: Upload coverage report
if: matrix.codecov && ( env.CODECOV_TOKEN_SECRET != '' || github.repository == 'zlib-ng/zlib-ng' )
shell: bash
run: |
bash tools/codecov-upload.sh
env:
# Codecov does not yet support GitHub Actions
CODECOV_TOKEN_SECRET: "${{secrets.CODECOV_TOKEN}}"
CODECOV_TOKEN: "${{ secrets.CODECOV_TOKEN || 'e4fdf847-f541-4ab1-9d50-3d27e5913906' }}"
CODECOV_FLAGS: "${{ matrix.codecov }}"
CODECOV_NAME: "${{ matrix.name }}"
CODECOV_EXEC: "${{ matrix.gcov-exec || 'gcov' }}"
CODECOV_DIR: "${{ matrix.build-dir || '.' }}"

View File

@ -0,0 +1,185 @@
name: CI Configure
on: [push, pull_request]
jobs:
ci-configure:
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- name: Ubuntu GCC
os: ubuntu-latest
compiler: gcc
configure-args: --warn
- name: Ubuntu GCC OSB
os: ubuntu-latest
compiler: gcc
configure-args: --warn
build-dir: ../build
build-src-dir: ../zlib-ng
- name: Ubuntu GCC Compat No Opt
os: ubuntu-latest
compiler: gcc
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
- name: Ubuntu GCC ARM SF
os: ubuntu-latest
compiler: arm-linux-gnueabi-gcc
configure-args: --warn
chost: arm-linux-gnueabi
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
- name: Ubuntu GCC ARM SF Compat No Opt
os: ubuntu-latest
compiler: arm-linux-gnueabi-gcc
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
chost: arm-linux-gnueabi
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
- name: Ubuntu GCC ARM HF
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
configure-args: --warn
chost: arm-linux-gnueabihf
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
- name: Ubuntu GCC ARM HF No ACLE
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
configure-args: --warn --without-acle
chost: arm-linux-gnueabihf
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
- name: Ubuntu GCC ARM HF No NEON
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
configure-args: --warn --without-neon
chost: arm-linux-gnueabihf
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
- name: Ubuntu GCC ARM HF Compat No Opt
os: ubuntu-latest
compiler: arm-linux-gnueabihf-gcc
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
chost: arm-linux-gnueabihf
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
- name: Ubuntu GCC AARCH64
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
configure-args: --warn
chost: aarch64-linux-gnu
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
- name: Ubuntu GCC AARCH64 No ACLE
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
configure-args: --warn --without-acle
chost: aarch64-linux-gnu
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
- name: Ubuntu GCC AARCH64 No NEON
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
configure-args: --warn --without-neon
chost: aarch64-linux-gnu
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
- name: Ubuntu GCC AARCH64 Compat No Opt
os: ubuntu-latest
compiler: aarch64-linux-gnu-gcc
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
chost: aarch64-linux-gnu
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
- name: Ubuntu GCC PPC
os: ubuntu-latest
compiler: powerpc-linux-gnu-gcc
configure-args: --warn --static
chost: powerpc-linux-gnu
packages: qemu gcc-powerpc-linux-gnu libc-dev-powerpc-cross
cflags: -static
ldflags: -static
- name: Ubuntu GCC PPC64
os: ubuntu-latest
compiler: powerpc64-linux-gnu-gcc
configure-args: --warn --static
chost: powerpc-linux-gnu
packages: qemu gcc-powerpc64-linux-gnu libc-dev-ppc64-cross
cflags: -static
ldflags: -static
- name: Ubuntu GCC PPC64LE
os: ubuntu-latest
compiler: powerpc64le-linux-gnu-gcc
configure-args: --warn
chost: powerpc64le-linux-gnu
packages: qemu gcc-powerpc64le-linux-gnu libc-dev-ppc64el-cross
- name: Ubuntu GCC S390X
os: ubuntu-latest
compiler: s390x-linux-gnu-gcc
configure-args: --warn --static
chost: s390x-linux-gnu
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
cflags: -static
ldflags: -static
- name: Ubuntu GCC S390X DFLTCC
os: ubuntu-latest
compiler: s390x-linux-gnu-gcc
configure-args: --warn --static --with-dfltcc-deflate --with-dfltcc-inflate
chost: s390x-linux-gnu
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
cflags: -static
ldflags: -static
- name: Ubuntu GCC S390X DFLTCC Compat
os: ubuntu-latest
compiler: s390x-linux-gnu-gcc
configure-args: --warn --zlib-compat --static --with-dfltcc-deflate --with-dfltcc-inflate
chost: s390x-linux-gnu
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
cflags: -static
ldflags: -static
- name: macOS GCC
os: macOS-latest
compiler: gcc
configure-args: --warn
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Install packages (Ubuntu)
if: runner.os == 'Linux' && matrix.packages
run: |
sudo apt-get update
sudo apt-get install -y ${{ matrix.packages }}
- name: Generate project files
run: |
mkdir ${{ matrix.build-dir || '.not-used' }}
cd ${{ matrix.build-dir || '.' }}
${{ matrix.build-src-dir || '.' }}/configure ${{ matrix.configure-args }}
env:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
LDFLAGS: ${{ matrix.ldflags }}
CHOST: ${{ matrix.chost }}
CI: true
- name: Compile source code
run: |
cd ${{ matrix.build-dir || '.' }}
make -j2
- name: Run test cases
run: |
cd ${{ matrix.build-dir || '.' }}
make test

23
libs/zlibng/.github/workflows/fuzz.yml vendored Normal file
View File

@ -0,0 +1,23 @@
name: CI Fuzz
on: [pull_request]
jobs:
Fuzzing:
runs-on: ubuntu-latest
steps:
- name: Build Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
with:
oss-fuzz-project-name: 'zlib-ng'
dry-run: false
- name: Run Fuzzers
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
with:
oss-fuzz-project-name: 'zlib-ng'
fuzz-seconds: 600
dry-run: false
- name: Upload Crash
uses: actions/upload-artifact@v1
if: failure()
with:
name: artifacts
path: ./out/artifacts

View File

@ -0,0 +1,46 @@
name: CI Libpng
on: [pull_request]
jobs:
pngtest:
name: Ubuntu Clang
runs-on: ubuntu-latest
steps:
- name: Checkout repository (zlib-ng)
uses: actions/checkout@v1
- name: Generate project files (zlib-ng)
run: |
cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF
env:
CC: clang
CFLAGS: -fPIC
CI: true
- name: Compile source code (zlib-ng)
run: |
cmake --build . --config Release
- name: Checkout repository (libpng)
uses: actions/checkout@v2
with:
repository: glennrp/libpng
path: libpng
- name: Generate project files (libpng)
run: |
cd libpng
cmake . -DCMAKE_BUILD_TYPE=Release -DPNG_TESTS=ON -DPNG_STATIC=OFF -DZLIB_INCLUDE_DIR=.. -DZLIB_LIBRARY=$PWD/../libz.a
env:
CC: clang
CI: true
- name: Compile source code (libpng)
run: |
cd libpng
cmake --build . --config Release
- name: Run test cases (libpng)
run: |
cd libpng
ctest -C Release --output-on-failure --max-width 120

48
libs/zlibng/.github/workflows/nmake.yml vendored Normal file
View File

@ -0,0 +1,48 @@
name: CI NMake
on: [push, pull_request]
jobs:
ci-cmake:
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- name: Windows NMake x86
os: windows-latest
makefile: win32/Makefile.msc
vc-vars: x86
- name: Windows NMake x64
os: windows-latest
makefile: win32/Makefile.msc
vc-vars: x86_amd64
- name: Windows NMake ARM No Test
os: windows-latest
makefile: win32/Makefile.arm
vc-vars: x86_arm
- name: Windows NMake ARM64 No Test
os: windows-latest
makefile: win32/Makefile.a64
vc-vars: x86_arm64
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Compile source code
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.vc-vars }}
nmake -f ${{ matrix.makefile }}
- name: Run test cases
shell: cmd
# Don't run tests on Windows ARM
if: contains(matrix.vc-vars, 'arm') == false
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.vc-vars }}
nmake -f ${{ matrix.makefile }} test
nmake -f ${{ matrix.makefile }} testdll

View File

@ -0,0 +1,121 @@
name: CI Pkgcheck
on: [push, pull_request]
jobs:
ci-pkgcheck:
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- name: Ubuntu GCC
os: ubuntu-latest
compiler: gcc
- name: Ubuntu GCC -m32
os: ubuntu-latest
compiler: gcc
packages: gcc-multilib
cmake-args: -DCMAKE_C_FLAGS=-m32
cflags: -m32
ldflags: -m32
- name: Ubuntu GCC ARM HF
os: ubuntu-latest
chost: arm-linux-gnueabihf
compiler: arm-linux-gnueabihf-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf
packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
- name: Ubuntu GCC AARCH64
os: ubuntu-latest
chost: aarch64-linux-gnu
compiler: aarch64-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake
packages: qemu gcc-aarch64-linux-gnu libc6-dev-arm64-cross
- name: Ubuntu GCC PPC
os: ubuntu-latest
chost: powerpc-linux-gnu
compiler: powerpc-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc.cmake
packages: qemu gcc-powerpc-linux-gnu libc6-dev-powerpc-cross
- name: Ubuntu GCC PPC64LE
os: ubuntu-latest
chost: powerpc64le-linux-gnu
compiler: powerpc64le-linux-gnu-gcc
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64le.cmake
packages: qemu gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross
- name: macOS Clang
os: macOS-latest
compiler: clang
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Install packages (Ubuntu)
if: runner.os == 'Linux'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends abigail-tools ninja-build diffoscope ${{ matrix.packages }}
- name: Install packages (macOS)
if: runner.os == 'macOS'
run: |
brew install ninja diffoscope ${{ matrix.packages }}
env:
HOMEBREW_NO_INSTALL_CLEANUP: 1
- name: Select Xcode version (macOS)
# Use a version of Xcode that supports ZERO_AR_DATE until CMake supports
# AppleClang linking with libtool using -D argument
# https://gitlab.kitware.com/cmake/cmake/-/issues/19852
if: runner.os == 'macOS'
uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: '12.1.1'
- name: Compare builds
run: |
sh test/pkgcheck.sh
env:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
CHOST: ${{ matrix.chost }}
CMAKE_ARGS: ${{ matrix.cmake-args }}
LDFLAGS: ${{ matrix.ldflags }}
- name: Compare builds (compat)
run: |
sh test/pkgcheck.sh --zlib-compat
env:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
CHOST: ${{ matrix.chost }}
CMAKE_ARGS: ${{ matrix.cmake-args }}
LDFLAGS: ${{ matrix.ldflags }}
- name: Check ABI
# macOS runner does not contain abigail
if: runner.os != 'macOS'
run: |
sh test/abicheck.sh --refresh_if
env:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
CHOST: ${{ matrix.chost }}
LDFLAGS: ${{ matrix.ldflags }}
- name: Check ABI (compat)
# macOS runner does not contain abigail
if: runner.os != 'macOS'
run: |
sh test/abicheck.sh --zlib-compat --refresh_if
env:
CC: ${{ matrix.compiler }}
CFLAGS: ${{ matrix.cflags }}
CHOST: ${{ matrix.chost }}
LDFLAGS: ${{ matrix.ldflags }}

View File

@ -0,0 +1,73 @@
name: CI Release
on:
push:
tags:
- '*'
jobs:
ci-cmake:
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
include:
- name: Windows MSVC Win32
os: windows-latest
compiler: cl
cmake-args: -A Win32
deploy-name: win32
- name: Windows MSVC Win32 Compat
os: windows-latest
compiler: cl
cmake-args: -A Win32 -DZLIB_COMPAT=ON
deploy-name: win32-compat
- name: Windows MSVC Win64
os: windows-latest
compiler: cl
cmake-args: -A x64
deploy-name: win64
- name: Windows MSVC Win64 Compat
os: windows-latest
compiler: cl
cmake-args: -A x64 -DZLIB_COMPAT=ON
deploy-name: win64-compat
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Set environment variables
shell: bash
run: echo "tag=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
- name: Generate project files
run: |
cmake . ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=Release -DZLIB_ENABLE_TESTS=ON -DCMAKE_INSTALL_PREFIX=out -DINSTALL_UTILS=ON
env:
CC: ${{ matrix.compiler }}
CI: true
- name: Compile source code
run: |
cmake --build . --config Release --target install
- name: Package release (Windows)
if: runner.os == 'Windows'
run: |
cd out
7z a -tzip ../zlib-ng-${{ matrix.deploy-name }}.zip bin include lib ../LICENSE.md ../README.md
- name: Upload release (Windows)
uses: svenstaro/upload-release-action@v1-release
if: runner.os == 'Windows'
with:
asset_name: zlib-ng-${{ matrix.deploy-name }}.zip
file: zlib-ng-${{ matrix.deploy-name }}.zip
tag: ${{env.tag}}
repo_token: ${{ secrets.GITHUB_TOKEN }}
overwrite: true
env:
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

View File

@ -13,14 +13,18 @@
*.gcno
*.gcov
/adler32_test
/adler32_testsh
/example
/example64
/examplesh
/libz.so*
/libz-ng.so*
/makefixed
/minigzip
/minigzip64
/minigzipsh
/switchlevels
/zlib.pc
/zlib-ng.pc
/CVE-2003-0107
@ -46,8 +50,9 @@ foo.gz
CMakeCache.txt
CMakeFiles
Testing
*.cmake
/*.cmake
*.stackdump
*._h
zconf.h
zconf.h.cmakein
zconf.h.included
@ -61,6 +66,7 @@ a.out
/Makefile
/arch/arm/Makefile
/arch/generic/Makefile
/arch/power/Makefile
/arch/x86/Makefile
.kdev4
*.kdev4
@ -71,6 +77,10 @@ a.out
/zlib.dir
/zlibstatic.dir
/win32/Debug
/build/
/build[.-]*/
/btmp[12]/
/pkgtmp[12]/
/.idea
/cmake-build-debug

View File

@ -0,0 +1 @@
disable=SC2140,SC2086,SC2046,SC2015,SC1097,SC1035,SC1036,SC1007,SC2154,SC2155,SC2000,SC2034,SC2016,SC1091,SC1090,SC2212,SC2143,SC2129,SC2102,SC2069,SC1041,SC1042,SC1044,SC1046,SC1119,SC1110,SC1111,SC1112,SC1102,SC1105,SC1101,SC1004,SC1003,SC1012,SC2068,SC2065,SC2064,SC2063,SC2059,SC2053,SC2048,SC2044,SC2032,SC2031,SC2030,SC2029,SC2025,SC2024,SC2022,SC2018,SC2019,SC2017,SC2014,SC2013,SC2012,SC2009,SC2001,SC2098,SC2096,SC2094,SC2091,SC2092,SC2088,SC2087,SC2076,SC2072,SC2071,SC2223,SC2221,SC2222,SC2217,SC2207,SC2206,SC2205,SC2190,SC2188,SC2187,SC2185,SC2179,SC2178,SC2174,SC2168,SC2167,SC2163,SC2161,SC2160,SC2153,SC2150,SC2148,SC2147,SC2146,SC2142,SC2139,SC2126,SC2123,SC2120,SC2119,SC2117,SC2114,SC1117,SC2164,SC1083,SC2004,SC2125,SC2128,SC2011,SC1008,SC1019,SC2093,SC1132,SC1129,SC2236,SC2237,SC2231,SC2230,SC2229,SC2106,SC2102,SC2243,SC2244,SC2245,SC2247,SC2248,SC2249,SC2250,SC2251,SC2252,SC2181

View File

@ -1,283 +0,0 @@
language: c
cache: ccache
dist: xenial
env:
global:
- BUILDDIR=.
- MAKER="make -j2"
- TESTER="make test"
matrix:
include:
- os: windows
compiler: clang
env:
- GENERATOR="cmake . "
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- os: windows
compiler: clang
env:
- GENERATOR="cmake ..\\zlib-ng -DZLIB_COMPAT=ON"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- BUILDDIR=..\\build
- os: windows
compiler: gcc
env:
- GENERATOR="cmake ."
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- os: linux
compiler: gcc
env: GENERATOR="./configure --warn"
- os: linux
compiler: gcc
env: GENERATOR="cmake . -DZLIB_COMPAT=OFF -DWITH_GZFILEOP=ON -DWITH_NEW_STRATEGIES=YES -DWITH_OPTIM=ON"
- os: linux
compiler: gcc
env:
- GENERATOR="../zlib-ng/configure --warn --zlib-compat"
- BUILDDIR=../build
- os: linux
compiler: gcc
env: GENERATOR="./configure --warn --zlib-compat --without-optimizations --without-new-strategies"
- os: linux
compiler: gcc
env: GENERATOR="cmake ."
- os: linux
compiler: gcc
env:
- GENERATOR="cmake ../zlib-ng"
- BUILDDIR=../build
- os: linux
compiler: clang
env: GENERATOR="./configure --warn --zlib-compat"
- os: linux
compiler: clang
env:
- GENERATOR="cmake ../zlib-ng"
- BUILDDIR=../build
- os: linux
compiler: clang
env:
- GENERATOR="scan-build -v --status-bugs cmake ../zlib-ng"
- MAKER="scan-build -v --status-bugs make"
- BUILDDIR=../build
- os: osx
compiler: gcc
env: GENERATOR="./configure --warn --zlib-compat"
- os: osx
compiler: gcc
env:
- GENERATOR="../zlib-ng/configure --warn --zlib-compat"
- BUILDDIR=../build
- os: osx
compiler: gcc
env: GENERATOR="cmake ."
- os: osx
compiler: clang
env: GENERATOR="./configure --warn --zlib-compat"
- os: osx
compiler: clang
env:
- GENERATOR="cmake ../zlib-ng"
- BUILDDIR=../build
# compiling for linux-ppc64le variants
- os: linux-ppc64le
compiler: gcc
env: GENERATOR="cmake ."
- os: linux-ppc64le
compiler: gcc
env:
- GENERATOR="cmake ../zlib-ng"
- BUILDDIR=../build
- os: linux-ppc64le
compiler: clang
env: GENERATOR="./configure --warn --zlib-compat"
- os: linux-ppc64le
compiler: clang
env:
- GENERATOR="cmake ../zlib-ng"
- BUILDDIR=../build
# Cross compiling for arm variants
- os: linux
compiler: aarch64-linux-gnu-gcc
addons:
apt:
packages:
- qemu
- gcc-aarch64-linux-gnu
- libc-dev-arm64-cross
# For all aarch64 implementations NEON is mandatory, while crypto/crc are not.
env:
- GENERATOR="./configure --warn --zlib-compat"
- CHOST=aarch64-linux-gnu
- os: linux
compiler: aarch64-linux-gnu-gcc
addons:
apt:
packages:
- qemu
- gcc-aarch64-linux-gnu
- libc-dev-arm64-cross
# For all aarch64 implementations NEON is mandatory, while crypto/crc are not.
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake . -DZLIB_COMPAT=ON"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- os: linux
compiler: aarch64-linux-gnu-gcc
addons:
apt:
packages:
- qemu
- gcc-aarch64-linux-gnu
- libc-dev-arm64-cross
env:
- GENERATOR="./configure --warn --zlib-compat"
- CHOST=aarch64-linux-gnu
- os: linux
compiler: aarch64-linux-gnu-gcc
addons:
apt:
packages:
- qemu
- gcc-aarch64-linux-gnu
- libc-dev-arm64-cross
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake ."
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
# Hard-float subsets
- os: linux
compiler: arm-linux-gnueabihf-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabihf
- libc-dev-armhf-cross
env:
- GENERATOR="./configure --warn"
- CHOST=arm-linux-gnueabihf
- os: linux
compiler: arm-linux-gnueabihf-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabihf
- libc-dev-armhf-cross
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- os: linux
compiler: arm-linux-gnueabihf-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabihf
- libc-dev-armhf-cross
env:
- GENERATOR="./configure --warn --zlib-compat --without-neon"
- CHOST=arm-linux-gnueabihf
- os: linux
compiler: arm-linux-gnueabihf-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabihf
- libc-dev-armhf-cross
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DWITH_NEON=OFF -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- os: linux
compiler: arm-linux-gnueabihf-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabihf
- libc-dev-armhf-cross
env:
- GENERATOR="./configure --warn --zlib-compat"
- CHOST=arm-linux-gnueabihf
- os: linux
compiler: arm-linux-gnueabihf-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabihf
- libc-dev-armhf-cross
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
# Soft-float subset
- os: linux
compiler: arm-linux-gnueabi-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabi
- libc-dev-armel-cross
env:
- GENERATOR="./configure"
- CHOST=arm-linux-gnueabi
- os: linux
compiler: arm-linux-gnueabi-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabi
- libc-dev-armel-cross
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
- os: linux
compiler: arm-linux-gnueabi-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabi
- libc-dev-armel-cross
env:
- GENERATOR="./configure --zlib-compat"
- CHOST=arm-linux-gnueabi
- os: linux
compiler: arm-linux-gnueabi-gcc
addons:
apt:
packages:
- qemu
- gcc-arm-linux-gnueabi
- libc-dev-armel-cross
env:
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi"
- MAKER="cmake --build . --config Release"
- TESTER="ctest --verbose -C Release"
script:
- mkdir -p $BUILDDIR
- cd $BUILDDIR
- $GENERATOR
- $MAKER
- $TESTER

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,55 +0,0 @@
CMakeLists.txt cmake build file
ChangeLog.zlib history of changes up to the fork from zlib 1.2.11
FAQ.zlib Frequently Asked Questions about zlib, as distributed in zlib 1.2.11
INDEX this file
Makefile dummy Makefile that tells you to ./configure
Makefile.in template for Unix Makefile
README guess what
README.zlib Copy of the original README file distributed in zlib 1.2.11
configure configure script for Unix
test/example.c zlib usages examples for build testing
test/minigzip.c minimal gzip-like functionality for build testing
test/infcover.c inf*.c code coverage for build coverage testing
treebuild.xml XML description of source file dependencies
zconf.h.cmakein zconf.h template for cmake
zconf.h.in zconf.h template for configure
zlib.3 Man page for zlib
zlib.3.pdf Man page in PDF format
zlib.map Linux symbol information
zlib.pc.in Template for pkg-config descriptor
zlib.pc.cmakein zlib.pc template for cmake
zlib2ansi perl script to convert source files for C++ compilation
arch/ architecture-specific code
doc/ documentation for formats and algorithms
win32/ makefiles for Windows
zlib public header files (required for library use):
zconf.h
zlib.h
private source files used to build the zlib library:
adler32.c
compress.c
crc32.c
crc32.h
deflate.c
deflate.h
gzclose.c
gzguts.h
gzlib.c
gzread.c
gzwrite.c
infback.c
inffast.c
inffast.h
inffixed.h
inflate.c
inflate.h
inftrees.c
inftrees.h
trees.c
trees.h
uncompr.c
zutil.c
zutil.h

37
libs/zlibng/INDEX.md Normal file
View File

@ -0,0 +1,37 @@
Contents
--------
| Name | Description |
|:-----------------|:---------------------------------------------------------------|
| arch/ | Architecture-specific code |
| doc/ | Documentation for formats and algorithms |
| test/example.c | Zlib usages examples for build testing |
| test/minigzip.c | Minimal gzip-like functionality for build testing |
| test/infcover.c | Inflate code coverage for build testing |
| win32/ | Shared library version resources for Windows |
| CMakeLists.txt | Cmake build script |
| configure | Bash configure/build script |
| adler32.c | Compute the Adler-32 checksum of a data stream |
| chunkset.* | Inline functions to copy small data chunks |
| compress.c | Compress a memory buffer |
| deflate.* | Compress data using the deflate algorithm |
| deflate_fast.c | Compress data using the deflate algorithm with fast strategy |
| deflate_medium.c | Compress data using the deflate algorithm with medium strategy |
| deflate_slow.c | Compress data using the deflate algorithm with slow strategy |
| functable.* | Struct containing function pointers to optimized functions |
| gzguts.h | Internal definitions for gzip operations |
| gzlib.c | Functions common to reading and writing gzip files |
| gzread.c | Read gzip files |
| gzwrite.c | Write gzip files |
| infback.* | Inflate using a callback interface |
| inflate.* | Decompress data |
| inffast.* | Decompress data with speed optimizations |
| inffixed_tbl.h | Table for decoding fixed codes |
| inftrees.h | Generate Huffman trees for efficient decoding |
| trees.* | Output deflated data using Huffman coding |
| uncompr.c | Decompress a memory buffer |
| zconf.h.cmakein | zconf.h template for cmake |
| zendian.h | BYTE_ORDER for endian tests |
| zlib.3 | Man page for zlib |
| zlib.map | Linux symbol information |
| zlib.pc.in | Pkg-config template |

View File

@ -1,64 +0,0 @@
Overview
========
There are several methods for compiling and installing zlib-ng, depending
on your favorite operating system and development toolkits.
This document will attempt to give a general overview of some of them.
PS: We do not recommend running 'make install' unless you know what you
are doing, as this can override the system default zlib library, and
any wrong configuration or incompatability of zlib-ng can make the
whole system unusable.
On linux distros, an alternative way to use zlib-ng instead of zlib
for specific programs exist, use LD_PRELOAD.
If the program is dynamically linked with zlib, then zlib-ng can take
its place without risking system-wide instability. Ex:
LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program
Configure
=========
Using the configure script is currently the main method of setting up the
makefiles and preparing for compilation. Configure will attempt to detect
the specifics of your system, and enable some of the relevant options for you.
Configure accepts several command-line options, some of the most important
ones are detailed below.
--zlib-compat
This enables options that will ensure that zlib-ng is compiled with all the
functions that a standard zlib library contains, you will need to use this
if you are going to be using zlib-ng as a drop-in replacement for zlib.
--without-optimizations
This will disable zlib-ng specific optimizations (does not disable strategies).
--without-new-strategies
This will disable specially optimized strategies, such as deflate_quick and
deflate_medium.
Run configure like this:
./configure --zlib-compat
Then you can compile using make:
make
make test
Cmake
=====
Cmake is an alternative to configure, basically letting you do the same thing,
but with different tools and user interfaces.
Start by initializing cmake:
cmake .
Then you can start the configuration tui to set the wanted options
ccmake .
You can now compile using make:
make
make test

View File

@ -29,8 +29,8 @@ TEST_LIBS=$(LIBNAME1).a
LDSHARED=$(CC)
LDSHAREDFLAGS=-shared
VER=1.9.9
VER1=1
VER=2.0.0-RC2
VER1=2
STATICLIB=$(LIBNAME1).a
SHAREDLIB=$(LIBNAME1).so
@ -51,7 +51,7 @@ RCOBJS=
STRIP=
RANLIB=ranlib
LDCONFIG=ldconfig
LDSHAREDLIBC=-lc
LDSHAREDLIBC=
EXE=
SRCDIR=.
@ -71,12 +71,64 @@ mandir = ${prefix}/share/man
man3dir = ${mandir}/man3
pkgconfigdir = ${libdir}/pkgconfig
OBJZ = adler32.o compress.o crc32.o deflate.o deflate_fast.o deflate_medium.o deflate_slow.o functable.o infback.o inffast.o inflate.o inftrees.o trees.o uncompr.o zutil.o $(ARCH_STATIC_OBJS)
OBJG = gzclose.o gzlib.o gzread.o gzwrite.o
OBJZ = \
adler32.o \
chunkset.o \
compare258.o \
compress.o \
crc32.o \
crc32_comb.o \
deflate.o \
deflate_fast.o \
deflate_medium.o \
deflate_quick.o \
deflate_slow.o \
functable.o \
infback.o \
inffast.o \
inflate.o \
inftrees.o \
insert_string.o \
trees.o \
uncompr.o \
zutil.o \
$(ARCH_STATIC_OBJS)
OBJG = \
gzlib.o \
gzread.o \
gzwrite.o
OBJC = $(OBJZ) $(OBJG)
PIC_OBJZ = adler32.lo compress.lo crc32.lo deflate.lo deflate_fast.lo deflate_medium.lo deflate_slow.lo functable.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo uncompr.lo zutil.lo $(ARCH_SHARED_OBJS)
PIC_OBJG = gzclose.lo gzlib.lo gzread.lo gzwrite.lo
PIC_OBJZ = \
adler32.lo \
chunkset.lo \
compare258.lo \
compress.lo \
crc32.lo \
crc32_comb.lo \
deflate.lo \
deflate_fast.lo \
deflate_medium.lo \
deflate_quick.lo \
deflate_slow.lo \
functable.lo \
infback.lo \
inffast.lo \
inflate.lo \
inftrees.lo \
insert_string.lo \
trees.lo \
uncompr.lo \
zutil.lo \
$(ARCH_SHARED_OBJS)
PIC_OBJG = \
gzlib.lo \
gzread.lo \
gzwrite.lo
PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
OBJS = $(OBJC)
@ -85,11 +137,9 @@ PIC_OBJS = $(PIC_OBJC)
all: static shared
static: example$(EXE) minigzip$(EXE) fuzzers
static: adler32_test$(EXE) example$(EXE) minigzip$(EXE) fuzzers makefixed$(EXE) maketrees$(EXE) makecrct$(EXE)
shared: examplesh$(EXE) minigzipsh$(EXE)
all64: example64$(EXE) minigzip64$(EXE)
shared: adler32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)
check: test
@ -181,17 +231,23 @@ $(STATICLIB): $(OBJS)
$(AR) $(ARFLAGS) $@ $(OBJS)
-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
adler32_test.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/test/adler32_test.c
example.o:
$(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c
minigzip.o:
$(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/minigzip.c
example64.o:
$(CC) $(CFLAGS) -DWITH_GZFILEOP -D_FILE_OFFSET_BITS=64 $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c
makefixed.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makefixed.c
minigzip64.o:
$(CC) $(CFLAGS) -DWITH_GZFILEOP -D_FILE_OFFSET_BITS=64 $(INCLUDES) -c -o $@ $(SRCDIR)/test/minigzip.c
maketrees.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/maketrees.c
makecrct.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makecrct.c
zlibrc.o: win32/zlib$(SUFFIX)1.rc
$(RC) $(RCFLAGS) -o $@ win32/zlib$(SUFFIX)1.rc
@ -209,7 +265,7 @@ $(OBJG): %.o: $(SRCDIR)/%.c
$(SHAREDTARGET): $(PIC_OBJS) $(DEFFILE) $(RCOBJS)
ifneq ($(SHAREDTARGET),)
$(LDSHARED) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC)
$(LDSHARED) $(CFLAGS) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
@ -220,38 +276,56 @@ ifneq ($(SHAREDLIB),$(SHAREDTARGET))
endif
endif
adler32_test$(EXE): adler32_test.o $(OBJG) $(STATICLIB)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
example$(EXE): example.o $(OBJG) $(STATICLIB)
$(CC) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
minigzip$(EXE): minigzip.o $(OBJG) $(STATICLIB)
$(CC) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
adler32_testsh$(EXE): adler32_test.o $(OBJG) $(SHAREDTARGET)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
examplesh$(EXE): example.o $(OBJG) $(SHAREDTARGET)
$(CC) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
minigzipsh$(EXE): minigzip.o $(OBJG) $(SHAREDTARGET)
$(CC) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
example64$(EXE): example64.o $(OBJG) $(STATICLIB)
$(CC) $(LDFLAGS) -o $@ example64.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
makefixed$(EXE): makefixed.o $(OBJG) $(STATICLIB)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ makefixed.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
minigzip64$(EXE): minigzip64.o $(OBJG) $(STATICLIB)
$(CC) $(LDFLAGS) -o $@ minigzip64.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
maketrees$(EXE): maketrees.o $(OBJG) $(STATICLIB)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ maketrees.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
makecrct$(EXE): makecrct.o $(OBJG) $(STATICLIB)
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ makecrct.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
ifneq ($(STRIP),)
$(STRIP) $@
endif
@ -326,11 +400,11 @@ clean:
@if [ -f $(ARCHDIR)/Makefile ]; then $(MAKE) -C $(ARCHDIR) clean; fi
@if [ -f test/Makefile ]; then $(MAKE) -C test clean; fi
rm -f *.o *.lo *~ \
example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
example64$(EXE) minigzip64$(EXE) \
adler32_test$(EXE) example$(EXE) minigzip$(EXE) \
adler32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
checksum_fuzzer$(EXE) compress_fuzzer$(EXE) example_small_fuzzer$(EXE) example_large_fuzzer$(EXE) \
example_flush_fuzzer$(EXE) example_dict_fuzzer$(EXE) minigzip_fuzzer$(EXE) \
infcover \
infcover makefixed$(EXE) maketrees$(EXE) makecrct$(EXE) \
$(STATICLIB) $(IMPORTLIB) $(SHAREDLIB) $(SHAREDLIBV) $(SHAREDLIBM) \
foo.gz so_locations \
_match.s maketree
@ -338,6 +412,8 @@ clean:
rm -f *.gcda *.gcno *.gcov
rm -f a.out a.exe
rm -f *.pc
rm -f *._h
rm -rf btmp1 btmp2 pkgtmp1 pkgtmp2
maintainer-clean: distclean
distclean: clean

View File

@ -1,10 +1,41 @@
zlib-ng - zlib for the next generation systems
## zlib-ng
*zlib data compression library for the next generation systems*
Maintained by Hans Kristian Rosbach
aka Dead2 (zlib-ng àt circlestorm dót org)
|CI|Status|
|:-|-|
|GitHub Actions|[![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20CMake/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions) [![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20Configure/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions) [![Master Branch Status](https://github.com/zlib-ng/zlib-ng/workflows/CI%20NMake/badge.svg)](https://github.com/zlib-ng/zlib-ng/actions)|
|Buildkite|[![Build status](https://badge.buildkite.com/7bb1ef84356d3baee26202706cc053ee1de871c0c712b65d26.svg?branch=develop)](https://buildkite.com/circlestorm-productions/zlib-ng)|
|CodeFactor|[![CodeFactor](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng/badge)](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng)|
|OSS-Fuzz|[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/zlib-ng.svg)](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zlib-ng)|
|Codecov|[![codecov.io](https://codecov.io/github/zlib-ng/zlib-ng/coverage.svg?branch=develop)](https://codecov.io/github/zlib-ng/zlib-ng/)|
Fork Motivation and History
Features
--------
* Zlib compatible API with support for dual-linking
* Modernized native API based on zlib API for ease of porting
* Modern C99 syntax and a clean code layout
* Deflate medium and quick algorithms based on Intel's zlib fork
* Support for CPU intrinsics when available
* Adler32 implementation using SSSE3, AVX2, Neon & VSX
* CRC32-B implementation using PCLMULQDQ & ACLE
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
* Slide hash implementations using SSE2, AVX2, Neon & VSX
* Compare256/258 implementations using SSE4.2 & AVX2
* Inflate chunk copying using SSE2, AVX2 & Neon
* Support for hardware-accelerated deflate using IBM Z DFLTCC
* Unaligned memory read/writes and large bit buffer improvements
* Includes improvements from Cloudflare and Intel forks
* Configure, CMake, and NMake build system support
* Comprehensive set of CMake unit tests
* Code sanitizers, fuzzing, and coverage
* GitHub Actions continuous integration on Windows, macOS, and Linux
* Emulated CI for ARM, AARCH64, PPC, PPC64, SPARC64, S390x using qemu
Fork Motivation
---------------------------
The motivation for this fork was due to seeing several 3rd party
@ -38,17 +69,97 @@ various dead code, all contrib and example code as there is little
point in having those in this fork for various reasons.
A lot of improvements have gone into zlib-ng since its start, and
numerous people have contributed both small and big improvements,
or valuable testing.
numerous people and companies have contributed both small and big
improvements, or valuable testing.
Please read LICENSE.md, it is very simple and very liberal.
Build
-----
There are two ways to build zlib-ng:
### Cmake
To build zlib-ng using the cross-platform makefile generator cmake:
```
cmake .
cmake --build . --config Release
ctest --verbose -C Release
```
Alternatively, you can use the cmake configuration GUI tool ccmake:
```
ccmake .
```
### Configure
To build zlib-ng using the bash configure script:
```
./configure
make
make test
```
Build Options
-------------
| CMake | configure | Description | Default |
|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
| WITH_NATIVE_INSTRUCTIONS | --native | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
| WITH_SANITIZER | --with-sanitizer | Build with sanitizer (memory, address, undefined) | OFF |
| WITH_FUZZERS | --with-fuzzers | Build test/fuzz | OFF |
| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
Install
-------
WARNING: We do not recommend manually installing unless you really
know what you are doing, because this can potentially override the system
default zlib library, and any incompatibility or wrong configuration of
zlib-ng can make the whole system unusable, requiring recovery or reinstall.
If you still want a manual install, we recommend using the /opt/ path prefix.
For Linux distros, an alternative way to use zlib-ng (if compiled in
zlib-compat mode) instead of zlib, is through the use of the
_LD_PRELOAD_ environment variable. If the program is dynamically linked
with zlib, then zlib-ng will temporarily be used instead by the program,
without risking system-wide instability.
```
LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program
```
### Cmake
To install zlib-ng system-wide using cmake:
```
cmake --build . --target install
```
### Configure
To install zlib-ng system-wide using the configure script:
```
make install
```
Contributing
------------
Zlib-ng is a young project, and we aim to be open to contributions,
and we would be delighted to receive pull requests on github.
Zlib-ng is aiming to be open to contributions, and we would be
delighted to receive pull requests on github.
Just remember that any code you submit must be your own and it must
be zlib licensed.
Help with testing and reviewing of pull requests etc is also very
@ -73,9 +184,23 @@ The deflate and zlib specifications were written by L. Peter Deutsch.
zlib was originally created by Jean-loup Gailly (compression)
and Mark Adler (decompression).
Advanced Build Options
----------------------
Build Status
------------
Travis CI: [![build status](https://api.travis-ci.org/zlib-ng/zlib-ng.svg)](https://travis-ci.org/zlib-ng/zlib-ng/)
Buildkite: [![Build status](https://badge.buildkite.com/7bb1ef84356d3baee26202706cc053ee1de871c0c712b65d26.svg?branch=develop)](https://buildkite.com/circlestorm-productions/zlib-ng)
| CMake | configure | Description | Default |
|:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
| ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF |
| | --force-sse2 | Assume SSE2 instructions are always available | ON (x86), OFF (x86_64) |
| WITH_AVX2 | | Build with AVX2 intrinsics | ON |
| WITH_SSE2 | | Build with SSE2 intrinsics | ON |
| WITH_SSE4 | | Build with SSE4 intrinsics | ON |
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
| WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON |
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
| WITH_POWER8 | | Build with POWER8 optimisations | ON |
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Use DEFLATE COMPRESSION CALL instruction for compression on IBM Z | OFF |
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Use DEFLATE COMPRESSION CALL instruction for decompression on IBM Z | OFF |
| WITH_UNALIGNED | | Allow optimizations that use unaligned reads if safe on current arch| ON |
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |

View File

@ -1,118 +0,0 @@
ZLIB DATA COMPRESSION LIBRARY
zlib 1.2.11 is a general purpose data compression library. All the code is
thread safe. The data format used by the zlib library is described by RFCs
(Request for Comments) 1950 to 1952 in the files
http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
rfc1952 (gzip format).
All functions of the compression library are documented in the file zlib.h
(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example
of the library is given in the file test/example.c which also tests that
the library is working correctly. Another example is given in the file
test/minigzip.c. The compression library itself is composed of all source
files in the root directory.
To compile all files and run the test program, follow the instructions given at
the top of Makefile.in. In short "./configure; make test", and if that goes
well, "make install" should work for most flavors of Unix. For Windows, use
one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use
make_vms.com.
Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
<info@winimage.com> for the Windows DLL version. The zlib home page is
http://zlib.net/ . Before reporting a problem, please check this site to
verify that you have the latest version of zlib; otherwise get the latest
version and check whether the problem still exists or not.
PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
issue of Dr. Dobb's Journal; a copy of the article is available at
http://marknelson.us/1997/01/01/zlib-engine/ .
The changes made in version 1.2.11 are documented in the file ChangeLog.
Unsupported third party contributions are provided in directory contrib/ .
zlib is available in Java using the java.util.zip package, documented at
http://java.sun.com/developer/technicalArticles/Programming/compression/ .
A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
at CPAN (Comprehensive Perl Archive Network) sites, including
http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
available in Python 1.5 and later versions, see
http://docs.python.org/library/zlib.html .
zlib is built into tcl: http://wiki.tcl.tk/4610 .
An experimental package to read and write files in .zip format, written on top
of zlib by Gilles Vollant <info@winimage.com>, is available in the
contrib/minizip directory of zlib.
Notes for some targets:
- For Windows DLL versions, please see win32/DLL_FAQ.txt
- For 64-bit Irix, deflate.c must be compiled without any optimization. With
-O, one libpng test fails. The test works in 32 bit mode (with the -n32
compiler flag). The compiler bug has been reported to SGI.
- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1 it works
when compiled with cc.
- On Digital Unix 4.0D (formely OSF/1) on AlphaServer, the cc option -std1 is
necessary to get gzprintf working correctly. This is done by configure.
- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
other compilers. Use "make test" to check your compiler.
- gzdopen is not supported on RISCOS or BEOS.
- For PalmOs, see http://palmzlib.sourceforge.net/
Acknowledgments:
The deflate format used by zlib was defined by Phil Katz. The deflate and
zlib specifications were written by L. Peter Deutsch. Thanks to all the
people who reported problems and suggested various improvements in zlib; they
are too numerous to cite here.
Copyright notice:
(C) 1995-2017 Jean-loup Gailly and Mark Adler
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Jean-loup Gailly Mark Adler
jloup@gzip.org madler@alumni.caltech.edu
If you use the zlib library in a product, we would appreciate *not* receiving
lengthy legal documents to sign. The sources are provided for free but without
warranty of any kind. The library has been entirely written by Jean-loup
Gailly and Mark Adler; it does not include third-party code. We make all
contributions to and distributions of this project solely in our personal
capacity, and are not conveying any rights to any intellectual property of
any third parties.
If you redistribute modified sources, we would appreciate that you include in
the file ChangeLog history information documenting your changes. Please read
the FAQ for more information on the distribution of modified source versions.

View File

@ -3,24 +3,13 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zbuild.h"
#include "zutil.h"
#include "functable.h"
#include "adler32_p.h"
uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2);
#define DO1(buf, i) {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf, i) DO1(buf, i); DO1(buf, i+1);
#define DO4(buf, i) DO2(buf, i); DO2(buf, i+2);
#define DO8(buf, i) DO4(buf, i); DO4(buf, i+4);
#define DO16(buf) DO8(buf, 0); DO8(buf, 8);
/* ========================================================================= */
uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
uint32_t sum2;
unsigned n;
@ -29,15 +18,15 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
adler &= 0xffff;
/* in case user likes doing a byte at a time, keep it fast */
if (len == 1)
if (UNLIKELY(len == 1))
return adler32_len_1(adler, buf, sum2);
/* initial Adler-32 value (deferred check for len == 1 speed) */
if (buf == NULL)
if (UNLIKELY(buf == NULL))
return 1L;
/* in case short lengths are provided, keep it somewhat fast */
if (len < 16)
if (UNLIKELY(len < 16))
return adler32_len_16(adler, buf, len, sum2);
/* do length NMAX blocks -- requires just one modulo operation */
@ -50,15 +39,15 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
#endif
do {
#ifdef UNROLL_MORE
DO16(buf); /* 16 sums unrolled */
DO16(adler, sum2, buf); /* 16 sums unrolled */
buf += 16;
#else
DO8(buf, 0); /* 8 sums unrolled */
DO8(adler, sum2, buf, 0); /* 8 sums unrolled */
buf += 8;
#endif
} while (--n);
MOD(adler);
MOD(sum2);
adler %= BASE;
sum2 %= BASE;
}
/* do remaining bytes (less than NMAX, still just one modulo) */
@ -66,12 +55,12 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
#ifdef UNROLL_MORE
while (len >= 16) {
len -= 16;
DO16(buf);
DO16(adler, sum2, buf);
buf += 16;
#else
while (len >= 8) {
len -= 8;
DO8(buf, 0);
DO8(adler, sum2, buf, 0);
buf += 8;
#endif
}
@ -80,22 +69,34 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
adler += *buf++;
sum2 += adler;
}
MOD(adler);
MOD(sum2);
adler %= BASE;
sum2 %= BASE;
}
/* return recombined sums */
return adler | (sum2 << 16);
}
uint32_t ZEXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) {
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
}
#else
uint32_t Z_EXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
return functable.adler32(adler, buf, len);
}
#endif
/* ========================================================================= */
uint32_t ZEXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(adler32)(unsigned long adler, const unsigned char *buf, unsigned int len) {
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
}
#else
uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
return functable.adler32(adler, buf, len);
}
#endif
/* ========================================================================= */
static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
@ -108,11 +109,11 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len
return 0xffffffff;
/* the derivation of this formula is left as an exercise for the reader */
MOD63(len2); /* assumes len2 >= 0 */
len2 %= BASE; /* assumes len2 >= 0 */
rem = (unsigned)len2;
sum1 = adler1 & 0xffff;
sum2 = rem * sum1;
MOD(sum2);
sum2 %= BASE;
sum1 += (adler2 & 0xffff) + BASE - 1;
sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
if (sum1 >= BASE) sum1 -= BASE;
@ -123,10 +124,16 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len
}
/* ========================================================================= */
uint32_t ZEXPORT PREFIX(adler32_combine)(uint32_t adler1, uint32_t adler2, z_off_t len2) {
return adler32_combine_(adler1, adler2, len2);
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(adler32_combine)(unsigned long adler1, unsigned long adler2, z_off_t len2) {
return (unsigned long)adler32_combine_((uint32_t)adler1, (uint32_t)adler2, len2);
}
uint32_t ZEXPORT PREFIX(adler32_combine64)(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
unsigned long Z_EXPORT PREFIX4(adler32_combine)(unsigned long adler1, unsigned long adler2, z_off64_t len2) {
return (unsigned long)adler32_combine_((uint32_t)adler1, (uint32_t)adler2, len2);
}
#else
uint32_t Z_EXPORT PREFIX4(adler32_combine)(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
return adler32_combine_(adler1, adler2, len2);
}
#endif

View File

@ -12,45 +12,11 @@
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
/* use NO_DIVIDE if your processor does not do division in hardware --
try it both ways to see which is faster */
#ifdef NO_DIVIDE
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
(thank you to John Reiser for pointing this out) */
# define CHOP(a) \
do { \
uint32_t tmp = a >> 16; \
a &= 0xffff; \
a += (tmp << 4) - tmp; \
} while (0)
# define MOD28(a) \
do { \
CHOP(a); \
if (a >= BASE) a -= BASE; \
} while (0)
# define MOD(a) \
do { \
CHOP(a); \
MOD28(a); \
} while (0)
# define MOD63(a) \
do { /* this assumes a is not negative */ \
z_off64_t tmp = a >> 32; \
a &= 0xffffffffL; \
a += (tmp << 8) - (tmp << 5) + tmp; \
tmp = a >> 16; \
a &= 0xffffL; \
a += (tmp << 4) - tmp; \
tmp = a >> 16; \
a &= 0xffffL; \
a += (tmp << 4) - tmp; \
if (a >= BASE) a -= BASE; \
} while (0)
#else
# define MOD(a) a %= BASE
# define MOD28(a) a %= BASE
# define MOD63(a) a %= BASE
#endif
/* Unrolled Adler-32 accumulation steps.
 * DO1 adds the byte buf[i] to sum1 and then adds the updated sum1 to sum2.
 * DO2/DO4/DO8 expand to 2/4/8 consecutive DO1 steps starting at index i;
 * DO16 expands to 16 steps covering buf[0] through buf[15].
 * No modulo reduction happens here. */
#define DO1(sum1, sum2, buf, i) {(sum1) += buf[(i)]; (sum2) += (sum1);}
#define DO2(sum1, sum2, buf, i) {DO1(sum1, sum2, buf, i); DO1(sum1, sum2, buf, i+1);}
#define DO4(sum1, sum2, buf, i) {DO2(sum1, sum2, buf, i); DO2(sum1, sum2, buf, i+2);}
#define DO8(sum1, sum2, buf, i) {DO4(sum1, sum2, buf, i); DO4(sum1, sum2, buf, i+4);}
#define DO16(sum1, sum2, buf) {DO8(sum1, sum2, buf, 0); DO8(sum1, sum2, buf, 8);}
static inline uint32_t adler32_len_1(uint32_t adler, const unsigned char *buf, uint32_t sum2) {
adler += buf[0];
@ -70,8 +36,18 @@ static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf,
}
if (adler >= BASE)
adler -= BASE;
MOD28(sum2); /* only added so many BASE's */
sum2 %= BASE; /* only added so many BASE's */
return adler | (sum2 << 16);
}
/* Fold len bytes of buf into the running sums (adler, sum2), consuming 16
 * bytes per iteration with DO16, then pass the remaining (< 16) bytes to
 * adler32_len_16 and return its result.
 * NOTE(review): the name suggests callers only pass short inputs (len < 64),
 * since no modulo reduction is applied inside the loop -- confirm against
 * callers. */
static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) {
while (len >= 16) {
len -= 16;
DO16(adler, sum2, buf);
buf += 16;
}
/* Process tail (len < 16). */
return adler32_len_16(adler, buf, len, sum2);
}
#endif /* ADLER32_P_H */

View File

@ -6,19 +6,27 @@ CC=
CFLAGS=
SFLAGS=
INCLUDES=
ACLEFLAG=
NEONFLAG=
SUFFIX=
SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
all: \
adler32_neon.o adler32_neon.lo \
armfeature.o armfeature.lo \
chunkset_neon.o chunkset_neon.lo \
crc32_acle.o crc32_acle.lo \
slide_neon.o slide_neon.lo \
insert_string_acle.o insert_string_acle.lo
adler32_neon.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
adler32_neon.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
armfeature.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
@ -26,23 +34,29 @@ armfeature.o:
armfeature.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
chunkset_neon.o:
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
chunkset_neon.lo:
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
crc32_acle.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
crc32_acle.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
fill_window_arm.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
slide_neon.o:
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
fill_window_arm.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
slide_neon.lo:
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
insert_string_acle.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
insert_string_acle.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
mostlyclean: clean
clean:

View File

@ -2,24 +2,16 @@
* Copyright (C) 2017 ARM Holdings Inc.
* Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "adler32_neon.h"
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#ifdef ARM_NEON_ADLER32
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#include "adler32_p.h"
#endif
#include "../../zutil.h"
#include "../../adler32_p.h"
static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
static const uint8_t taps[32] = {
@ -109,7 +101,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {
for (i = 0; i < len; i += n) {
if ((i + n) > len)
n = len - i;
n = (int)(len - i);
if (n < 16)
break;

View File

@ -1,29 +0,0 @@
/* Copyright (C) 1995-2011, 2016 Mark Adler
* Copyright (C) 2017 ARM Holdings Inc.
* Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef __ADLER32_NEON__
#define __ADLER32_NEON__
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
// Depending on the compiler flavor, size_t may be defined in one or the other header. See:
// http://stackoverflow.com/questions/26410466/gcc-linaro-compiler-throws-error-unknown-type-name-size-t
#include <stdint.h>
#include <stddef.h>
uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#endif

View File

@ -8,6 +8,6 @@
extern int arm_cpu_has_neon;
extern int arm_cpu_has_crc32;
void ZLIB_INTERNAL arm_check_features(void);
void Z_INTERNAL arm_check_features(void);
#endif /* ARM_H_ */

View File

@ -1,8 +1,15 @@
#include "zutil.h"
#include "../../zutil.h"
#if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
#elif defined(__FreeBSD__) && defined(__aarch64__)
# include <machine/armreg.h>
# ifndef ID_AA64ISAR0_CRC32_VAL
# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
# endif
#elif defined(__APPLE__)
# include <sys/sysctl.h>
#elif defined(_WIN32)
# include <winapifamily.h>
#endif
@ -10,6 +17,14 @@
static int arm_has_crc32() {
#if defined(__linux__) && defined(HWCAP2_CRC32)
return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
#elif defined(__FreeBSD__) && defined(__aarch64__)
return getenv("QEMU_EMULATING") == NULL
&& ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE;
#elif defined(__APPLE__)
int hascrc32;
size_t size = sizeof(hascrc32);
return sysctlbyname("hw.optional.armv8_crc32", &hascrc32, &size, NULL, 0) == 0
&& hascrc32 == 1;
#elif defined(ARM_NOCHECK_ACLE)
return 1;
#else
@ -18,11 +33,15 @@ static int arm_has_crc32() {
}
/* AArch64 has neon. */
#if !defined(__aarch64__)
static inline int arm_has_neon()
{
#if !defined(__aarch64__) && !defined(_M_ARM64)
static inline int arm_has_neon() {
#if defined(__linux__) && defined(HWCAP_NEON)
return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
#elif defined(__APPLE__)
int hasneon;
size_t size = sizeof(hasneon);
return sysctlbyname("hw.optional.neon", &hasneon, &size, NULL, 0) == 0
&& hasneon == 1;
#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
return 1; /* Always supported */
@ -37,11 +56,11 @@ static inline int arm_has_neon()
}
#endif
ZLIB_INTERNAL int arm_cpu_has_neon;
ZLIB_INTERNAL int arm_cpu_has_crc32;
Z_INTERNAL int arm_cpu_has_neon;
Z_INTERNAL int arm_cpu_has_crc32;
void ZLIB_INTERNAL arm_check_features(void) {
#if defined(__aarch64__)
void Z_INTERNAL arm_check_features(void) {
#if defined(__aarch64__) || defined(_M_ARM64)
arm_cpu_has_neon = 1; /* always available */
#else
arm_cpu_has_neon = arm_has_neon();

View File

@ -0,0 +1,54 @@
/* chunkset_neon.c -- NEON inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef ARM_NEON_CHUNKSET
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#include <string.h>
#include "../../zbuild.h"
#include "../../zutil.h"
typedef uint8x16_t chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
/* Fill *chunk with the single byte found at `from`, repeated in every lane. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const chunk_t splat = vld1q_dup_u8(from);
    *chunk = splat;
}
/* Fill *chunk with the 2-byte pattern found at `from`, repeated in every
 * 16-bit lane.
 *
 * The pattern is fetched with memcpy rather than by casting `from` to
 * int16_t *: `from` is a byte pointer with no alignment guarantee, and
 * dereferencing it through a wider type is undefined behavior (alignment
 * and strict aliasing).
 */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    int16_t pattern;
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = vreinterpretq_u8_s16(vdupq_n_s16(pattern));
}
/* Fill *chunk with the 4-byte pattern found at `from`, repeated in every
 * 32-bit lane.
 *
 * The pattern is fetched with memcpy rather than by casting `from` to
 * int32_t *: `from` is a byte pointer with no alignment guarantee, and
 * dereferencing it through a wider type is undefined behavior (alignment
 * and strict aliasing).
 */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    int32_t pattern;
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = vreinterpretq_u8_s32(vdupq_n_s32(pattern));
}
/* Fill *chunk with the 8-byte pattern found at `from`, placed in both the
 * low and the high half of the 16-byte vector. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    const uint8x8_t half = vld1_u8(from);
    *chunk = vcombine_u8(half, half);
}
#define CHUNKSIZE chunksize_neon
#define CHUNKCOPY chunkcopy_neon
#define CHUNKCOPY_SAFE chunkcopy_safe_neon
#define CHUNKUNROLL chunkunroll_neon
#define CHUNKMEMSET chunkmemset_neon
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
/* Read one chunk_t worth of bytes starting at `s` into *chunk. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const chunk_t loaded = vld1q_u8(s);
    *chunk = loaded;
}
/* Write the bytes held in *chunk to memory starting at `out`. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    const chunk_t value = *chunk;
    vst1q_u8(out, value);
}
#include "chunkset_tpl.h"
#endif

View File

@ -5,21 +5,16 @@
*
*/
#ifdef __ARM_FEATURE_CRC32
#ifdef ARM_ACLE_CRC_HASH
#ifndef _MSC_VER
# include <arm_acle.h>
# ifdef ZLIB_COMPAT
# include <zconf.h>
# else
# include <zconf-ng.h>
# endif
# ifdef __linux__
# include <stddef.h>
#endif
#include "../../zutil.h"
uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
register uint32_t c;
register const uint16_t *buf2;
register const uint32_t *buf4;
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
Z_REGISTER const uint32_t *buf4;
c = ~crc;
if (len && ((ptrdiff_t)buf & 1)) {
@ -112,4 +107,4 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
c = ~c;
return c;
}
#endif /* __ARM_FEATURE_CRC32 */
#endif

View File

@ -1,169 +0,0 @@
/* fill_window_arm.c -- Optimized hash table shifting for ARM with support for NEON instructions
* Copyright (C) 2017 Mika T. Lindqvist
*
* Authors:
* Mika T. Lindqvist <postmaster@raasu.org>
* Jun He <jun.he@arm.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
extern ZLIB_INTERNAL int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
/* SIMD version of hash_chain rebase.
 *
 * Subtracts window_size from each of the `entries` 16-bit positions in
 * `table` using a saturating subtract, so entries smaller than window_size
 * become 0.
 *
 * The loop is unrolled to handle 8 vectors (8 * sizeof(uint16x8_t) bytes)
 * per iteration and runs at least once, so the table size in bytes must be
 * a non-zero multiple of that amount.
 */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
    register uint16x8_t v, *p;
    register size_t n;

    size_t size = entries*sizeof(table[0]);
    /* The divisor must be parenthesized: `size % sizeof(uint16x8_t) * 8`
     * would only test divisibility by one vector, not by the 8 vectors
     * consumed per loop iteration. */
    Assert((size != 0) && (size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");

    Assert(sizeof(Pos) == 2, "Wrong Pos size");
    v = vdupq_n_u16(window_size);

    p = (uint16x8_t *)table;
    n = size / (sizeof(uint16x8_t) * 8);
    do {
        p[0] = vqsubq_u16(p[0], v);
        p[1] = vqsubq_u16(p[1], v);
        p[2] = vqsubq_u16(p[2], v);
        p[3] = vqsubq_u16(p[3], v);
        p[4] = vqsubq_u16(p[4], v);
        p[5] = vqsubq_u16(p[5], v);
        p[6] = vqsubq_u16(p[6], v);
        p[7] = vqsubq_u16(p[7], v);
        p += 8;
    } while (--n);
}
#else
/* Portable hash rebase: every entry that is at least window_size is reduced
 * by window_size, every smaller entry is reset to NIL. */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
    Pos *slot = table;
    Pos *const end = table + entries;

    for (; slot != end; ++slot) {
        if (*slot >= window_size)
            *slot = *slot - window_size;
        else
            *slot = NIL;
    }
}
#endif
/* Refill the deflate window with input until at least MIN_LOOKAHEAD bytes of
 * lookahead are available or the stream has no more input.
 *
 * Each pass of the loop:
 *   1. slides the upper half of the window down (and rebases s->head and
 *      s->prev with slide_hash_chain) once strstart reaches
 *      wsize + MAX_DIST(s);
 *   2. reads up to `more` bytes through read_buf() and adds them to
 *      s->lookahead;
 *   3. inserts pending strings into the hash through functable.insert_string
 *      once lookahead + insert reaches MIN_MATCH.
 * After the loop, bytes past the current data are zeroed up to WIN_INIT (or
 * the end of the window) and s->high_water is advanced accordingly.
 */
void fill_window_arm(deflate_state *s) {
register unsigned n;
unsigned long more; /* Amount of free space at the end of the window. */
unsigned int wsize = s->w_size;
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
do {
more = s->window_size - s->lookahead - s->strstart;
/* If the window is almost full and there is insufficient lookahead,
* move the upper half to the lower one to make room in the upper half.
*/
if (s->strstart >= wsize+MAX_DIST(s)) {
memcpy(s->window, s->window+wsize, wsize);
s->match_start -= wsize;
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
s->block_start -= wsize;
/* Slide the hash table (could be avoided with 32 bit values
at the expense of memory usage). We slide even when level == 0
to keep the hash table consistent if we switch back to level > 0
later. (Using level 0 permanently is not an optimal usage of
zlib, so we don't care about this pathological case.)
*/
slide_hash_chain(s->head, s->hash_size, wsize);
slide_hash_chain(s->prev, wsize, wsize);
more += wsize;
}
/* Nothing left to read: leave the loop with whatever lookahead we have. */
if (s->strm->avail_in == 0)
break;
/* If there was no sliding:
* strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
* more == window_size - lookahead - strstart
* => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
* => more >= window_size - 2*WSIZE + 2
* In the BIG_MEM or MMAP case (not yet supported),
* window_size == input_size + MIN_LOOKAHEAD &&
* strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
* Otherwise, window_size == 2*WSIZE so more >= 2.
* If there was sliding, more >= WSIZE. So in all cases, more >= 2.
*/
Assert(more >= 2, "more < 2");
n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
s->lookahead += n;
/* Initialize the hash value now that we have some input: */
if (s->lookahead + s->insert >= MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
unsigned int insert_cnt = s->insert;
unsigned int slen;
s->ins_h = s->window[str];
/* NOTE(review): unsigned arithmetic; when lookahead < MIN_MATCH this
* addition wraps and effectively subtracts - presumably intentional,
* confirm. */
if (unlikely(s->lookahead < MIN_MATCH))
insert_cnt += s->lookahead - MIN_MATCH;
slen = insert_cnt;
if (str >= (MIN_MATCH - 2))
{
str += 2 - MIN_MATCH;
insert_cnt += MIN_MATCH - 2;
}
if (insert_cnt > 0)
{
functable.insert_string(s, str, insert_cnt);
s->insert -= slen;
}
}
/* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
* but this is not important since only literal bytes will be emitted.
*/
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
/* If the WIN_INIT bytes after the end of the current data have never been
* written, then zero those bytes in order to avoid memory check reports of
* the use of uninitialized (or uninitialised as Julian writes) bytes by
* the longest match routines. Update the high water mark for the next
* time through here. WIN_INIT is set to MAX_MATCH since the longest match
* routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
*/
if (s->high_water < s->window_size) {
unsigned long curr = s->strstart + (unsigned long)s->lookahead;
unsigned long init;
if (s->high_water < curr) {
/* Previous high water mark below current data -- zero WIN_INIT
* bytes or up to end of window, whichever is less.
*/
init = s->window_size - curr;
if (init > WIN_INIT)
init = WIN_INIT;
memset(s->window + curr, 0, init);
s->high_water = curr + init;
} else if (s->high_water < curr + WIN_INIT) {
/* High water mark at or above current data, but below current data
* plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
* to end of window, whichever is less.
*/
init = curr + WIN_INIT;
if (init > s->window_size)
init = s->window_size;
init -= s->high_water;
memset(s->window + s->high_water, 0, init);
s->high_water += init;
}
}
Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
}

View File

@ -5,49 +5,18 @@
*
*/
#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
#ifdef ARM_ACLE_CRC_HASH
#ifndef _MSC_VER
# include <arm_acle.h>
#include "zbuild.h"
#include "deflate.h"
/* ===========================================================================
 * Insert `count` consecutive strings, starting at position `str`, into the
 * hash table, using the ACLE __crc32w instruction as the hash function.
 *
 * For every position the 4 bytes at s->window[p] are hashed (only the low
 * 3 bytes when s->level >= TRIGGER_LEVEL) and masked with s->hash_mask.
 * Unless the bucket already holds the position itself, the old bucket head
 * is linked into s->prev and the bucket is pointed at the position.
 *
 * Returns the bucket head that was found for the LAST inserted position
 * (the position itself if the bucket already pointed at it). When count is
 * 0 nothing is inserted and s->prev[str & s->w_mask] is returned.
 */
Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
    Pos idx, last, result;

    if (unlikely(count == 0))
        return s->prev[str & s->w_mask];

    result = 0;
    last = str + count - 1; /* last position */

    for (idx = str; idx <= last; idx++) {
        uint32_t word, bucket;
        Pos old_head;

        memcpy(&word, &s->window[idx], sizeof(word));
        if (s->level >= TRIGGER_LEVEL)
            word &= 0xFFFFFF;

        bucket = __crc32w(0, word) & s->hash_mask;
        old_head = s->head[bucket];

        if (old_head == idx) {
            /* Bucket already points here; leave the chain untouched. */
            if (idx == last)
                result = idx;
            continue;
        }

        s->prev[idx & s->w_mask] = old_head;
        s->head[bucket] = idx;
        if (idx == last)
            result = old_head;
    }
    return result;
}
#endif
#include "../../zbuild.h"
#include "../../deflate.h"
#define UPDATE_HASH(s, h, val) \
h = __crc32w(0, val)
#define INSERT_STRING insert_string_acle
#define QUICK_INSERT_STRING quick_insert_string_acle
#include "../../insert_string_tpl.h"
#endif

View File

@ -0,0 +1,52 @@
/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
* Copyright (C) 2017-2020 Mika T. Lindqvist
*
* Authors:
* Mika T. Lindqvist <postmaster@raasu.org>
* Jun He <jun.he@arm.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#if defined(ARM_NEON_SLIDEHASH)
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#include "../../zbuild.h"
#include "../../deflate.h"
/* SIMD version of hash_chain rebase.
 *
 * Subtracts window_size from each of the `entries` 16-bit positions in
 * `table` using a saturating subtract, so entries smaller than window_size
 * become 0.
 *
 * The loop is unrolled to handle 8 vectors (8 * sizeof(uint16x8_t) bytes)
 * per iteration and runs at least once, so the table size in bytes must be
 * a non-zero multiple of that amount.
 */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
    Z_REGISTER uint16x8_t v, *p;
    Z_REGISTER size_t n;

    size_t size = entries*sizeof(table[0]);
    /* The divisor must be parenthesized: `size % sizeof(uint16x8_t) * 8`
     * would only test divisibility by one vector, not by the 8 vectors
     * consumed per loop iteration. */
    Assert((size != 0) && (size % (sizeof(uint16x8_t) * 8) == 0), "hash table size err");

    Assert(sizeof(Pos) == 2, "Wrong Pos size");
    v = vdupq_n_u16(window_size);

    p = (uint16x8_t *)table;
    n = size / (sizeof(uint16x8_t) * 8);
    do {
        p[0] = vqsubq_u16(p[0], v);
        p[1] = vqsubq_u16(p[1], v);
        p[2] = vqsubq_u16(p[2], v);
        p[3] = vqsubq_u16(p[3], v);
        p[4] = vqsubq_u16(p[4], v);
        p[5] = vqsubq_u16(p[5], v);
        p[6] = vqsubq_u16(p[6], v);
        p[7] = vqsubq_u16(p[7], v);
        p += 8;
    } while (--n);
}
/* Rebase both deflate hash tables by the window size: first the HASH_SIZE
 * entries of s->head, then the s->w_size entries of s->prev. */
Z_INTERNAL void slide_hash_neon(deflate_state *s) {
    const unsigned int window = s->w_size;

    slide_hash_chain(s->head, HASH_SIZE, window);
    slide_hash_chain(s->prev, window, window);
}
#endif

View File

@ -0,0 +1,49 @@
# Makefile for POWER-specific files
# Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
# For conditions of distribution and use, see copyright notice in zlib.h

# Toolchain settings; left empty here and expected to be supplied by the
# caller.
CC=
CFLAGS=
SFLAGS=
INCLUDES=
SUFFIX=

SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)

# Extra flags for the sources that use POWER8 vector instructions.
P8FLAGS=-mcpu=power8

# These targets name actions, not files; declare them phony so a stray file
# called "all" or "clean" cannot silence them.
.PHONY: all mostlyclean clean distclean

all: power.o \
		power.lo \
		adler32_power8.o \
		adler32_power8.lo \
		slide_hash_power8.o \
		slide_hash_power8.lo

# Each object lists its source as a prerequisite so it is rebuilt when the
# source changes. .o objects use CFLAGS, .lo objects use SFLAGS.
power.o: $(SRCDIR)/power.c
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c

power.lo: $(SRCDIR)/power.c
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c

adler32_power8.o: $(SRCDIR)/adler32_power8.c
	$(CC) $(CFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c

adler32_power8.lo: $(SRCDIR)/adler32_power8.c
	$(CC) $(SFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c

slide_hash_power8.o: $(SRCDIR)/slide_hash_power8.c
	$(CC) $(CFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c

slide_hash_power8.lo: $(SRCDIR)/slide_hash_power8.c
	$(CC) $(SFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c

mostlyclean: clean
clean:
	rm -f *.o *.lo *~
	rm -rf objs
	rm -f *.gcda *.gcno *.gcov

distclean:
	rm -f Makefile

View File

@ -0,0 +1,154 @@
/* Adler32 for POWER8 using VSX instructions.
* Copyright (C) 2020 IBM Corporation
* Author: Rogerio Alves <rcardoso@linux.ibm.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*
* Calculate adler32 checksum for 16 bytes at once using POWER8+ VSX (vector)
* instructions.
*
* When adler32 processes one byte at a time, let s1_n and s2_n denote the
* values of s1 and s2 after iteration n, and c[n] the n-th input byte
* (1-based). s1_0 is the initial value of s1, which is 1 unless the
* caller supplied a different initial adler value. Then
* s1_1 = s1_0 + c[1] after the first iteration, s1_2 = s1_1 + c[2] after
* the second, and so on. Hence s1_N = s1_(N-1) + c[N] is the value of s1
* after iteration N.
*
* Since s2 accumulates every intermediate s1, after iteration N
* s2_N = s2_0 + N*s1_0 + N*c[1] + (N-1)*c[2] + ... + c[N]
*
* In a more general way:
*
* s1_N = s1_0 + sum(i=1 to N)c[i]
* s2_N = s2_0 + N*s1_0 + sum(i=1 to N)(N-i+1)*c[i]
*
* Where s1_N, s2_N are the values for s1, s2 after N iterations. So if we
* can process N bytes at a time we can do this at once.
*
* Since a VSX vector register holds 16 bytes, we can process 16 bytes at
* a time. Using N = 16 we have:
*
* s1 = s1_16 = s1_(16-1) + c[16] = s1_0 + sum(i=1 to 16)c[i]
* s2 = s2_16 = s2_0 + 16*s1_0 + sum(i=1 to 16)(16-i+1)*c[i]
*
* After the first iteration we calculate the adler32 checksum for 16 bytes.
*
* For more background about adler32 please check the RFC:
* https://www.ietf.org/rfc/rfc1950.txt
*/
#ifdef POWER8_VSX_ADLER32
#include <altivec.h>
#include "zbuild.h"
#include "zutil.h"
#include "adler32_p.h"
/* Horizontal sum of the four unsigned int lanes of `a`; every lane of the
 * returned vector holds the total.
 *
 * The lanes are combined with vec_add (no saturating intrinsic is used).
 * The incoming value of `b` is ignored: the parameter only serves as
 * scratch space and is kept so existing call sites stay valid.
 *
 * Declared `static inline`: a plain `inline` definition provides no
 * external definition in C99, so any call the compiler chooses not to
 * inline would fail to link.
 */
static inline vector unsigned int vec_sumsu(vector unsigned int a, vector unsigned int b) {
    b = vec_sld(a, a, 8);   /* rotate by two lanes */
    b = vec_add(b, a);      /* pairwise sums */
    a = vec_sld(b, b, 4);   /* rotate by one lane */
    a = vec_add(a, b);      /* total in every lane */

    return a;
}
/* Compute the Adler-32 checksum of buf[0..len) continuing from `adler`.
 *
 * adler - running checksum: s1 in the low 16 bits, s2 in the high 16 bits
 * buf   - input bytes; NULL requests the initial checksum value
 * len   - number of bytes at buf
 *
 * Returns the updated checksum. Inputs shorter than 64 bytes are delegated
 * to the scalar helpers from adler32_p.h; longer inputs are summed 16 bytes
 * at a time with vector instructions, reducing modulo BASE once per NMAX
 * bytes, and the final tail (< 16 bytes) goes through adler32_len_16().
 */
uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len) {
    uint32_t s1 = adler & 0xffff;
    uint32_t s2 = (adler >> 16) & 0xffff;

    /* A NULL buffer asks for the initial adler value. This must be tested
     * before any path that dereferences buf (the len == 1 fast path reads
     * buf[0]). */
    if (UNLIKELY(buf == NULL))
        return 1;

    /* in case user likes doing a byte at a time, keep it fast */
    if (UNLIKELY(len == 1))
        return adler32_len_1(s1, buf, s2);

    /* This is faster than VSX code for len < 64. */
    if (len < 64)
        return adler32_len_64(s1, buf, len, s2);

    /* Use POWER VSX instructions for len >= 64. */
    const vector unsigned int v_zeros = { 0 };
    /* Per-byte weights (16-i+1) used for the s2 contribution of a block. */
    const vector unsigned char v_mul = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7,
        6, 5, 4, 3, 2, 1};
    /* Shift count 4, i.e. multiply by 16. */
    const vector unsigned char vsh = vec_splat_u8(4);
    /* Keeps only element 0 of a vector. */
    const vector unsigned int vmask = {0xffffffff, 0x0, 0x0, 0x0};
    vector unsigned int vs1 = { 0 };
    vector unsigned int vs2 = { 0 };
    vector unsigned int vs1_save = { 0 };
    vector unsigned int vsum1, vsum2;
    vector unsigned char vbuf;
    int n;

    vs1[0] = s1;
    vs2[0] = s2;

    /* Do length bigger than NMAX in blocks of NMAX size. */
    while (len >= NMAX) {
        len -= NMAX;
        n = NMAX / 16;
        do {
            vbuf = vec_xl(0, (unsigned char *) buf);
            vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */
            /* sum(i=1 to 16) buf[i]*(16-i+1). */
            vsum2 = vec_msum(vbuf, v_mul, v_zeros);
            /* Save vs1. */
            vs1_save = vec_add(vs1_save, vs1);
            /* Accumulate the sums. */
            vs1 = vec_add(vsum1, vs1);
            vs2 = vec_add(vsum2, vs2);

            buf += 16;
        } while (--n);
        /* Once each block of NMAX size. */
        vs1 = vec_sumsu(vs1, vsum1);
        vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save. */
        vs2 = vec_add(vs1_save, vs2);
        vs2 = vec_sumsu(vs2, vsum2);

        /* vs1[0] = (s1_i + sum(i=1 to 16)buf[i]) mod 65521. */
        vs1[0] = vs1[0] % BASE;
        /* vs2[0] = s2_i + 16*s1_save +
           sum(i=1 to 16)(16-i+1)*buf[i] mod 65521. */
        vs2[0] = vs2[0] % BASE;

        /* Drop the other lanes so only the reduced totals carry over. */
        vs1 = vec_and(vs1, vmask);
        vs2 = vec_and(vs2, vmask);
        vs1_save = v_zeros;
    }

    /* len is less than NMAX one modulo is needed. */
    if (len >= 16) {
        while (len >= 16) {
            len -= 16;

            vbuf = vec_xl(0, (unsigned char *) buf);

            vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */
            /* sum(i=1 to 16) buf[i]*(16-i+1). */
            vsum2 = vec_msum(vbuf, v_mul, v_zeros);
            /* Save vs1. */
            vs1_save = vec_add(vs1_save, vs1);
            /* Accumulate the sums. */
            vs1 = vec_add(vsum1, vs1);
            vs2 = vec_add(vsum2, vs2);

            buf += 16;
        }
        /* Since the size will be always less than NMAX we do this once. */
        vs1 = vec_sumsu(vs1, vsum1);
        vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save. */
        vs2 = vec_add(vs1_save, vs2);
        vs2 = vec_sumsu(vs2, vsum2);
    }
    /* Copy result back to s1, s2 (mod 65521). */
    s1 = vs1[0] % BASE;
    s2 = vs2[0] % BASE;

    /* Process tail (len < 16).and return */
    return adler32_len_16(s1, buf, len, s2);
}
#endif /* POWER8_VSX_ADLER32 */

View File

@ -0,0 +1,19 @@
/* POWER feature check
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include <sys/auxv.h>
#include "../../zutil.h"
Z_INTERNAL int power_cpu_has_arch_2_07;
/* Detect POWER CPU features at run time.
 *
 * When built with POWER8 defined, reads the AT_HWCAP2 auxiliary vector
 * entry and sets power_cpu_has_arch_2_07 to 1 if the
 * PPC_FEATURE2_ARCH_2_07 bit is present. The flag is never cleared here.
 * Without POWER8 the function does nothing; the auxiliary vector is only
 * queried when its value is used, which avoids a set-but-unused variable
 * warning in that configuration.
 */
void Z_INTERNAL power_check_features(void) {
#ifdef POWER8
    unsigned long hwcap2 = getauxval(AT_HWCAP2);

    if (hwcap2 & PPC_FEATURE2_ARCH_2_07)
        power_cpu_has_arch_2_07 = 1;
#endif
}

View File

@ -0,0 +1,13 @@
/* power.h -- check for POWER CPU features
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef POWER_H_
#define POWER_H_
extern int power_cpu_has_arch_2_07;
void Z_INTERNAL power_check_features(void);
#endif /* POWER_H_ */

View File

@ -0,0 +1,60 @@
/* Optimized slide_hash for POWER processors
* Copyright (C) 2019-2020 IBM Corporation
* Author: Matheus Castanho <msc@linux.ibm.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef POWER8_VSX_SLIDEHASH
#include <altivec.h>
#include "zbuild.h"
#include "deflate.h"
/* Rebase n_elems hash entries ending at table_end by s->w_size.
 *
 * s         - deflate state; only s->w_size is read
 * n_elems   - number of Pos entries to process (must be a multiple of 8)
 * table_end - one past the last entry; the table is walked backwards
 */
static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) {
    vector unsigned short window, entries;
    vector unsigned short *cursor;
    unsigned remaining;

    /* One vector holds 128 bits == 8 Pos entries, so the table is handled
     * in groups of 8 rather than entry by entry. */
    remaining = n_elems >> 3;

    Assert(n_elems % 8 == 0, "Weird hash table size!");

    /* Broadcast w_size, narrowed to Pos, into every lane. */
    window[0] = (Pos) s->w_size;
    window = vec_splat(window,0);

    cursor = (vector unsigned short *) table_end;

    do {
        /* Step back one group of 8 entries and rebase it. */
        --cursor;
        entries = *cursor;

        /* Same as: m >= w_size ? m - w_size : 0. The unsigned subtraction
         * saturates, so entries smaller than w_size become 0 while all
         * others are reduced by w_size. */
        *cursor = vec_subs(entries,window);
    } while (--remaining);
}
/* Rebase both deflate hash tables by the window size: first the HASH_SIZE
 * entries of s->head, then the s->w_size entries of s->prev. Each table is
 * passed to the loop helper by its one-past-the-end address. */
void Z_INTERNAL slide_hash_power8(deflate_state *s) {
    const unsigned int head_len = HASH_SIZE;
    const unsigned int prev_len = s->w_size;

    slide_hash_power8_loop(s, head_len, &s->head[head_len]);
    slide_hash_power8_loop(s, prev_len, &s->prev[prev_len]);
}
#endif /* POWER8_VSX_SLIDEHASH */

View File

@ -1,6 +1,7 @@
This directory contains IBM Z DEFLATE CONVERSION CALL support for
zlib-ng. In order to enable it, the following build commands should be
used:
# Introduction
This directory contains SystemZ deflate hardware acceleration support.
It can be enabled using the following build commands:
$ ./configure --with-dfltcc-deflate --with-dfltcc-inflate
$ make
@ -10,60 +11,206 @@ or
$ cmake -DWITH_DFLTCC_DEFLATE=1 -DWITH_DFLTCC_INFLATE=1 .
$ make
When built like this, zlib-ng would compress in hardware on level 1,
and in software on all other levels. Decompression will always happen
in hardware. In order to enable DFLTCC compression for levels 1-6 (i.e.
to make it used by default) one could add -DDFLTCC_LEVEL_MASK=0x7e to
CFLAGS when building zlib-ng.
When built like this, zlib-ng would compress using hardware on level 1,
and using software on all other levels. Decompression will always happen
in hardware. In order to enable hardware compression for levels 1-6
(i.e. to make it used by default) one could add
`-DDFLTCC_LEVEL_MASK=0x7e` to CFLAGS when building zlib-ng.
Two DFLTCC compression calls produce the same results only when they
both are made on machines of the same generation, and when the
respective buffers have the same offset relative to the start of the
page. Therefore care should be taken when using hardware compression
when reproducible results are desired.
SystemZ deflate hardware acceleration is available on [IBM z15](
https://www.ibm.com/products/z15) and newer machines under the name [
"Integrated Accelerator for zEnterprise Data Compression"](
https://www.ibm.com/support/z-content-solutions/compression/). The
programming interface to it is a machine instruction called DEFLATE
CONVERSION CALL (DFLTCC). It is documented in Chapter 26 of [Principles
of Operation](http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf). Both
the code and the rest of this document refer to this feature simply as
"DFLTCC".
# Performance
Performance figures are published [here](
https://github.com/iii-i/zlib-ng/wiki/Performance-with-dfltcc-patch-applied-and-dfltcc-support-built-on-dfltcc-enabled-machine
). The compression speed-up can be as high as 110x and the decompression
speed-up can be as high as 15x.
# Limitations
Two DFLTCC compression calls with identical inputs are not guaranteed to
produce identical outputs. Therefore care should be taken when using
hardware compression when reproducible results are desired. In
particular, the zlib-ng-specific `zng_deflateSetParams` call allows setting
the `Z_DEFLATE_REPRODUCIBLE` parameter, which disables DFLTCC support for a
particular stream.
DFLTCC does not support every single zlib-ng feature, in particular:
* inflate(Z_BLOCK) and inflate(Z_TREES)
* inflateMark()
* inflatePrime()
* deflateParams() after the first deflate() call
* `inflate(Z_BLOCK)` and `inflate(Z_TREES)`
* `inflateMark()`
* `inflatePrime()`
* `inflateSyncPoint()`
When used, these functions will either switch to software, or, in case
this is not possible, gracefully fail.
All SystemZ-specific code lives in a separate file and is integrated
with the rest of zlib-ng using hook macros, which are explained below.
# Code structure
All SystemZ-specific code lives in `arch/s390` directory and is
integrated with the rest of zlib-ng using hook macros.
## Hook macros
DFLTCC takes as arguments a parameter block, an input buffer, an output
buffer and a window. ZALLOC_STATE, ZFREE_STATE, ZCOPY_STATE,
ZALLOC_WINDOW and TRY_FREE_WINDOW macros encapsulate allocation details
for the parameter block (which is allocated alongside zlib-ng state)
and the window (which must be page-aligned).
buffer and a window. `ZALLOC_STATE()`, `ZFREE_STATE()`, `ZCOPY_STATE()`,
`ZALLOC_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate allocation
details for the parameter block (which is allocated alongside zlib-ng
state) and the window (which must be page-aligned).
While for inflate software and hardware window formats match, this is
not the case for deflate. Therefore, deflateSetDictionary and
deflateGetDictionary need special handling, which is triggered using
the DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros.
While inflate software and hardware window formats match, this is not
the case for deflate. Therefore, `deflateSetDictionary()` and
`deflateGetDictionary()` need special handling, which is triggered using
`DEFLATE_SET_DICTIONARY_HOOK()` and `DEFLATE_GET_DICTIONARY_HOOK()`
macros.
deflateResetKeep() and inflateResetKeep() update the DFLTCC parameter
block using DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros.
`deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC
parameter block using `DEFLATE_RESET_KEEP_HOOK()` and
`INFLATE_RESET_KEEP_HOOK()` macros.
DEFLATE_PARAMS_HOOK, INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros
make the unsupported deflateParams(), inflatePrime() and inflateMark()
calls fail gracefully.
`INFLATE_PRIME_HOOK()`, `INFLATE_MARK_HOOK()` and
`INFLATE_SYNC_POINT_HOOK()` macros make the respective unsupported
calls gracefully fail.
`DEFLATE_PARAMS_HOOK()` implements switching between hardware and
software compression mid-stream using `deflateParams()`. Switching
normally entails flushing the current block, which might not be possible
in low memory situations. `deflateParams()` uses `DEFLATE_DONE()` hook
in order to detect and gracefully handle such situations.
The algorithm implemented in hardware has a different compression ratio
than the one implemented in software. DEFLATE_BOUND_ADJUST_COMPLEN and
DEFLATE_NEED_CONSERVATIVE_BOUND macros make deflateBound() return the
correct results for the hardware implementation.
than the one implemented in software. `DEFLATE_BOUND_ADJUST_COMPLEN()`
and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()`
return the correct results for the hardware implementation.
Actual compression and decompression are handled by DEFLATE_HOOK and
INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the
window on its own, calling updatewindow() is suppressed using
INFLATE_NEED_UPDATEWINDOW() macro.
Actual compression and decompression are handled by `DEFLATE_HOOK()` and
`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the
window on its own, calling `updatewindow()` is suppressed using
`INFLATE_NEED_UPDATEWINDOW()` macro.
In addition to compression, DFLTCC computes CRC-32 and Adler-32
checksums, therefore, whenever it's used, software checksumming is
suppressed using DEFLATE_NEED_CHECKSUM and INFLATE_NEED_CHECKSUM
suppressed using `DEFLATE_NEED_CHECKSUM()` and `INFLATE_NEED_CHECKSUM()`
macros.
While software always produces reproducible compression results, this
is not the case for DFLTCC. Therefore, zlib-ng users are given the
ability to specify whether or not reproducible compression results
are required. While it is always possible to specify this setting
before the compression begins, it is not always possible to do so in
the middle of a deflate stream - the exact conditions for that are
determined by `DEFLATE_CAN_SET_REPRODUCIBLE()` macro.
## SystemZ-specific code
When zlib-ng is built with DFLTCC, the hooks described above are
converted to calls to functions, which are implemented in
`arch/s390/dfltcc_*` files. The functions can be grouped in three broad
categories:
* Base DFLTCC support, e.g. wrapping the machine instruction -
`dfltcc()` and allocating aligned memory - `dfltcc_alloc_state()`.
* Translating between software and hardware data formats, e.g.
`dfltcc_deflate_set_dictionary()`.
* Translating between software and hardware state machines, e.g.
`dfltcc_deflate()` and `dfltcc_inflate()`.
The functions from the first two categories are fairly simple, however,
various quirks in both software and hardware state machines make the
functions from the third category quite complicated.
### `dfltcc_deflate()` function
This function is called by `deflate()` and has the following
responsibilities:
* Checking whether DFLTCC can be used with the current stream. If this
is not the case, then it returns `0`, making `deflate()` use some
other function in order to compress in software. Otherwise it returns
`1`.
* Block management and Huffman table generation. DFLTCC ends blocks only
when explicitly instructed to do so by the software. Furthermore,
whether to use fixed or dynamic Huffman tables must also be determined
by the software. Since looking at data in order to gather statistics
would negate performance benefits, the following approach is used: the
first `DFLTCC_FIRST_FHT_BLOCK_SIZE` bytes are placed into a fixed
block, and every next `DFLTCC_BLOCK_SIZE` bytes are placed into
dynamic blocks.
* Writing EOBS. Block Closing Control bit in the parameter block
instructs DFLTCC to write EOBS, however, certain conditions need to be
met: input data length must be non-zero or Continuation Flag must be
set. To put this in simpler terms, DFLTCC will silently refuse to
write EOBS if this is the only thing that it is asked to do. Since the
code has to be able to emit EOBS in software anyway, in order to avoid
tricky corner cases Block Closing Control is never used. Whether to
write EOBS is instead controlled by `soft_bcc` variable.
* Triggering block post-processing. Depending on flush mode, `deflate()`
must perform various additional actions when a block or a stream ends.
`dfltcc_deflate()` informs `deflate()` about this using
`block_state *result` parameter.
* Converting software state fields into hardware parameter block fields,
and vice versa. For example, `wrap` and Check Value Type or `bi_valid`
and Sub-Byte Boundary. Certain fields cannot be translated and must
persist untouched in the parameter block between calls, for example,
Continuation Flag or Continuation State Buffer.
* Handling flush modes and low-memory situations. These aspects are
quite intertwined and pervasive. The general idea here is that the
code must not do anything in software - whether explicitly by e.g.
calling `send_eobs()`, or implicitly - by returning to `deflate()`
with certain return and `*result` values, when Continuation Flag is
set.
* Ending streams. When a new block is started and flush mode is
`Z_FINISH`, Block Header Final parameter block bit is used to mark
this block as final. However, sometimes an empty final block is
needed, and, unfortunately, just like with EOBS, DFLTCC will silently
refuse to do this. The general idea of DFLTCC implementation is to
rely as much as possible on the existing code. Here in order to do
this, the code pretends that it does not support DFLTCC, which makes
`deflate()` call a software compression function, which writes an
empty final block. Whether this is required is controlled by
`need_empty_block` variable.
* Error handling. This is simply converting
Operation-Ending-Supplemental Code to string. Errors can only happen
due to things like memory corruption, and therefore they don't affect
the `deflate()` return code.
### `dfltcc_inflate()` function
This function is called by `inflate()` from the `TYPEDO` state (that is,
when all the metadata is parsed and the stream is positioned at the type
bits of deflate block header) and it's responsible for the following:
* Falling back to software when flush mode is `Z_BLOCK` or `Z_TREES`.
Unfortunately, there is no way to ask DFLTCC to stop decompressing on
block or tree boundary.
* `inflate()` decompression loop management. This is controlled using
the return value, which can be either `DFLTCC_INFLATE_BREAK` or
`DFLTCC_INFLATE_CONTINUE`.
* Converting software state fields into hardware parameter block fields,
and vice versa. For example, `whave` and History Length or `wnext` and
History Offset.
* Ending streams. This instructs `inflate()` to return `Z_STREAM_END`
and is controlled by `last` state field.
* Error handling. Like deflate, error handling comprises
Operation-Ending-Supplemental Code to string conversion. Unlike
deflate, errors may happen due to bad inputs, therefore they are
propagated to `inflate()` by setting `mode` field to `MEM` or `BAD`.
# Testing
Given the complexity of the DFLTCC machine instruction, it is not clear
whether QEMU TCG will ever support it. At the time of writing, one has to have
access to an IBM z15+ VM or LPAR in order to test DFLTCC support. Since
DFLTCC is a non-privileged instruction, neither special VM/LPAR
configuration nor root access is required.
Still, zlib-ng CI has a few QEMU TCG-based configurations that check
whether fallback to software is working.

View File

@ -1,6 +1,6 @@
/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL general support. */
#include "zbuild.h"
#include "../../zbuild.h"
#include "dfltcc_common.h"
#include "dfltcc_detail.h"
@ -12,20 +12,31 @@
`posix_memalign' is not an option. Thus, we overallocate and take the
aligned portion of the buffer.
*/
static inline int is_dfltcc_enabled(void)
{
static inline int is_dfltcc_enabled(void) {
uint64_t facilities[(DFLTCC_FACILITY / 64) + 1];
register uint8_t r0 __asm__("r0");
Z_REGISTER uint8_t r0 __asm__("r0");
memset(facilities, 0, sizeof(facilities));
r0 = sizeof(facilities) / sizeof(facilities[0]) - 1;
__asm__ volatile("stfle %[facilities]\n" : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
/* STFLE is supported since z9-109 and only in z/Architecture mode. When
* compiling with -m31, gcc defaults to ESA mode, however, since the kernel
* is 64-bit, it's always z/Architecture mode at runtime.
*/
__asm__ volatile(
#ifndef __clang__
".machinemode push\n"
".machinemode zarch\n"
#endif
"stfle %[facilities]\n"
#ifndef __clang__
".machinemode pop\n"
#endif
: [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
return is_bit_set((const char *)facilities, DFLTCC_FACILITY);
}
void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size)
{
struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + size);
void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size) {
struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + ALIGN_UP(size, 8));
struct dfltcc_qaf_param *param = (struct dfltcc_qaf_param *)&dfltcc_state->param;
/* Initialize available functions */
@ -47,24 +58,17 @@ void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size)
dfltcc_state->param.ribm = DFLTCC_RIBM;
}
void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size)
{
Assert((items * size) % 8 == 0,
"The size of zlib-ng state must be a multiple of 8");
return ZALLOC(strm, items * size + sizeof(struct dfltcc_state), sizeof(unsigned char));
/* Allocate the zlib-ng state together with the DFLTCC state that follows it.
 * The zlib-ng part (items * size bytes) is rounded up to a multiple of 8 and
 * sizeof(struct dfltcc_state) is added on top; the whole region is requested
 * from ZALLOC as that many single bytes. Returns whatever ZALLOC returns. */
void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size) {
    uInt padded_state_size = ALIGN_UP(items * size, 8);

    return ZALLOC(strm, padded_state_size + sizeof(struct dfltcc_state), sizeof(unsigned char));
}
void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size)
{
memcpy(dst, src, size + sizeof(struct dfltcc_state));
/* Copy a zlib-ng state and the DFLTCC state stored behind it from src to dst.
 * size is the size of the zlib-ng part; it is rounded up to a multiple of 8
 * and sizeof(struct dfltcc_state) is added to obtain the number of bytes
 * copied. */
void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size) {
    uInt padded_state_size = ALIGN_UP(size, 8);

    memcpy(dst, src, padded_state_size + sizeof(struct dfltcc_state));
}
static const int PAGE_ALIGN = 0x1000;
#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size)
{
void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size) {
void *p;
void *w;
@ -79,8 +83,7 @@ void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt
return w;
}
void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w)
{
void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w) {
if (w)
ZFREE(strm, *(void **)((unsigned char *)w - sizeof(void *)));
}

View File

@ -2,17 +2,17 @@
#define DFLTCC_COMMON_H
#ifdef ZLIB_COMPAT
#include "zlib.h"
#include "../../zlib.h"
#else
#include "zlib-ng.h"
#include "../../zlib-ng.h"
#endif
#include "zutil.h"
#include "../../zutil.h"
void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size);
void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size);
void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size);
void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size);
void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w);
void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size);
void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size);
void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size);
void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size);
void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w);
#define ZALLOC_STATE dfltcc_alloc_state

View File

@ -13,27 +13,26 @@
$ make
*/
#include "zbuild.h"
#include "zutil.h"
#include "deflate.h"
#include "../../zbuild.h"
#include "../../zutil.h"
#include "../../deflate.h"
#include "../../trees_emit.h"
#include "dfltcc_deflate.h"
#include "dfltcc_detail.h"
static inline int dfltcc_are_params_ok(int level, uInt window_bits, int strategy, uint16_t level_mask)
{
return (level_mask & ((uint16_t)1 << level)) != 0 &&
(window_bits == HB_BITS) &&
(strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY);
}
int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm)
{
static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy,
int reproducible) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
/* Unsupported compression settings */
if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy, dfltcc_state->level_mask))
if ((dfltcc_state->level_mask & (1 << level)) == 0)
return 0;
if (window_bits != HB_BITS)
return 0;
if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
return 0;
if (reproducible)
return 0;
/* Unsupported hardware */
@ -45,8 +44,13 @@ int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm)
return 1;
}
static inline void dfltcc_gdht(PREFIX3(streamp) strm)
{
/* Report whether DFLTCC can be used for this stream with its current
 * settings. The level, window bits, strategy and reproducible flag stored in
 * the deflate state are forwarded to dfltcc_can_deflate_with_params(), whose
 * result is returned unchanged. */
int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm) {
    deflate_state *ds = (deflate_state *)strm->state;

    return dfltcc_can_deflate_with_params(strm,
                                          ds->level,
                                          ds->w_bits,
                                          ds->strategy,
                                          ds->reproducible);
}
static inline void dfltcc_gdht(PREFIX3(streamp) strm) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
size_t avail_in = strm->avail_in;
@ -54,8 +58,7 @@ static inline void dfltcc_gdht(PREFIX3(streamp) strm)
dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL);
}
static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm)
{
static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
size_t avail_in = strm->avail_in;
@ -72,11 +75,10 @@ static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm)
return cc;
}
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param)
{
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) {
deflate_state *state = (deflate_state *)strm->state;
send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl);
send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid);
flush_pending(strm);
if (state->pending != 0) {
/* The remaining data is located in pending_out[0:pending]. If someone
@ -93,8 +95,7 @@ static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0
#endif
}
int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result)
{
int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
struct dfltcc_param_v0 *param = &dfltcc_state->param;
@ -104,31 +105,38 @@ int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *
int soft_bcc;
int no_flush;
if (!dfltcc_can_deflate(strm))
if (!dfltcc_can_deflate(strm)) {
/* Clear history. */
if (flush == Z_FULL_FLUSH)
param->hl = 0;
return 0;
}
again:
masked_avail_in = 0;
soft_bcc = 0;
no_flush = flush == Z_NO_FLUSH;
/* Trailing empty block. Switch to software, except when Continuation Flag
* is set, which means that DFLTCC has buffered some output in the
* parameter block and needs to be called again in order to flush it.
/* No input data. Return, except when Continuation Flag is set, which means
* that DFLTCC has buffered some output in the parameter block and needs to
* be called again in order to flush it.
*/
if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) {
if (param->bcf) {
if (strm->avail_in == 0 && !param->cf) {
/* A block is still open, and the hardware does not support closing
* blocks without adding data. Thus, close it manually.
*/
if (!no_flush && param->bcf) {
send_eobs(strm, param);
param->bcf = 0;
}
/* Let one of deflate_* functions write a trailing empty block. */
if (flush == Z_FINISH)
return 0;
}
if (strm->avail_in == 0 && !param->cf) {
*result = need_more;
/* Clear history. */
if (flush == Z_FULL_FLUSH)
param->hl = 0;
/* Trigger block post-processing if necessary. */
*result = no_flush ? need_more : block_done;
return 1;
}
@ -154,12 +162,17 @@ again:
send_eobs(strm, param);
param->bcf = 0;
dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
}
}
/* No space for compressed data. If we proceed, dfltcc_cmpr() will return
* DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
* set BCF=1, which is wrong. Avoid complications and return early.
*/
if (strm->avail_out == 0) {
*result = need_more;
return 1;
}
}
}
/* The caller gave us too much data. Pass only one block worth of
* uncompressed data to DFLTCC and mask the rest, so that on the next
@ -180,7 +193,7 @@ again:
param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
if (!no_flush)
/* We need to close a block. Always do this in software - when there is
* no input data, the hardware will not nohor BCC. */
* no input data, the hardware will not honor BCC. */
soft_bcc = 1;
if (flush == Z_FINISH && !param->bcf)
/* We are about to open a BFINAL block, set Block Header Final bit
@ -195,8 +208,8 @@ again:
param->sbb = (unsigned int)state->bi_valid;
if (param->sbb > 0)
*strm->next_out = (unsigned char)state->bi_buf;
if (param->hl)
param->nt = 0; /* Honor history */
/* Honor history and check value */
param->nt = 0;
param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;
/* When opening a block, choose a Huffman-Table Type */
@ -277,31 +290,60 @@ again:
fly with deflateParams, we need to convert between hardware and software
window formats.
*/
int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy)
{
/* Return 1 if the stream shows any sign of prior DFLTCC deflate activity,
 * 0 otherwise. The stream counts as used when any input has been consumed
 * (total_in > 0), when the parameter block's nt field is 0, or when its hl
 * field is non-zero. */
static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) {
    deflate_state *ds = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *pb = &GET_DFLTCC_STATE(ds)->param;

    if (strm->total_in > 0)
        return 1;
    if (pb->nt == 0)
        return 1;
    return pb->hl > 0;
}
int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
struct dfltcc_param_v0 *param = &dfltcc_state->param;
int could_deflate = dfltcc_can_deflate(strm);
int can_deflate = dfltcc_are_params_ok(level, state->w_bits, strategy, dfltcc_state->level_mask);
int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible);
if (can_deflate == could_deflate)
/* We continue to work in the same mode - no changes needed */
return Z_OK;
if (strm->total_in == 0 && param->nt == 1 && param->hl == 0)
if (!dfltcc_was_deflate_used(strm))
/* DFLTCC was not used yet - no changes needed */
return Z_OK;
/* Switching between hardware and software is not implemented */
return Z_STREAM_ERROR;
/* For now, do not convert between window formats - simply get rid of the old data instead */
*flush = Z_FULL_FLUSH;
return Z_OK;
}
/* Tell the caller whether a deflate() call made with the given flush mode
 * has fully completed. Returns 1 when done, 0 when deflate() needs to be
 * called again. */
int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush) {
    deflate_state *ds = (deflate_state *)strm->state;
    struct dfltcc_param_v0 *pb = &GET_DFLTCC_STATE(ds)->param;

    /* A deflate(Z_FULL_FLUSH) call that ran out of output space may have
     * closed the block without resetting the compression state, so it is
     * reported as not done.
     */
    if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
        return 0;

    /* Software compression has nothing pending in the parameter block. */
    if (!dfltcc_can_deflate(strm))
        return 1;

    /* With DFLTCC in use, deflation is not done while output is still
     * buffered (Continuation Flag set) or EOBS has not been written yet
     * (Block-Continuation Flag set).
     */
    return !(pb->cf || pb->bcf);
}
/* Decide whether the stream's reproducible setting may be switched to the
 * requested value. Returns 1 only when the requested value differs from the
 * one currently stored in the deflate state and
 * dfltcc_was_deflate_used() reports that the stream has not been used yet;
 * returns 0 otherwise. */
int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible) {
    deflate_state *ds = (deflate_state *)strm->state;

    if (reproducible == ds->reproducible)
        return 0;
    return !dfltcc_was_deflate_used(strm);
}
/*
Preloading history.
*/
static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count)
{
static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) {
size_t offset;
size_t n;
@ -331,20 +373,19 @@ static void append_history(struct dfltcc_param_v0 *param, unsigned char *history
}
}
int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
const unsigned char *dictionary, uInt dict_length)
{
int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
const unsigned char *dictionary, uInt dict_length) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
struct dfltcc_param_v0 *param = &dfltcc_state->param;
append_history(param, state->window, dictionary, dict_length);
state->strstart = 1; /* Add FDICT to zlib header */
state->block_start = state->strstart; /* Make deflate_stored happy */
return Z_OK;
}
int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length)
{
int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) {
deflate_state *state = (deflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
struct dfltcc_param_v0 *param = &dfltcc_state->param;

View File

@ -3,12 +3,14 @@
#include "dfltcc_common.h"
int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm);
int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result);
int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy);
int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm);
int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result);
int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush);
int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush);
int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible);
int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
const unsigned char *dictionary, uInt dict_length);
int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);
int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);
#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
do { \
@ -25,15 +27,17 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned
#define DEFLATE_RESET_KEEP_HOOK(strm) \
dfltcc_reset((strm), sizeof(deflate_state))
#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \
#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \
do { \
int err; \
\
err = dfltcc_deflate_params((strm), (level), (strategy)); \
err = dfltcc_deflate_params((strm), (level), (strategy), (hook_flush)); \
if (err == Z_STREAM_ERROR) \
return err; \
} while (0)
#define DEFLATE_DONE dfltcc_deflate_done
#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
do { \
if (dfltcc_can_deflate((strm))) \
@ -47,4 +51,6 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned
#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm)))
#define DEFLATE_CAN_SET_REPRODUCIBLE dfltcc_can_set_reproducible
#endif

View File

@ -46,18 +46,17 @@ typedef enum {
#define DFLTCC_FACILITY 151
static inline dfltcc_cc dfltcc(int fn, void *param,
unsigned char **op1, size_t *len1, const unsigned char **op2, size_t *len2, void *hist)
{
unsigned char **op1, size_t *len1, z_const unsigned char **op2, size_t *len2, void *hist) {
unsigned char *t2 = op1 ? *op1 : NULL;
size_t t3 = len1 ? *len1 : 0;
const unsigned char *t4 = op2 ? *op2 : NULL;
z_const unsigned char *t4 = op2 ? *op2 : NULL;
size_t t5 = len2 ? *len2 : 0;
register int r0 __asm__("r0") = fn;
register void *r1 __asm__("r1") = param;
register unsigned char *r2 __asm__("r2") = t2;
register size_t r3 __asm__("r3") = t3;
register const unsigned char *r4 __asm__("r4") = t4;
register size_t r5 __asm__("r5") = t5;
Z_REGISTER int r0 __asm__("r0") = fn;
Z_REGISTER void *r1 __asm__("r1") = param;
Z_REGISTER unsigned char *r2 __asm__("r2") = t2;
Z_REGISTER size_t r3 __asm__("r3") = t3;
Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4;
Z_REGISTER size_t r5 __asm__("r5") = t5;
int cc;
__asm__ volatile(
@ -108,13 +107,11 @@ struct dfltcc_qaf_param {
static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32);
static inline int is_bit_set(const char *bits, int n)
{
/* Test bit n of a bit string. Bits are numbered from the most significant
 * bit of bits[0], so bit 0 is mask 0x80 of the first byte and bit 7 is mask
 * 0x01. Returns the masked byte value: non-zero when the bit is set, 0 when
 * it is clear (the result is not normalised to 1). */
static inline int is_bit_set(const char *bits, int n) {
    const int byte_index = n / 8;
    const int mask = 1 << (7 - (n % 8));

    return bits[byte_index] & mask;
}
static inline void clear_bit(char *bits, int n)
{
/* Clear bit n of a bit string in place. Bits are numbered from the most
 * significant bit of bits[0], so bit 0 is mask 0x80 of the first byte.
 * All other bits of the affected byte are left unchanged. */
static inline void clear_bit(char *bits, int n) {
    const int byte_index = n / 8;
    const int mask = 1 << (7 - (n % 8));

    bits[byte_index] &= ~mask;
}
@ -175,8 +172,7 @@ struct dfltcc_param_v0 {
static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536);
static inline const char *oesc_msg(char *buf, int oesc)
{
static inline z_const char *oesc_msg(char *buf, int oesc) {
if (oesc == 0x00)
return NULL; /* Successful completion */
else {
@ -198,4 +194,6 @@ struct dfltcc_state {
char msg[64]; /* Buffer for strm->msg */
};
#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((state) + 1))
#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8)))

View File

@ -13,15 +13,14 @@
$ make
*/
#include "zbuild.h"
#include "zutil.h"
#include "inftrees.h"
#include "inflate.h"
#include "../../zbuild.h"
#include "../../zutil.h"
#include "../../inftrees.h"
#include "../../inflate.h"
#include "dfltcc_inflate.h"
#include "dfltcc_detail.h"
int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm)
{
int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm) {
struct inflate_state *state = (struct inflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
@ -33,8 +32,7 @@ int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm)
return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
}
static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm)
{
static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) {
struct inflate_state *state = (struct inflate_state *)strm->state;
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
size_t avail_in = strm->avail_in;
@ -49,8 +47,7 @@ static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm)
return cc;
}
dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret)
{
dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret) {
struct inflate_state *state = (struct inflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
struct dfltcc_param_v0 *param = &dfltcc_state->param;
@ -115,16 +112,14 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int fl
DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE;
}
int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm)
{
int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm) {
struct inflate_state *state = (struct inflate_state *)strm->state;
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
return !param->nt;
}
int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm)
{
int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) {
struct inflate_state *state = (struct inflate_state *)strm->state;
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);

View File

@ -3,15 +3,15 @@
#include "dfltcc_common.h"
int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm);
int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm);
typedef enum {
DFLTCC_INFLATE_CONTINUE,
DFLTCC_INFLATE_BREAK,
DFLTCC_INFLATE_SOFTWARE,
} dfltcc_inflate_action;
dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret);
int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm);
int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret);
int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm);
int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
#define INFLATE_RESET_KEEP_HOOK(strm) \
dfltcc_reset((strm), sizeof(struct inflate_state))
@ -41,4 +41,9 @@ int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \
} while (0)
#define INFLATE_SYNC_POINT_HOOK(strm) \
do { \
if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \
} while (0)
#endif

View File

@ -1,3 +0,0 @@
fill_window_sse.c SSE2 optimized fill_window
deflate_quick.c SSE4 optimized deflate strategy for use as level 1
crc_folding.c SSE4 + PCLMULQDQ optimized CRC folding implementation

View File

@ -0,0 +1,8 @@
Contents
--------
|Name|Description|
|:-|:-|
|deflate_quick.c|SSE4 optimized deflate strategy for use as level 1|
|crc_folding.c|SSE4 + PCLMULQDQ optimized CRC folding implementation|
|slide_sse.c|SSE2 optimized slide_hash|

View File

@ -8,7 +8,9 @@ SFLAGS=
INCLUDES=
SUFFIX=
AVX2FLAG=-mavx2
SSE2FLAG=-msse2
SSSE3FLAG=-mssse3
SSE4FLAG=-msse4
PCLMULFLAG=-mpclmul
@ -16,7 +18,18 @@ SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
# Default target: every x86-specific object, each listed with its .lo twin.
# Each prerequisite must name a rule defined below (adler32_avx.lo, not adler32.lo).
all: \
	x86.o x86.lo \
	adler32_avx.o adler32_avx.lo \
	adler32_ssse3.o adler32_ssse3.lo \
	chunkset_avx.o chunkset_avx.lo \
	chunkset_sse.o chunkset_sse.lo \
	compare258_avx.o compare258_avx.lo \
	compare258_sse.o compare258_sse.lo \
	insert_string_sse.o insert_string_sse.lo \
	crc_folding.o crc_folding.lo \
	slide_avx.o slide_avx.lo \
	slide_sse.o slide_sse.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
@ -24,17 +37,29 @@ x86.o:
x86.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
fill_window_sse.o:
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
chunkset_avx.o:
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c
fill_window_sse.lo:
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
chunkset_avx.lo:
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c
deflate_quick.o:
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
chunkset_sse.o:
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
deflate_quick.lo:
$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
chunkset_sse.lo:
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
compare258_avx.o:
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
compare258_avx.lo:
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
compare258_sse.o:
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
compare258_sse.lo:
$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
insert_string_sse.o:
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
@ -48,6 +73,30 @@ crc_folding.o:
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
slide_avx.o:
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
slide_avx.lo:
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
slide_sse.o:
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
slide_sse.lo:
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
adler32_avx.o: $(SRCDIR)/adler32_avx.c
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
# .lo build of the AVX2 Adler-32 source; -DPIC is passed like in every other .lo rule.
adler32_avx.lo: $(SRCDIR)/adler32_avx.c
	$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
adler32_ssse3.o: $(SRCDIR)/adler32_ssse3.c
$(CC) $(CFLAGS) $(SSSE3FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c
# .lo build of the SSSE3 Adler-32 source; -DPIC is passed like in every other .lo rule.
adler32_ssse3.lo: $(SRCDIR)/adler32_ssse3.c
	$(CC) $(SFLAGS) $(SSSE3FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c
mostlyclean: clean
clean:
rm -f *.o *.lo *~

View File

@ -0,0 +1,117 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2011 Mark Adler
* Authors:
* Brian Bockelman <bockelman@gmail.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "../../zbuild.h"
#include "../../zutil.h"
#include "../../adler32_p.h"
#include <immintrin.h>
#ifdef X86_AVX2_ADLER32
/* Update an Adler-32 checksum with len bytes from buf, processing 32 bytes per
 * step with AVX2.
 *
 * adler: running checksum; the low 16 bits are the byte sum, the high 16 bits
 *        the sum of the running byte sums.
 * buf:   input bytes; NULL requests the initial checksum value (1).
 * len:   number of bytes in buf.
 *
 * Returns the updated checksum, recombined as (sum2 << 16) | adler.
 * Inputs shorter than 16 bytes are handed to the scalar helpers; bytes left
 * over after the 32-byte blocks are folded in by the scalar loop at the end.
 */
Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len) {
uint32_t sum2;
/* split Adler-32 into component sums */
sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
/* in case user likes doing a byte at a time, keep it fast */
if (UNLIKELY(len == 1))
return adler32_len_1(adler, buf, sum2);
/* initial Adler-32 value (deferred check for len == 1 speed) */
if (UNLIKELY(buf == NULL))
return 1L;
/* in case short lengths are provided, keep it somewhat fast */
if (UNLIKELY(len < 16))
return adler32_len_16(adler, buf, len, sum2);
/* Seed the vector accumulators: all lanes zero except the last, which
 * carries the incoming scalar sums. */
uint32_t ALIGNED_(32) s1[8], s2[8];
memset(s1, 0, sizeof(s1)); s1[7] = adler; // TODO: would a masked load be faster?
memset(s2, 0, sizeof(s2)); s2[7] = sum2;
/* dot1: weight 1 for each of the 32 bytes (plain byte sum). */
char ALIGNED_(32) dot1[32] = \
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
__m256i dot1v = _mm256_load_si256((__m256i*)dot1);
/* dot2: descending weights 32..1, i.e. how many times each byte of the block
 * is counted in sum2. */
char ALIGNED_(32) dot2[32] = \
{32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
__m256i dot2v = _mm256_load_si256((__m256i*)dot2);
/* dot3: all-ones 16-bit weights, used to add adjacent 16-bit partial sums
 * into 32-bit lanes. */
short ALIGNED_(32) dot3[16] = \
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
__m256i dot3v = _mm256_load_si256((__m256i*)dot3);
/* vs1 has to be multiplied by 32 (the block size) for every block; this is
 * done as a left shift by 5, with the count held in the low bytes of shiftv. */
char ALIGNED_(32) shift[16] = {5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
__m128i shiftv = _mm_load_si128((__m128i*)shift);
while (len >= 32) {
__m256i vs1 = _mm256_load_si256((__m256i*)s1);
__m256i vs2 = _mm256_load_si256((__m256i*)s2);
/* vs1_0 holds vs1 as it was before the current 32-byte block. */
__m256i vs1_0 = vs1;
/* Take at most NMAX bytes, rounded down to whole 32-byte blocks, before
 * reducing modulo BASE. NOTE(review): NMAX is defined outside this file;
 * presumably it bounds the run so the 32-bit lanes cannot overflow - confirm. */
int k = (len < NMAX ? (int)len : NMAX);
k -= k % 32;
len -= k;
while (k >= 32) {
/*
vs1 = adler + sum(c[i])
vs2 = sum2 + 32 vs1 + sum( (32-i+1) c[i] )
*/
__m256i vbuf = _mm256_loadu_si256((__m256i*)buf);
buf += 32;
k -= 32;
__m256i v_short_sum1 = _mm256_maddubs_epi16(vbuf, dot1v); // multiply-add, 32 bytes to 16 shorts.
__m256i vsum1 = _mm256_madd_epi16(v_short_sum1, dot3v); // sum 16 shorts to 8 int32_t;
__m256i v_short_sum2 = _mm256_maddubs_epi16(vbuf, dot2v);
vs1 = _mm256_add_epi32(vsum1, vs1);
__m256i vsum2 = _mm256_madd_epi16(v_short_sum2, dot3v);
/* 32 * (vs1 before this block) */
vs1_0 = _mm256_sll_epi32(vs1_0, shiftv);
vsum2 = _mm256_add_epi32(vsum2, vs2);
vs2 = _mm256_add_epi32(vsum2, vs1_0);
vs1_0 = vs1;
}
// At this point, we have partial sums stored in vs1 and vs2. There are AVX512 instructions that
// would allow us to sum these quickly (VP4DPWSSD). For now, just unpack and move on.
uint32_t ALIGNED_(32) s1_unpack[8];
uint32_t ALIGNED_(32) s2_unpack[8];
_mm256_store_si256((__m256i*)s1_unpack, vs1);
_mm256_store_si256((__m256i*)s2_unpack, vs2);
/* Reduce each lane modulo BASE before adding them, then reduce the total. */
adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE) +
(s1_unpack[4] % BASE) + (s1_unpack[5] % BASE) + (s1_unpack[6] % BASE) + (s1_unpack[7] % BASE);
adler %= BASE;
/* Re-seed the last lane so the next outer iteration continues from here. */
s1[7] = adler;
sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE) +
(s2_unpack[4] % BASE) + (s2_unpack[5] % BASE) + (s2_unpack[6] % BASE) + (s2_unpack[7] % BASE);
sum2 %= BASE;
s2[7] = sum2;
}
/* Scalar tail: fewer than 32 bytes remain here. */
while (len) {
len--;
adler += *buf++;
sum2 += adler;
}
adler %= BASE;
sum2 %= BASE;
/* return recombined sums */
return adler | (sum2 << 16);
}
#endif

View File

@ -0,0 +1,118 @@
/* adler32.c -- compute the Adler-32 checksum of a data stream
* Copyright (C) 1995-2011 Mark Adler
* Authors:
* Brian Bockelman <bockelman@gmail.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "../../zbuild.h"
#include "../../zutil.h"
#include "../../adler32_p.h"
#ifdef X86_SSSE3_ADLER32
#include <immintrin.h>
/* Update an Adler-32 checksum with len bytes from buf, processing 16 bytes per
 * step with SSSE3.
 *
 * adler: running checksum; the low 16 bits are the byte sum, the high 16 bits
 *        the sum of the running byte sums.
 * buf:   input bytes; NULL requests the initial checksum value (1).
 * len:   number of bytes in buf.
 *
 * Returns the updated checksum, recombined as (sum2 << 16) | adler.
 * Inputs shorter than 16 bytes are handed to the scalar helpers; bytes left
 * over after the 16-byte blocks are folded in by the scalar loop at the end.
 */
Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len) {
uint32_t sum2;
/* split Adler-32 into component sums */
sum2 = (adler >> 16) & 0xffff;
adler &= 0xffff;
/* in case user likes doing a byte at a time, keep it fast */
if (UNLIKELY(len == 1))
return adler32_len_1(adler, buf, sum2);
/* initial Adler-32 value (deferred check for len == 1 speed) */
if (UNLIKELY(buf == NULL))
return 1L;
/* in case short lengths are provided, keep it somewhat fast */
if (UNLIKELY(len < 16))
return adler32_len_16(adler, buf, len, sum2);
/* Seed the vector accumulators: all lanes zero except the last, which
 * carries the incoming scalar sums. */
uint32_t ALIGNED_(16) s1[4], s2[4];
s1[0] = s1[1] = s1[2] = 0; s1[3] = adler;
s2[0] = s2[1] = s2[2] = 0; s2[3] = sum2;
/* dot1: weight 1 for each of the 16 bytes (plain byte sum). */
char ALIGNED_(16) dot1[16] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
__m128i dot1v = _mm_load_si128((__m128i*)dot1);
/* dot2: descending weights 16..1, i.e. how many times each byte of the block
 * is counted in sum2. */
char ALIGNED_(16) dot2[16] = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1};
__m128i dot2v = _mm_load_si128((__m128i*)dot2);
/* dot3: all-ones 16-bit weights, used to add adjacent 16-bit partial sums
 * into 32-bit lanes. */
short ALIGNED_(16) dot3[8] = {1, 1, 1, 1, 1, 1, 1, 1};
__m128i dot3v = _mm_load_si128((__m128i*)dot3);
/* vs1 has to be multiplied by 16 (the block size) for every block; this is
 * done as a left shift by 4, with the count held in the low bytes of shiftv. */
char ALIGNED_(16) shift[16] = {4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
__m128i shiftv = _mm_load_si128((__m128i*)shift);
while (len >= 16) {
__m128i vs1 = _mm_load_si128((__m128i*)s1);
__m128i vs2 = _mm_load_si128((__m128i*)s2);
/* vs1_0 holds vs1 as it was before the current 16-byte block. */
__m128i vs1_0 = vs1;
/* Take at most NMAX bytes, rounded down to whole 16-byte blocks, before
 * reducing modulo BASE. NOTE(review): NMAX is defined outside this file;
 * presumably it bounds the run so the 32-bit lanes cannot overflow - confirm. */
int k = (len < NMAX ? (int)len : NMAX);
k -= k % 16;
len -= k;
while (k >= 16) {
/*
vs1 = adler + sum(c[i])
vs2 = sum2 + 16 vs1 + sum( (16-i+1) c[i] )
NOTE: 256-bit equivalents are:
_mm256_maddubs_epi16 <- operates on 32 bytes to 16 shorts
_mm256_madd_epi16 <- Sums 16 shorts to 8 int32_t.
We could rewrite the below to use 256-bit instructions instead of 128-bit.
*/
__m128i vbuf = _mm_loadu_si128((__m128i*)buf);
buf += 16;
k -= 16;
__m128i v_short_sum1 = _mm_maddubs_epi16(vbuf, dot1v); // multiply-add, resulting in 8 shorts.
__m128i vsum1 = _mm_madd_epi16(v_short_sum1, dot3v); // sum 8 shorts to 4 int32_t;
__m128i v_short_sum2 = _mm_maddubs_epi16(vbuf, dot2v);
vs1 = _mm_add_epi32(vsum1, vs1);
__m128i vsum2 = _mm_madd_epi16(v_short_sum2, dot3v);
/* 16 * (vs1 before this block) */
vs1_0 = _mm_sll_epi32(vs1_0, shiftv);
vsum2 = _mm_add_epi32(vsum2, vs2);
vs2 = _mm_add_epi32(vsum2, vs1_0);
vs1_0 = vs1;
}
// At this point, we have partial sums stored in vs1 and vs2. There are AVX512 instructions that
// would allow us to sum these quickly (VP4DPWSSD). For now, just unpack and move on.
uint32_t ALIGNED_(16) s1_unpack[4];
uint32_t ALIGNED_(16) s2_unpack[4];
_mm_store_si128((__m128i*)s1_unpack, vs1);
_mm_store_si128((__m128i*)s2_unpack, vs2);
/* Reduce each lane modulo BASE before adding them, then reduce the total. */
adler = (s1_unpack[0] % BASE) + (s1_unpack[1] % BASE) + (s1_unpack[2] % BASE) + (s1_unpack[3] % BASE);
adler %= BASE;
/* Re-seed the last lane so the next outer iteration continues from here. */
s1[3] = adler;
sum2 = (s2_unpack[0] % BASE) + (s2_unpack[1] % BASE) + (s2_unpack[2] % BASE) + (s2_unpack[3] % BASE);
sum2 %= BASE;
s2[3] = sum2;
}
/* Scalar tail: fewer than 16 bytes remain here. */
while (len) {
len--;
adler += *buf++;
sum2 += adler;
}
adler %= BASE;
sum2 %= BASE;
/* return recombined sums */
return adler | (sum2 << 16);
}
#endif

View File

@ -0,0 +1,50 @@
/* chunkset_avx.c -- AVX inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zutil.h"
#ifdef X86_AVX_CHUNKSET
#include <immintrin.h>
typedef __m256i chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
/* Fill every 8-bit lane of *chunk with the single byte found at from. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const int8_t fill = *(int8_t *)from;

    *chunk = _mm256_set1_epi8(fill);
}
/* Fill every 16-bit lane of *chunk with the 2-byte pattern stored at from.
 *
 * from is a byte pointer with no alignment guarantee, so the pattern is
 * assembled from individual bytes instead of being read through an int16_t
 * pointer (a misaligned, type-punned load is undefined behaviour in C).
 * x86 is little-endian, so the value is the same one a direct load would give.
 */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    const uint16_t pattern = (uint16_t)((unsigned)from[0] | ((unsigned)from[1] << 8));

    *chunk = _mm256_set1_epi16((int16_t)pattern);
}
/* Fill every 32-bit lane of *chunk with the 4-byte pattern stored at from.
 *
 * from is a byte pointer with no alignment guarantee, so the pattern is
 * assembled from individual bytes instead of being read through an int32_t
 * pointer (a misaligned, type-punned load is undefined behaviour in C).
 * x86 is little-endian, so the value is the same one a direct load would give.
 */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    const uint32_t pattern = (uint32_t)from[0]
                           | ((uint32_t)from[1] << 8)
                           | ((uint32_t)from[2] << 16)
                           | ((uint32_t)from[3] << 24);

    *chunk = _mm256_set1_epi32((int32_t)pattern);
}
/* Fill every 64-bit lane of *chunk with the 8-byte pattern stored at from.
 *
 * from is a byte pointer with no alignment guarantee, so the pattern is
 * assembled from individual bytes instead of being read through an int64_t
 * pointer (a misaligned, type-punned load is undefined behaviour in C).
 * x86 is little-endian, so the value is the same one a direct load would give.
 */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    uint64_t pattern = 0;
    int i;

    /* Highest-addressed byte first, so from[0] ends up least significant. */
    for (i = 7; i >= 0; i--)
        pattern = (pattern << 8) | from[i];

    *chunk = _mm256_set1_epi64x((int64_t)pattern);
}
/* Read one 32-byte chunk from s into *chunk using an unaligned load. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    __m256i *const src = (__m256i *)s;

    *chunk = _mm256_loadu_si256(src);
}
/* Write the 32-byte *chunk to out using an unaligned store. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    __m256i *const dst = (__m256i *)out;

    _mm256_storeu_si256(dst, *chunk);
}
#define CHUNKSIZE chunksize_avx
#define CHUNKCOPY chunkcopy_avx
#define CHUNKCOPY_SAFE chunkcopy_safe_avx
#define CHUNKUNROLL chunkunroll_avx
#define CHUNKMEMSET chunkmemset_avx
#define CHUNKMEMSET_SAFE chunkmemset_safe_avx
#include "chunkset_tpl.h"
#endif

View File

@ -0,0 +1,51 @@
/* chunkset_sse.c -- SSE inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zutil.h"
#ifdef X86_SSE2
#include <immintrin.h>
typedef __m128i chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
/* Fill every 8-bit lane of *chunk with the single byte found at from. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const int8_t fill = *(int8_t *)from;

    *chunk = _mm_set1_epi8(fill);
}
/* Fill every 16-bit lane of *chunk with the 2-byte pattern stored at from.
 *
 * from is a byte pointer with no alignment guarantee, so the pattern is
 * assembled from individual bytes instead of being read through an int16_t
 * pointer (a misaligned, type-punned load is undefined behaviour in C).
 * x86 is little-endian, so the value is the same one a direct load would give.
 */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    const uint16_t pattern = (uint16_t)((unsigned)from[0] | ((unsigned)from[1] << 8));

    *chunk = _mm_set1_epi16((int16_t)pattern);
}
/* Fill every 32-bit lane of *chunk with the 4-byte pattern stored at from.
 *
 * from is a byte pointer with no alignment guarantee, so the pattern is
 * assembled from individual bytes instead of being read through an int32_t
 * pointer (a misaligned, type-punned load is undefined behaviour in C).
 * x86 is little-endian, so the value is the same one a direct load would give.
 */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    const uint32_t pattern = (uint32_t)from[0]
                           | ((uint32_t)from[1] << 8)
                           | ((uint32_t)from[2] << 16)
                           | ((uint32_t)from[3] << 24);

    *chunk = _mm_set1_epi32((int32_t)pattern);
}
/* Fill every 64-bit lane of *chunk with the 8-byte pattern stored at from.
 *
 * from is a byte pointer with no alignment guarantee, so the pattern is
 * assembled from individual bytes instead of being read through an int64_t
 * pointer (a misaligned, type-punned load is undefined behaviour in C).
 * x86 is little-endian, so the value is the same one a direct load would give.
 */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    uint64_t pattern = 0;
    int i;

    /* Highest-addressed byte first, so from[0] ends up least significant. */
    for (i = 7; i >= 0; i--)
        pattern = (pattern << 8) | from[i];

    *chunk = _mm_set1_epi64x((int64_t)pattern);
}
/* Read one 16-byte chunk from s into *chunk using an unaligned load. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    __m128i *const src = (__m128i *)s;

    *chunk = _mm_loadu_si128(src);
}
/* Write the 16-byte *chunk to out using an unaligned store. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    __m128i *const dst = (__m128i *)out;

    _mm_storeu_si128(dst, *chunk);
}
#define CHUNKSIZE chunksize_sse2
#define CHUNKCOPY chunkcopy_sse2
#define CHUNKCOPY_SAFE chunkcopy_safe_sse2
#define CHUNKUNROLL chunkunroll_sse2
#define CHUNKMEMSET chunkmemset_sse2
#define CHUNKMEMSET_SAFE chunkmemset_safe_sse2
#include "chunkset_tpl.h"
#endif

View File

@ -0,0 +1,67 @@
/* compare258_avx.c -- AVX2 version of compare258
* Copyright Mika T. Lindqvist <postmaster@raasu.org>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "../../zbuild.h"
#include "../../zutil.h"
#include "fallback_builtins.h"
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
#include <immintrin.h>
#ifdef _MSC_VER
# include <nmmintrin.h>
#endif
/* UNALIGNED_OK, AVX2 intrinsic comparison */
static inline uint32_t compare256_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;
do {
__m256i ymm_src0, ymm_src1, ymm_cmp;
ymm_src0 = _mm256_loadu_si256((__m256i*)src0);
ymm_src1 = _mm256_loadu_si256((__m256i*)src1);
ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1); /* non-identical bytes = 00, identical bytes = FF */
unsigned mask = (unsigned)_mm256_movemask_epi8(ymm_cmp);
if (mask != 0xFFFFFFFF) {
uint32_t match_byte = (uint32_t)__builtin_ctz(~mask); /* Invert bits so identical = 0 */
return len + match_byte;
}
src0 += 32, src1 += 32, len += 32;
ymm_src0 = _mm256_loadu_si256((__m256i*)src0);
ymm_src1 = _mm256_loadu_si256((__m256i*)src1);
ymm_cmp = _mm256_cmpeq_epi8(ymm_src0, ymm_src1);
mask = (unsigned)_mm256_movemask_epi8(ymm_cmp);
if (mask != 0xFFFFFFFF) {
uint32_t match_byte = (uint32_t)__builtin_ctz(~mask);
return len + match_byte;
}
src0 += 32, src1 += 32, len += 32;
} while (len < 256);
return 256;
}
/* Match length of src0 against src1 over up to 258 bytes.
 * The first two bytes are compared as one 16-bit word; when they differ the
 * result is 1 if the first byte alone matches and 0 otherwise. When they are
 * equal, the 256-byte AVX2 comparison handles the rest.
 */
static inline uint32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
    const uint16_t head0 = *(uint16_t *)src0;
    const uint16_t head1 = *(uint16_t *)src1;

    if (head0 == head1)
        return compare256_unaligned_avx2_static(src0 + 2, src1 + 2) + 2;

    return (*src0 == *src1);
}
/* Externally linked entry point that forwards to the static inline AVX2
 * 258-byte comparison. */
Z_INTERNAL uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) {
    const uint32_t match_len = compare258_unaligned_avx2_static(src0, src1);

    return match_len;
}
#define LONGEST_MATCH longest_match_unaligned_avx2
#define COMPARE256 compare256_unaligned_avx2_static
#define COMPARE258 compare258_unaligned_avx2_static
#include "match_tpl.h"
#endif

View File

@ -0,0 +1,74 @@
/* compare258_sse.c -- SSE4.2 version of compare258
*
* Copyright (C) 2013 Intel Corporation. All rights reserved.
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
* Jim Guilford <james.guilford@intel.com>
* Vinodh Gopal <vinodh.gopal@intel.com>
* Erdinc Ozturk <erdinc.ozturk@intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* Portions are Copyright (C) 2016 12Sided Technology, LLC.
* Author:
* Phil Vachon <pvachon@12sidedtech.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "../../zbuild.h"
#include "../../zutil.h"
#ifdef X86_SSE42_CMP_STR
#include <immintrin.h>
#ifdef _MSC_VER
# include <nmmintrin.h>
#endif
/* UNALIGNED_OK, SSE4.2 intrinsic comparison */
static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
uint32_t len = 0;
do {
#define mode _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY
__m128i xmm_src0, xmm_src1;
uint32_t ret;
xmm_src0 = _mm_loadu_si128((__m128i *)src0);
xmm_src1 = _mm_loadu_si128((__m128i *)src1);
ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode);
if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) {
return len + ret;
}
src0 += 16, src1 += 16, len += 16;
xmm_src0 = _mm_loadu_si128((__m128i *)src0);
xmm_src1 = _mm_loadu_si128((__m128i *)src1);
ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, mode);
if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, mode)) {
return len + ret;
}
src0 += 16, src1 += 16, len += 16;
} while (len < 256);
return 256;
}
/* Match length of src0 against src1 over up to 258 bytes.
 * The first two bytes are compared as one 16-bit word; when they differ the
 * result is 1 if the first byte alone matches and 0 otherwise. When they are
 * equal, the 256-byte SSE4.2 comparison handles the rest.
 */
static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    const uint16_t head0 = *(uint16_t *)src0;
    const uint16_t head1 = *(uint16_t *)src1;

    if (head0 == head1)
        return compare256_unaligned_sse4_static(src0 + 2, src1 + 2) + 2;

    return (*src0 == *src1);
}
/* Externally linked entry point that forwards to the static inline SSE4.2
 * 258-byte comparison. */
Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
    const uint32_t match_len = compare258_unaligned_sse4_static(src0, src1);

    return match_len;
}
#define LONGEST_MATCH longest_match_unaligned_sse4
#define COMPARE256 compare256_unaligned_sse4_static
#define COMPARE258 compare258_unaligned_sse4_static
#include "match_tpl.h"
#endif

View File

@ -18,14 +18,14 @@
#ifdef X86_PCLMULQDQ_CRC
#include "zbuild.h"
#include "../../zbuild.h"
#include <inttypes.h>
#include <immintrin.h>
#include <wmmintrin.h>
#include "crc_folding.h"
ZLIB_INTERNAL void crc_fold_init(deflate_state *const s) {
Z_INTERNAL void crc_fold_init(deflate_state *const s) {
/* CRC_SAVE */
_mm_storeu_si128((__m128i *)s->crc0 + 0, _mm_cvtsi32_si128(0x9db42487));
_mm_storeu_si128((__m128i *)s->crc0 + 1, _mm_setzero_si128());
@ -227,9 +227,10 @@ static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1,
*xmm_crc3 = _mm_castps_si128(ps_res);
}
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
Z_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
unsigned long algn_diff;
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
char ALIGNED_(16) partial_buf[16] = { 0 };
/* CRC_LOAD */
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
@ -241,11 +242,14 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
if (len < 16) {
if (len == 0)
return;
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
memcpy(partial_buf, src, len);
xmm_crc_part = _mm_loadu_si128((const __m128i *)partial_buf);
memcpy(dst, partial_buf, len);
goto partial;
}
algn_diff = (0 - (uintptr_t)src) & 0xF;
algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
if (algn_diff) {
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
@ -255,6 +259,8 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
len -= algn_diff;
partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
} else {
xmm_crc_part = _mm_setzero_si128();
}
while ((len -= 64) >= 0) {
@ -305,7 +311,7 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
goto done;
dst += 48;
xmm_crc_part = _mm_load_si128((__m128i *)src + 3);
memcpy(&xmm_crc_part, (__m128i *)src + 3, len);
} else if (len + 32 >= 0) {
len += 32;
@ -324,7 +330,7 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
goto done;
dst += 32;
xmm_crc_part = _mm_load_si128((__m128i *)src + 2);
memcpy(&xmm_crc_part, (__m128i *)src + 2, len);
} else if (len + 48 >= 0) {
len += 48;
@ -340,16 +346,18 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
goto done;
dst += 16;
xmm_crc_part = _mm_load_si128((__m128i *)src + 1);
memcpy(&xmm_crc_part, (__m128i *)src + 1, len);
} else {
len += 64;
if (len == 0)
goto done;
xmm_crc_part = _mm_load_si128((__m128i *)src);
memcpy(&xmm_crc_part, src, len);
}
_mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part);
memcpy(dst, partial_buf, len);
partial:
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
done:
/* CRC_SAVE */
@ -377,7 +385,7 @@ static const unsigned ALIGNED_(16) crc_mask2[4] = {
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
};
uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) {
const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask);
const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2);
@ -447,4 +455,3 @@ uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
}
#endif

View File

@ -10,10 +10,10 @@
#ifndef CRC_FOLDING_H_
#define CRC_FOLDING_H_
#include "deflate.h"
#include "../../deflate.h"
ZLIB_INTERNAL void crc_fold_init(deflate_state *const);
ZLIB_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
Z_INTERNAL void crc_fold_init(deflate_state *const);
Z_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
Z_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
#endif

View File

@ -1,25 +0,0 @@
#ifndef X86_CTZL_H
#define X86_CTZL_H
#include <intrin.h>
#ifdef X86_CPUID
# include "x86.h"
#endif
#if defined(_MSC_VER) && !defined(__clang__)
/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0
* Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
*/
static __forceinline unsigned long __builtin_ctzl(unsigned long value)
{
#ifdef X86_CPUID
if (x86_cpu_has_tzcnt)
return _tzcnt_u32(value);
#endif
unsigned long trailing_zero;
_BitScanForward(&trailing_zero, value);
return trailing_zero;
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -1,175 +0,0 @@
/*
* Fill Window with SSE2-optimized hash shifting
*
* Copyright (C) 2013 Intel Corporation
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef X86_SSE2
#include "zbuild.h"
#include <immintrin.h>
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
extern int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
ZLIB_INTERNAL void fill_window_sse(deflate_state *s) {
const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);
register unsigned n;
register Pos *p;
unsigned more; /* Amount of free space at the end of the window. */
unsigned int wsize = s->w_size;
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
do {
more = (unsigned)(s->window_size -(unsigned long)s->lookahead -(unsigned long)s->strstart);
/* Deal with !@#$% 64K limit: */
if (sizeof(int) <= 2) {
if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
more = wsize;
} else if (more == (unsigned)(-1)) {
/* Very unlikely, but possible on 16 bit machine if
* strstart == 0 && lookahead == 1 (input done a byte at time)
*/
more--;
}
}
/* If the window is almost full and there is insufficient lookahead,
* move the upper half to the lower one to make room in the upper half.
*/
if (s->strstart >= wsize+MAX_DIST(s)) {
memcpy(s->window, s->window+wsize, (unsigned)wsize);
s->match_start = (s->match_start >= wsize) ? s->match_start - wsize : 0;
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
s->block_start -= (long) wsize;
/* Slide the hash table (could be avoided with 32 bit values
at the expense of memory usage). We slide even when level == 0
to keep the hash table consistent if we switch back to level > 0
later. (Using level 0 permanently is not an optimal usage of
zlib, so we don't care about this pathological case.)
*/
n = s->hash_size;
p = &s->head[n];
p -= 8;
do {
__m128i value, result;
value = _mm_loadu_si128((__m128i *)p);
result = _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);
n = wsize;
p = &s->prev[n];
p -= 8;
do {
__m128i value, result;
value = _mm_loadu_si128((__m128i *)p);
result = _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);
more += wsize;
}
if (s->strm->avail_in == 0) break;
/* If there was no sliding:
* strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
* more == window_size - lookahead - strstart
* => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
* => more >= window_size - 2*WSIZE + 2
* In the BIG_MEM or MMAP case (not yet supported),
* window_size == input_size + MIN_LOOKAHEAD &&
* strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
* Otherwise, window_size == 2*WSIZE so more >= 2.
* If there was sliding, more >= WSIZE. So in all cases, more >= 2.
*/
Assert(more >= 2, "more < 2");
n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
s->lookahead += n;
/* Initialize the hash value now that we have some input: */
if (s->lookahead + s->insert >= MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
s->ins_h = s->window[str];
if (str >= 1)
functable.insert_string(s, str + 2 - MIN_MATCH, 1);
#if MIN_MATCH != 3
#error Call insert_string() MIN_MATCH-3 more times
while (s->insert) {
functable.insert_string(s, str, 1);
str++;
s->insert--;
if (s->lookahead + s->insert < MIN_MATCH)
break;
}
#else
unsigned int count;
if (unlikely(s->lookahead == 1)){
count = s->insert - 1;
}else{
count = s->insert;
}
functable.insert_string(s, str, count);
s->insert -= count;
#endif
}
/* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
* but this is not important since only literal bytes will be emitted.
*/
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
/* If the WIN_INIT bytes after the end of the current data have never been
* written, then zero those bytes in order to avoid memory check reports of
* the use of uninitialized (or uninitialised as Julian writes) bytes by
* the longest match routines. Update the high water mark for the next
* time through here. WIN_INIT is set to MAX_MATCH since the longest match
* routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
*/
if (s->high_water < s->window_size) {
unsigned long curr = s->strstart + (unsigned long)(s->lookahead);
unsigned long init;
if (s->high_water < curr) {
/* Previous high water mark below current data -- zero WIN_INIT
* bytes or up to end of window, whichever is less.
*/
init = s->window_size - curr;
if (init > WIN_INIT)
init = WIN_INIT;
memset(s->window + curr, 0, (unsigned)init);
s->high_water = curr + init;
} else if (s->high_water < (unsigned long)curr + WIN_INIT) {
/* High water mark at or above current data, but below current data
* plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
* to end of window, whichever is less.
*/
init = (unsigned long)curr + WIN_INIT - s->high_water;
if (init > s->window_size - s->high_water)
init = s->window_size - s->high_water;
memset(s->window + s->high_water, 0, (unsigned)init);
s->high_water += init;
}
}
Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
}
#endif

View File

@ -5,52 +5,42 @@
*
*/
#include "zbuild.h"
#include "deflate.h"
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
* IN assertion: all calls to to INSERT_STRING are made with consecutive
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
#ifdef X86_SSE4_2_CRC_HASH
ZLIB_INTERNAL Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count) {
Pos ret = 0;
unsigned int idx;
unsigned int *ip, val, h;
for (idx = 0; idx < count; idx++) {
ip = (unsigned *)&s->window[str+idx];
memcpy(&val, ip, sizeof(val));
h = 0;
if (s->level >= TRIGGER_LEVEL)
val &= 0xFFFFFF;
#include "../../zbuild.h"
#include <immintrin.h>
#ifdef _MSC_VER
h = _mm_crc32_u32(h, val);
#elif defined(X86_SSE4_2_CRC_INTRIN)
h = __builtin_ia32_crc32si(h, val);
# include <nmmintrin.h>
#endif
#include "../../deflate.h"
#ifdef X86_SSE42_CRC_INTRIN
# ifdef _MSC_VER
# define UPDATE_HASH(s, h, val)\
h = _mm_crc32_u32(h, val)
# else
__asm__ __volatile__ (
"crc32 %1,%0\n\t"
: "+r" (h)
: "r" (val)
# define UPDATE_HASH(s, h, val)\
h = __builtin_ia32_crc32si(h, val)
# endif
#else
# ifdef _MSC_VER
# define UPDATE_HASH(s, h, val) {\
__asm mov edx, h\
__asm mov eax, val\
__asm crc32 eax, edx\
__asm mov val, eax\
}
# else
# define UPDATE_HASH(s, h, val) \
__asm__ __volatile__ (\
"crc32 %1,%0\n\t"\
: "+r" (h)\
: "r" (val)\
);
# endif
Pos head = s->head[h & s->hash_mask];
if (head != str+idx) {
s->prev[(str+idx) & s->w_mask] = head;
s->head[h & s->hash_mask] = str+idx;
if (idx == count-1)
ret = head;
} else if (idx == count - 1) {
ret = str + idx;
}
}
return ret;
}
#endif
#define INSERT_STRING insert_string_sse4
#define QUICK_INSERT_STRING quick_insert_string_sse4
#ifdef X86_SSE42_CRC_HASH
# include "../../insert_string_tpl.h"
#endif

View File

@ -0,0 +1,47 @@
/*
* AVX2 optimized hash slide, based on Intel's slide_sse implementation
*
* Copyright (C) 2017 Intel Corporation
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
* Mika T. Lindqvist <postmaster@raasu.org>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "../../zbuild.h"
#include "../../deflate.h"
#include <immintrin.h>
/* Subtract the window size from every entry of s->head (HASH_SIZE entries)
 * and s->prev (w_size entries), 16 entries per AVX2 step. The subtraction is
 * an unsigned saturating one, so entries smaller than the window size become 0.
 * Both tables are walked from their end towards their start.
 */
Z_INTERNAL void slide_hash_avx2(deflate_state *s) {
    const uint16_t wsize = (uint16_t)s->w_size;
    const __m256i delta = _mm256_set1_epi16((short)wsize);
    Pos *cursor;
    unsigned remaining;

    /* Hash heads. */
    remaining = HASH_SIZE;
    cursor = &s->head[remaining];
    do {
        __m256i entries;

        cursor -= 16;
        entries = _mm256_loadu_si256((__m256i *)cursor);
        _mm256_storeu_si256((__m256i *)cursor, _mm256_subs_epu16(entries, delta));
        remaining -= 16;
    } while (remaining > 0);

    /* Previous-position chain. */
    remaining = wsize;
    cursor = &s->prev[remaining];
    do {
        __m256i entries;

        cursor -= 16;
        entries = _mm256_loadu_si256((__m256i *)cursor);
        _mm256_storeu_si256((__m256i *)cursor, _mm256_subs_epu16(entries, delta));
        remaining -= 16;
    } while (remaining > 0);
}

View File

@ -0,0 +1,46 @@
/*
* SSE optimized hash slide
*
* Copyright (C) 2017 Intel Corporation
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "../../zbuild.h"
#include "../../deflate.h"
#include <immintrin.h>
Z_INTERNAL void slide_hash_sse2(deflate_state *s) {
Pos *p;
unsigned n;
uint16_t wsize = (uint16_t)s->w_size;
const __m128i xmm_wsize = _mm_set1_epi16((short)wsize);
n = HASH_SIZE;
p = &s->head[n] - 8;
do {
__m128i value, result;
value = _mm_loadu_si128((__m128i *)p);
result= _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);
n = wsize;
p = &s->prev[n] - 8;
do {
__m128i value, result;
value = _mm_loadu_si128((__m128i *)p);
result= _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);
}

View File

@ -8,7 +8,7 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#include "../../zutil.h"
#ifdef _MSC_VER
# include <intrin.h>
@ -17,34 +17,42 @@
# include <cpuid.h>
#endif
ZLIB_INTERNAL int x86_cpu_has_sse2;
ZLIB_INTERNAL int x86_cpu_has_sse42;
ZLIB_INTERNAL int x86_cpu_has_pclmulqdq;
ZLIB_INTERNAL int x86_cpu_has_tzcnt;
Z_INTERNAL int x86_cpu_has_avx2;
Z_INTERNAL int x86_cpu_has_sse2;
Z_INTERNAL int x86_cpu_has_ssse3;
Z_INTERNAL int x86_cpu_has_sse42;
Z_INTERNAL int x86_cpu_has_pclmulqdq;
Z_INTERNAL int x86_cpu_has_tzcnt;
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#ifdef _MSC_VER
unsigned int registers[4];
__cpuid(registers, info);
__cpuid((int *)registers, info);
*eax = registers[0];
*ebx = registers[1];
*ecx = registers[2];
*edx = registers[3];
#else
unsigned int _eax;
unsigned int _ebx;
unsigned int _ecx;
unsigned int _edx;
__cpuid(info, _eax, _ebx, _ecx, _edx);
*eax = _eax;
*ebx = _ebx;
*ecx = _ecx;
*edx = _edx;
__cpuid(info, *eax, *ebx, *ecx, *edx);
#endif
}
void ZLIB_INTERNAL x86_check_features(void) {
/* Query CPUID leaf `info`, sub-leaf `subinfo`, and return the four result
 * registers through the eax/ebx/ecx/edx out-pointers. */
static void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#ifndef _MSC_VER
    /* Non-MSVC compilers: the __cpuid_count macro writes straight into the outputs. */
    __cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx);
#else
    /* MSVC: the intrinsic fills an int[4] in eax, ebx, ecx, edx order. */
    unsigned int regs[4];

    __cpuidex((int *)regs, info, subinfo);
    *eax = regs[0];
    *ebx = regs[1];
    *ecx = regs[2];
    *edx = regs[3];
#endif
}
void Z_INTERNAL x86_check_features(void) {
unsigned eax, ebx, ecx, edx;
unsigned maxbasic;
@ -53,16 +61,20 @@ void ZLIB_INTERNAL x86_check_features(void) {
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
x86_cpu_has_sse2 = edx & 0x4000000;
x86_cpu_has_ssse3 = ecx & 0x200;
x86_cpu_has_sse42 = ecx & 0x100000;
x86_cpu_has_pclmulqdq = ecx & 0x2;
if (maxbasic >= 7) {
cpuid(7, &eax, &ebx, &ecx, &edx);
cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
// check BMI1 bit
// Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
x86_cpu_has_tzcnt = ebx & 0x8;
// check AVX2 bit
x86_cpu_has_avx2 = ebx & 0x20;
} else {
x86_cpu_has_tzcnt = 0;
x86_cpu_has_avx2 = 0;
}
}

View File

@ -6,11 +6,13 @@
#ifndef CPU_H_
#define CPU_H_
extern int x86_cpu_has_avx2;
extern int x86_cpu_has_sse2;
extern int x86_cpu_has_ssse3;
extern int x86_cpu_has_sse42;
extern int x86_cpu_has_pclmulqdq;
extern int x86_cpu_has_tzcnt;
void ZLIB_INTERNAL x86_check_features(void);
void Z_INTERNAL x86_check_features(void);
#endif /* CPU_H_ */

81
libs/zlibng/chunkset.c Normal file
View File

@ -0,0 +1,81 @@
/* chunkset.c -- inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zutil.h"
// We need sizeof(chunk_t) to be 8, no matter what.
// The representation depends on which unaligned-access macro the build defines:
// a single 64-bit integer, two 32-bit words, or eight separate bytes.
#if defined(UNALIGNED64_OK)
typedef uint64_t chunk_t;
#elif defined(UNALIGNED_OK)
typedef struct chunk_t { uint32_t u32[2]; } chunk_t;
#else
typedef struct chunk_t { uint8_t u8[8]; } chunk_t;
#endif
// Announce which chunkmemset_N helpers this file provides; chunkset_tpl.h tests
// these macros before calling the helper for a given distance. There is no
// HAVE_CHUNKMEMSET_2 here, so distance 2 takes the template's generic path.
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
/* Fill *chunk with eight copies of the byte at *from.
 *
 * The replication multiply is done in an unsigned type on purpose: an
 * unsuffixed hex constant that fits a signed type is signed, so multiplying it
 * by a byte >= 0x80 would overflow (undefined behaviour). The unsigned product
 * has the same bit pattern and is well defined. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    *chunk = (uint64_t)0x0101010101010101 * (uint8_t)*from;
#elif defined(UNALIGNED_OK)
    chunk->u32[0] = (uint32_t)0x01010101 * (uint8_t)*from;
    chunk->u32[1] = chunk->u32[0];
#else
    memset(chunk, *from, sizeof(chunk_t));
#endif
}
/* Fill *chunk with two copies of the 4 bytes found at `from`. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    /* Multiplying by 0x0000000100000001 places the word in both halves. */
    const uint32_t word = *(uint32_t *)from;
    *chunk = 0x0000000100000001 * (uint64_t)word;
#elif defined(UNALIGNED_OK)
    const uint32_t word = *(uint32_t *)from;
    chunk->u32[0] = word;
    chunk->u32[1] = word;
#else
    uint8_t *dst = (uint8_t *)chunk;
    memcpy(dst, from, 4);
    memcpy(dst + 4, from, 4);
#endif
}
/* Load the 8 bytes found at `from` into *chunk. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    const uint64_t *src = (uint64_t *)from;
    *chunk = *src;
#elif defined(UNALIGNED_OK)
    const uint32_t *words = (uint32_t *)from;
    chunk->u32[0] = words[0];
    chunk->u32[1] = words[1];
#else
    memcpy(chunk, from, sizeof(chunk_t));
#endif
}
/* Read one chunk from `s` into *chunk; this is chunkmemset_8 with the const
 * qualifier cast away to match that helper's signature. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    uint8_t *src = (uint8_t *)s;

    chunkmemset_8(src, chunk);
}
/* Write the 8 bytes held in *chunk to `out`. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    uint64_t *dst = (uint64_t *)out;
    *dst = *chunk;
#elif defined(UNALIGNED_OK)
    uint32_t *dst = (uint32_t *)out;
    dst[0] = chunk->u32[0];
    dst[1] = chunk->u32[1];
#else
    memcpy(out, chunk, sizeof(chunk_t));
#endif
}
/* Bind the generic function names used inside chunkset_tpl.h to the "_c"
   suffixed names, then include the template so its functions are emitted
   under those names, built on the chunk_t and helpers defined in this file. */
#define CHUNKSIZE chunksize_c
#define CHUNKCOPY chunkcopy_c
#define CHUNKCOPY_SAFE chunkcopy_safe_c
#define CHUNKUNROLL chunkunroll_c
#define CHUNKMEMSET chunkmemset_c
#define CHUNKMEMSET_SAFE chunkmemset_safe_c
#include "chunkset_tpl.h"

172
libs/zlibng/chunkset_tpl.h Normal file
View File

@ -0,0 +1,172 @@
/* chunkset_tpl.h -- inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* Report how many bytes one chunk_t occupies. */
Z_INTERNAL uint32_t CHUNKSIZE(void) {
    const uint32_t bytes_per_chunk = (uint32_t)sizeof(chunk_t);

    return bytes_per_chunk;
}
/* memcpy-like copy performed one chunk_t at a time. The caller guarantees that
   - at least sizeof(chunk_t) bytes of output may be overwritten even when
     len is smaller than that,
   - len is non-zero,
   - `from` trails `out` by at least sizeof(chunk_t) bytes (or the two ranges
     do not overlap at all, or the distance is simply less than the length of
     the copy).
   The first store covers the "odd" part of the length, so copies of chunk_t
   bytes or fewer never enter the loop; besides better memory bus utilisation
   this should keep the branch predictable.
   Returns out + len. */
Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
    chunk_t tmp;
    const unsigned last = len - 1;
    /* Bytes accounted for by the first (possibly partial) chunk: 1..sizeof(chunk_t). */
    const size_t head = (last % sizeof(chunk_t)) + 1;
    unsigned whole_chunks = (unsigned)(last / sizeof(chunk_t));

    loadchunk(from, &tmp);
    storechunk(out, &tmp);
    out += head;
    from += head;

    for (; whole_chunks > 0; --whole_chunks) {
        loadchunk(from, &tmp);
        storechunk(out, &tmp);
        out += sizeof(chunk_t);
        from += sizeof(chunk_t);
    }
    return out;
}
/* Like CHUNKCOPY, but never writes beyond the legal output area. When at
   least sizeof(chunk_t) bytes separate `out` from `safe` the chunked copy is
   used; otherwise exactly the bytes selected by the 16/8/4/2/1 bits of len
   are copied, widest piece first. Returns the advanced output pointer. */
Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
    if ((safe - out) >= (ptrdiff_t)sizeof(chunk_t))
        return CHUNKCOPY(out, from, len);

    /* The 16-byte piece is only considered for chunk types wider than 16 bytes. */
    if (sizeof(chunk_t) > 16 && (len & 16)) {
        memcpy(out, from, 16);
        out += 16;
        from += 16;
    }
    if (len & 8) {
        memcpy(out, from, 8);
        out += 8;
        from += 8;
    }
    if (len & 4) {
        memcpy(out, from, 4);
        out += 4;
        from += 4;
    }
    if (len & 2) {
        memcpy(out, from, 2);
        out += 2;
        from += 2;
    }
    if (len & 1) {
        *out++ = *from++;
    }
    return out;
}
/* Repeat short chunk copies, doubling *dist each time, until the distance is
   at least sizeof(chunk_t) or no longer smaller than the remaining length.
   *len is reduced by the bytes consumed and the advanced output pointer is
   returned.
   Each step stores a full chunk, so the caller must tolerate at least the
   first 2*sizeof(chunk_t) bytes of output being overwritten even if the copy
   is shorter than this. This assumption holds because inflate_fast() starts
   every iteration with at least 258 bytes of output space available (258
   being the maximum length output from a single token; see inflate_fast()'s
   assumptions below). */
Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
    unsigned char const *from = out - *dist;
    chunk_t tmp;

    for (;;) {
        if (*dist >= *len || *dist >= sizeof(chunk_t))
            break;

        loadchunk(from, &tmp);
        storechunk(out, &tmp);
        out += *dist;
        *len -= *dist;
        *dist *= 2;
    }
    return out;
}
/* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST.
Return OUT + LEN. */
Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
/* Debug performance related issues when len < sizeof(uint64_t):
Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
Assert(dist > 0, "cannot have a distance 0");
unsigned char *from = out - dist;
chunk_t chunk;
unsigned sz = sizeof(chunk);
/* Fewer bytes than one chunk: copy byte by byte. The do/while body runs at
least once, so this path relies on len being non-zero on entry. */
if (len < sz) {
do {
*out++ = *from++;
--len;
} while (len != 0);
return out;
}
/* Build one chunk holding the repeating pattern. Every HAVE_CHUNKMEMSET_n
section ends in a dangling `else`, so whichever helpers are compiled in chain
directly into the generic tests that follow. */
#ifdef HAVE_CHUNKMEMSET_1
if (dist == 1) {
chunkmemset_1(from, &chunk);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_2
if (dist == 2) {
chunkmemset_2(from, &chunk);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_4
if (dist == 4) {
chunkmemset_4(from, &chunk);
} else
#endif
#ifdef HAVE_CHUNKMEMSET_8
if (dist == 8) {
chunkmemset_8(from, &chunk);
} else
#endif
if (dist == sz) {
/* The pattern is exactly one chunk long. */
loadchunk(from, &chunk);
} else if (dist < sz) {
/* Short distance without a dedicated helper: copy the pattern dist bytes at
a time from the unchanged `from`, passing the last byte of this request as
the limit CHUNKCOPY_SAFE must respect. */
unsigned char *end = out + len - 1;
while (len > dist) {
out = CHUNKCOPY_SAFE(out, from, dist, end);
len -= dist;
}
if (len > 0) {
out = CHUNKCOPY_SAFE(out, from, len, end);
}
return out;
} else {
/* dist > sz here: hand over to the unroll + chunk copy pair. */
out = CHUNKUNROLL(out, &dist, &len);
return CHUNKCOPY(out, out - dist, len);
}
/* Only the branches that filled `chunk` reach this point: store it across
every whole chunk of the output. */
unsigned rem = len % sz;
len -= rem;
while (len) {
storechunk(out, &chunk);
out += sz;
len -= sz;
}
/* Last, deal with the case when LEN is not a multiple of SZ. */
if (rem)
memcpy(out, from, rem);
out += rem;
return out;
}
/* CHUNKMEMSET for callers with limited output space: when fewer than three
   chunks' worth of room (`left`) is available the pattern is replicated one
   byte at a time, otherwise the work is delegated to CHUNKMEMSET. Returns the
   advanced output pointer. */
Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
    const uint8_t *src;

    if (left >= (unsigned)(3 * sizeof(chunk_t)))
        return CHUNKMEMSET(out, dist, len);

    /* src stays exactly dist bytes behind out, so bytes written earlier in
       this loop are read back once the pattern repeats. */
    src = out - dist;
    for (; len > 0; --len)
        *out++ = *src++;
    return out;
}

View File

@ -0,0 +1,99 @@
// archdetect.c -- Detect compiler architecture and raise preprocessor error
//                 containing a simple arch identifier.
// Copyright (C) 2019 Hans Kristian Rosbach
// Licensed under the Zlib license, see LICENSE.md for details
//
// This file is never meant to compile: exactly one "#error archfound <id>"
// line fires, and the build system extracts <id> from the compiler output.
// Every branch therefore has to end in an #error, including the fallbacks of
// the nested version checks.

// x86_64
#if defined(__x86_64__) || defined(_M_X64)
    #error archfound x86_64

// x86
#elif defined(__i386) || defined(_M_IX86)
    #error archfound i686

// ARM
#elif defined(__aarch64__) || defined(_M_ARM64)
    #error archfound aarch64
#elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM)
    #if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__)
        #error archfound armv8
    #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
        #error archfound armv7
    #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6M__)
        #error archfound armv6
    #elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
        #error archfound armv5
    #elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARCH_5E__)
        #error archfound armv4
    #elif defined(__ARM_ARCH_3__) || defined(__TARGET_ARCH_3M__)
        #error archfound armv3
    #elif defined(__ARM_ARCH_2__)
        #error archfound armv2
    #else
        // ARM target without a recognised version macro: still report the family
        #error archfound arm
    #endif

// PowerPC
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
    #if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
        // Only trust the byte-order comparison when both macros exist; two
        // undefined macros would compare as 0 == 0 and claim little-endian.
        #if defined(__LITTLE_ENDIAN__) || \
            (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
             __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
            #error archfound powerpc64le
        #else
            #error archfound powerpc64
        #endif
    #else
        #error archfound powerpc
    #endif

// --------------- Less common architectures alphabetically below ---------------

// ALPHA
#elif defined(__alpha__) || defined(__alpha)
    #error archfound alpha

// Blackfin
#elif defined(__BFIN__)
    #error archfound blackfin

// Itanium
#elif defined(__ia64) || defined(_M_IA64)
    #error archfound ia64

// MIPS
#elif defined(__mips__) || defined(__mips)
    #error archfound mips

// Motorola 68000-series
#elif defined(__m68k__)
    #error archfound m68k

// SuperH
#elif defined(__sh__)
    #error archfound sh

// SPARC
#elif defined(__sparc__) || defined(__sparc)
    #if defined(__sparcv9) || defined(__sparc_v9__)
        #error archfound sparc9
    #elif defined(__sparcv8) || defined(__sparc_v8__)
        #error archfound sparc8
    #else
        // SPARC target without a recognised version macro: still report the family
        #error archfound sparc
    #endif

// SystemZ
#elif defined(__370__)
    #error archfound s370
// The 64-bit test must come first: s390x compilers also define __s390__
#elif defined(__s390x__) || defined(__s390x) || defined(__zarch__)
    #error archfound s390x
#elif defined(__s390__)
    #error archfound s390

// PARISC
#elif defined(__hppa__)
    #error archfound parisc

// RS-6000
#elif defined(__THW_RS6000)
    #error archfound rs6000

// return 'unrecognized' if we do not know what architecture this is
#else
    #error archfound unrecognized
#endif

View File

@ -0,0 +1,93 @@
# detect-arch.cmake -- Detect compiler architecture and set ARCH and BASEARCH
# Copyright (C) 2019 Hans Kristian Rosbach
# Licensed under the Zlib license, see LICENSE.md for details
#
# Sets:
#   ARCHDETECT_FOUND        always TRUE once this file has been included
#   ARCH                    detailed architecture name (e.g. x86_64, aarch64)
#   BASEARCH                architecture family (e.g. x86, arm, ppc)
#   BASEARCH_<FAMILY>_FOUND TRUE for the detected family

set(ARCHDETECT_FOUND TRUE)

if(CMAKE_OSX_ARCHITECTURES)
    # If multiple architectures are requested (universal build), pick only the first
    list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH)
elseif(MSVC)
    if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "X86")
        set(ARCH "i686")
    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "x64")
        set(ARCH "x86_64")
    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7")
        set(ARCH "arm")
    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64")
        set(ARCH "aarch64")
    endif()
elseif(CMAKE_CROSSCOMPILING)
    set(ARCH ${CMAKE_C_COMPILER_TARGET})
else()
    # Let preprocessor parse archdetect.c and raise an error containing the arch identifier.
    # The probe is built in the binary directory so nothing is written to the source tree.
    enable_language(C)
    try_run(
        run_result_unused
        compile_result_unused
        ${CMAKE_CURRENT_BINARY_DIR}
        ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect-arch.c
        COMPILE_OUTPUT_VARIABLE RAWOUTPUT
        CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
    )

    # Find the archfound tag and extract the arch word. MATCHES is used instead of
    # string(REGEX REPLACE), which returns the whole compiler output unchanged when
    # the tag is missing and would leave that text in ARCH.
    if("${RAWOUTPUT}" MATCHES "archfound ([a-zA-Z0-9_]+)")
        set(ARCH "${CMAKE_MATCH_1}")
    else()
        set(ARCH unknown)
    endif()
endif()

# Make sure we have ARCH set; "unrecognized" is the probe's own "don't know" answer
if(NOT ARCH OR "${ARCH}" STREQUAL "unknown" OR "${ARCH}" STREQUAL "unrecognized")
    set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
    message(STATUS "Arch not recognized, falling back to cmake arch: '${ARCH}'")
else()
    message(STATUS "Arch detected: '${ARCH}'")
endif()

# Base arch detection (patterns are unanchored; the first match wins)
if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)")
    set(BASEARCH "x86")
    set(BASEARCH_X86_FOUND TRUE)
elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)")
    set(BASEARCH "arm")
    set(BASEARCH_ARM_FOUND TRUE)
elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?")
    set(BASEARCH "ppc")
    set(BASEARCH_PPC_FOUND TRUE)
elseif("${ARCH}" MATCHES "alpha")
    set(BASEARCH "alpha")
    set(BASEARCH_ALPHA_FOUND TRUE)
elseif("${ARCH}" MATCHES "blackfin")
    set(BASEARCH "blackfin")
    set(BASEARCH_BLACKFIN_FOUND TRUE)
elseif("${ARCH}" MATCHES "ia64")
    set(BASEARCH "ia64")
    set(BASEARCH_IA64_FOUND TRUE)
elseif("${ARCH}" MATCHES "mips")
    set(BASEARCH "mips")
    set(BASEARCH_MIPS_FOUND TRUE)
elseif("${ARCH}" MATCHES "m68k")
    set(BASEARCH "m68k")
    set(BASEARCH_M68K_FOUND TRUE)
elseif("${ARCH}" MATCHES "sh")
    set(BASEARCH "sh")
    set(BASEARCH_SH_FOUND TRUE)
elseif("${ARCH}" MATCHES "sparc[89]?")
    set(BASEARCH "sparc")
    set(BASEARCH_SPARC_FOUND TRUE)
elseif("${ARCH}" MATCHES "s3[679]0x?")
    set(BASEARCH "s360")
    set(BASEARCH_S360_FOUND TRUE)
elseif("${ARCH}" MATCHES "parisc")
    set(BASEARCH "parisc")
    set(BASEARCH_PARISC_FOUND TRUE)
elseif("${ARCH}" MATCHES "rs6000")
    set(BASEARCH "rs6000")
    set(BASEARCH_RS6000_FOUND TRUE)
else()
    set(BASEARCH "x86")
    set(BASEARCH_X86_FOUND TRUE)
    message(STATUS "Basearch '${ARCH}' not recognized, defaulting to 'x86'.")
endif()
message(STATUS "Basearch of '${ARCH}' has been detected as: '${BASEARCH}'")

View File

@ -0,0 +1,123 @@
# detect-sanitizer.cmake -- Detect supported compiler sanitizer flags
# Licensed under the Zlib license, see LICENSE.md for details
# check_sanitizer_support(<known_checks> <supported_checks>)
#
# known_checks:     list of -fsanitize= check names to probe, in order.
# supported_checks: name of the variable that receives the comma-separated
#                   checks that compiled together (empty string if none did).
#
# Each probe result is stored in HAS_SANITIZER_<check>. The caller's
# CMAKE_REQUIRED_* settings are saved and restored around every probe instead of
# being cleared.
macro(check_sanitizer_support known_checks supported_checks)
    include(CheckCSourceCompiles)
    include(CMakePushCheckState)

    set(available_checks "")

    # Build list of supported sanitizer flags by incrementally trying compilation with
    # known sanitizer checks
    foreach(check ${known_checks})
        if(available_checks STREQUAL "")
            set(compile_checks "${check}")
        else()
            set(compile_checks "${available_checks},${check}")
        endif()

        cmake_push_check_state()
        set(CMAKE_REQUIRED_FLAGS "-fsanitize=${compile_checks}")
        check_c_source_compiles("int main() { return 0; }" HAS_SANITIZER_${check}
            FAIL_REGEX "not supported|unrecognized command|unknown option")
        cmake_pop_check_state()

        if(HAS_SANITIZER_${check})
            set(available_checks ${compile_checks})
        endif()
    endforeach()

    # Quoted so that "nothing supported" yields a defined, empty result
    set(${supported_checks} "${available_checks}")
endmacro()
# add_address_sanitizer()
#
# Appends -fsanitize=<checks> to CMAKE_C_FLAGS for every address sanitizer check
# the compiler accepts, and for the leak sanitizer unless a cross-compiling
# emulator is configured.
macro(add_address_sanitizer)
    set(known_checks
        address
        pointer-compare
        pointer-subtract
        )

    check_sanitizer_support("${known_checks}" supported_checks)
    # The expansion is quoted: an empty result would otherwise collapse the
    # condition to if(NOT STREQUAL ""), which is a configure error.
    if(NOT "${supported_checks}" STREQUAL "")
        message(STATUS "Address sanitizer is enabled: ${supported_checks}")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
    else()
        message(STATUS "Address sanitizer is not supported")
    endif()

    if(CMAKE_CROSSCOMPILING_EMULATOR)
        # Only check for leak sanitizer if not cross-compiling due to qemu crash
        message(WARNING "Leak sanitizer is not supported when cross compiling")
    else()
        # Leak sanitizer requires address sanitizer
        check_sanitizer_support("leak" supported_checks)
        if(NOT "${supported_checks}" STREQUAL "")
            message(STATUS "Leak sanitizer is enabled: ${supported_checks}")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
        else()
            message(STATUS "Leak sanitizer is not supported")
        endif()
    endif()
endmacro()
# add_memory_sanitizer()
#
# Appends -fsanitize=memory to CMAKE_C_FLAGS when the compiler accepts it.
macro(add_memory_sanitizer)
    check_sanitizer_support("memory" supported_checks)
    # The expansion is quoted: an empty result would otherwise collapse the
    # condition to if(NOT STREQUAL ""), which is a configure error.
    if(NOT "${supported_checks}" STREQUAL "")
        message(STATUS "Memory sanitizer is enabled: ${supported_checks}")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
    else()
        message(STATUS "Memory sanitizer is not supported")
    endif()
endmacro()
# add_undefined_sanitizer()
#
# Appends -fsanitize=<checks> to CMAKE_C_FLAGS for every undefined behavior
# sanitizer check the compiler accepts. Reads UNALIGNED_OK and CMAKE_C_FLAGS to
# decide whether the alignment and object-size checks are probed.
macro(add_undefined_sanitizer)
    set(known_checks
        array-bounds
        bool
        bounds
        builtin
        enum
        float-cast-overflow
        float-divide-by-zero
        function
        integer-divide-by-zero
        local-bounds
        null
        nonnull-attribute
        pointer-overflow
        return
        returns-nonnull-attribute
        shift
        shift-base
        shift-exponent
        signed-integer-overflow
        undefined
        unsigned-integer-overflow
        unsigned-shift-base
        vla-bound
        vptr
        )

    # Only check for alignment sanitizer flag if unaligned access is not supported
    if(NOT UNALIGNED_OK)
        list(APPEND known_checks alignment)
    endif()
    # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
    if(NOT CMAKE_C_FLAGS MATCHES "-O0")
        list(APPEND known_checks object-size)
    endif()

    check_sanitizer_support("${known_checks}" supported_checks)

    # The expansion is quoted: an empty result would otherwise collapse the
    # condition to if(NOT STREQUAL ""), which is a configure error.
    if(NOT "${supported_checks}" STREQUAL "")
        message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")

        # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
        # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
        if(UNALIGNED_OK)
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-sanitize=alignment")
        endif()
    else()
        message(STATUS "UNdefined behavior sanitizer is not supported")
    endif()
endmacro()

View File

@ -0,0 +1,48 @@
# Runs COMMAND through run-and-redirect.cmake with stdout captured in OUTPUT
# (and stdin taken from INPUT when given), then requires OUTPUT to be identical
# to the reference file COMPARE.
#   Required: COMMAND, OUTPUT, COMPARE
#   Optional: INPUT, SUCCESS_EXIT, IGNORE_LINE_ENDINGS

foreach(required_arg OUTPUT COMPARE COMMAND)
    if(NOT DEFINED ${required_arg})
        message(FATAL_ERROR "Run and compare arguments missing")
    endif()
endforeach()

# The stdin definition is only forwarded when an input file was supplied
set(stdin_define)
if(INPUT)
    set(stdin_define -DINPUT=${INPUT})
endif()

# Run command and redirect stdout to output
execute_process(COMMAND ${CMAKE_COMMAND}
    "-DCOMMAND=${COMMAND}"
    ${stdin_define}
    -DOUTPUT=${OUTPUT}
    "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
    -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
    RESULT_VARIABLE CMD_RESULT)

if(CMD_RESULT)
    message(FATAL_ERROR "Run before compare failed: ${CMD_RESULT}")
endif()

# Use configure_file to normalize line-endings; from here on COMPARE and OUTPUT
# name the normalized ".cmp" copies
if(IGNORE_LINE_ENDINGS)
    foreach(file_var COMPARE OUTPUT)
        configure_file(${${file_var}} ${${file_var}}.cmp NEWLINE_STYLE LF)
        set(${file_var} ${${file_var}}.cmp)
    endforeach()
endif()

# Compare that output is equal to specified file
execute_process(COMMAND ${CMAKE_COMMAND}
    -E compare_files ${COMPARE} ${OUTPUT}
    RESULT_VARIABLE CMD_RESULT)

# Delete temporary files used to normalize line-endings
if(IGNORE_LINE_ENDINGS)
    file(REMOVE ${COMPARE} ${OUTPUT})
endif()

if(CMD_RESULT)
    message(FATAL_ERROR "Run compare failed: ${CMD_RESULT}")
endif()

View File

@ -0,0 +1,38 @@
# Runs COMMAND with stdout written to OUTPUT and, when INPUT is set, stdin read
# from that file. Fails unless the exit code is 0 or listed in SUCCESS_EXIT.

# Without an explicit OUTPUT the command's stdout goes to the null device
if(NOT DEFINED OUTPUT)
    set(OUTPUT /dev/null)
    if(WIN32)
        set(OUTPUT NUL)
    endif()
endif()

# Redirect stdin only when an input file was requested, and only if it exists
set(stdin_redirect)
if(INPUT)
    if(NOT EXISTS ${INPUT})
        message(FATAL_ERROR "Cannot find input: ${INPUT}")
    endif()
    set(stdin_redirect INPUT_FILE ${INPUT})
endif()

execute_process(COMMAND ${COMMAND}
    ${stdin_redirect}
    RESULT_VARIABLE CMD_RESULT
    OUTPUT_FILE ${OUTPUT})

# An exit code found in the list of successful exit codes counts as success
if(SUCCESS_EXIT)
    list(FIND SUCCESS_EXIT ${CMD_RESULT} exit_code_index)
    if(NOT exit_code_index EQUAL -1)
        set(CMD_RESULT 0)
    endif()
endif()

# Check to see if successful
if(CMD_RESULT)
    message(FATAL_ERROR "${COMMAND} failed: ${CMD_RESULT}")
endif()

View File

@ -0,0 +1,188 @@
# Argument handling for the compress round-trip test.
#   Required: INPUT plus either TARGET or both COMPRESS_TARGET and DECOMPRESS_TARGET
#   Optional: COMPARE, COMPRESS_ARGS, DECOMPRESS_ARGS, GZIP_VERIFY, SUCCESS_EXIT, OUTPUT

# TARGET is shorthand for using the same program in both directions
if(TARGET)
    set(COMPRESS_TARGET ${TARGET})
    set(DECOMPRESS_TARGET ${TARGET})
endif()

foreach(required_arg INPUT COMPRESS_TARGET DECOMPRESS_TARGET)
    if(NOT DEFINED ${required_arg})
        message(FATAL_ERROR "Compress test arguments missing")
    endif()
endforeach()

# Give <name> the listed value(s) unless the caller already defined it
macro(test_compress_default name)
    if(NOT DEFINED ${name})
        set(${name} ${ARGN})
    endif()
endmacro()

# Set default values
test_compress_default(COMPARE ON)
test_compress_default(COMPRESS_ARGS -c -k)
test_compress_default(DECOMPRESS_ARGS -d -c)
test_compress_default(GZIP_VERIFY ON)
test_compress_default(SUCCESS_EXIT 0)

# Generate unique output path so multiple tests can be executed at the same time
if(OUTPUT)
    # Output name appends unique id in case multiple tests with same output name
    string(RANDOM LENGTH 6 UNIQUE_ID)
    set(OUTPUT ${OUTPUT}-${UNIQUE_ID})
else()
    # Output name based on input and unique id
    string(RANDOM UNIQUE_ID)
    set(OUTPUT ${INPUT}-${UNIQUE_ID})
endif()
# Drop every ".gz" from the base name; the steps below append their own suffixes
string(REPLACE ".gz" "" OUTPUT "${OUTPUT}")
# cleanup()
# Removes the temporary files of this test run: the minigzip pair
# (${OUTPUT}.gz, ${OUTPUT}.out) and the gzip pair (${OUTPUT}.gzip.gz,
# ${OUTPUT}.gzip.out).
macro(cleanup)
    foreach(temp_suffix .gz .out .gzip.gz .gzip.out)
        file(REMOVE ${OUTPUT}${temp_suffix})
    endforeach()
endmacro()
# Round trip through the programs under test: compress INPUT, decompress the
# result, and (when COMPARE is on) require the restored file to equal INPUT.
set(redirect_script ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake)
set(compressed_file ${OUTPUT}.gz)
set(restored_file ${OUTPUT}.out)

# Compress input file
if(NOT EXISTS ${INPUT})
    message(FATAL_ERROR "Cannot find compress input: ${INPUT}")
endif()

set(COMPRESS_COMMAND ${COMPRESS_TARGET} ${COMPRESS_ARGS})

execute_process(COMMAND ${CMAKE_COMMAND}
    "-DCOMMAND=${COMPRESS_COMMAND}"
    -DINPUT=${INPUT}
    -DOUTPUT=${compressed_file}
    "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
    -P ${redirect_script}
    RESULT_VARIABLE CMD_RESULT)

if(CMD_RESULT)
    cleanup()
    message(FATAL_ERROR "Compress failed: ${CMD_RESULT}")
endif()

# Decompress output
if(NOT EXISTS ${compressed_file})
    cleanup()
    message(FATAL_ERROR "Cannot find decompress input: ${OUTPUT}.gz")
endif()

set(DECOMPRESS_COMMAND ${DECOMPRESS_TARGET} ${DECOMPRESS_ARGS})

execute_process(COMMAND ${CMAKE_COMMAND}
    "-DCOMMAND=${DECOMPRESS_COMMAND}"
    -DINPUT=${compressed_file}
    -DOUTPUT=${restored_file}
    "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
    -P ${redirect_script}
    RESULT_VARIABLE CMD_RESULT)

if(CMD_RESULT)
    cleanup()
    message(FATAL_ERROR "Decompress failed: ${CMD_RESULT}")
endif()

if(COMPARE)
    # Compare decompressed output with original input file
    execute_process(COMMAND ${CMAKE_COMMAND}
        -E compare_files ${INPUT} ${restored_file}
        RESULT_VARIABLE CMD_RESULT)

    if(CMD_RESULT)
        cleanup()
        message(FATAL_ERROR "Compare minigzip decompress failed: ${CMD_RESULT}")
    endif()
endif()
# Cross-check against the system gzip (when available): gzip must be able to
# decompress our output, and the program under test must be able to decompress
# gzip's output.
if(GZIP_VERIFY AND NOT "${COMPRESS_ARGS}" MATCHES "-T")
    # Transparent writing does not use gzip format
    find_program(GZIP gzip)
    if(GZIP)
        if(NOT EXISTS ${OUTPUT}.gz)
            cleanup()
            message(FATAL_ERROR "Cannot find gzip decompress input: ${OUTPUT}.gz")
        endif()

        # Check gzip can decompress our compressed output
        set(GZ_DECOMPRESS_COMMAND ${GZIP} --decompress)

        execute_process(COMMAND ${CMAKE_COMMAND}
            "-DCOMMAND=${GZ_DECOMPRESS_COMMAND}"
            -DINPUT=${OUTPUT}.gz
            -DOUTPUT=${OUTPUT}.gzip.out
            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Gzip decompress failed: ${CMD_RESULT}")
        endif()

        # Compare gzip output with original input file
        execute_process(COMMAND ${CMAKE_COMMAND}
            -E compare_files ${INPUT} ${OUTPUT}.gzip.out
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Compare gzip decompress failed: ${CMD_RESULT}")
        endif()

        # The file gzip is about to read is INPUT, so that is the file to check
        if(NOT EXISTS ${INPUT})
            cleanup()
            message(FATAL_ERROR "Cannot find gzip compress input: ${INPUT}")
        endif()

        # Compress input file with gzip
        set(GZ_COMPRESS_COMMAND ${GZIP} --stdout)

        execute_process(COMMAND ${CMAKE_COMMAND}
            "-DCOMMAND=${GZ_COMPRESS_COMMAND}"
            -DINPUT=${INPUT}
            -DOUTPUT=${OUTPUT}.gzip.gz
            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Gzip compress failed: ${CMD_RESULT}")
        endif()

        # The next step reads gzip's own output, so check that file
        if(NOT EXISTS ${OUTPUT}.gzip.gz)
            cleanup()
            message(FATAL_ERROR "Cannot find minigzip decompress input: ${OUTPUT}.gzip.gz")
        endif()

        # Check minigzip can decompress gzip compressed output
        execute_process(COMMAND ${CMAKE_COMMAND}
            "-DCOMMAND=${DECOMPRESS_COMMAND}"
            -DINPUT=${OUTPUT}.gzip.gz
            -DOUTPUT=${OUTPUT}.gzip.out
            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Minigzip decompress gzip failed: ${CMD_RESULT}")
        endif()

        if(COMPARE)
            # Compare original input file with gzip decompressed output
            execute_process(COMMAND ${CMAKE_COMMAND}
                -E compare_files ${INPUT} ${OUTPUT}.gzip.out
                RESULT_VARIABLE CMD_RESULT)

            if(CMD_RESULT)
                cleanup()
                message(FATAL_ERROR "Compare minigzip decompress gzip failed: ${CMD_RESULT}")
            endif()
        endif()
    endif()
endif()

cleanup()

View File

@ -0,0 +1,26 @@
# Cross-compile toolchain: Linux on aarch64, using the aarch64-linux-gnu GCC
# cross compiler and qemu-aarch64 as the emulator.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_SYSTEM_VERSION 1)

set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")

# Report the target triple this file selects (printed after it has been set)
message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-aarch64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Same search policy as the other Linux cross toolchains in this directory
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# The C cross compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})

# The C++ cross compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

View File

@ -0,0 +1,24 @@
# Cross-compile toolchain: Linux on 32-bit ARM with qemu-arm as the emulator.
# CMAKE_C_COMPILER_TARGET is not set in this file; it is read as given.
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_SYSTEM_NAME Linux)

message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are looked up outside the find root; libraries, headers and
# packages only inside it
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)

# The C cross compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

View File

@ -0,0 +1,16 @@
# Cross-compile toolchain: 32-bit Windows via the i686-w64-mingw32 tools,
# with wine as the emulator.
set(CMAKE_SYSTEM_NAME Windows)

set(CMAKE_C_COMPILER_TARGET i686)
set(CMAKE_CXX_COMPILER_TARGET ${CMAKE_C_COMPILER_TARGET})

# Tool names and the find root are all derived from the same mingw-w64 prefix
set(CMAKE_C_COMPILER ${CMAKE_C_COMPILER_TARGET}-w64-mingw32-gcc)
set(CMAKE_CXX_COMPILER ${CMAKE_C_COMPILER_TARGET}-w64-mingw32-g++)
set(CMAKE_RC_COMPILER ${CMAKE_C_COMPILER_TARGET}-w64-mingw32-windres)

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR wine)

set(CMAKE_FIND_ROOT_PATH /usr/${CMAKE_C_COMPILER_TARGET}-w64-mingw32)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)

View File

@ -0,0 +1,16 @@
# Cross-compile toolchain: 64-bit Windows via the x86_64-w64-mingw32 tools,
# with wine as the emulator.
set(CMAKE_SYSTEM_NAME Windows)

set(CMAKE_C_COMPILER_TARGET x86_64)
set(CMAKE_CXX_COMPILER_TARGET ${CMAKE_C_COMPILER_TARGET})

# Tool names and the find root are all derived from the same mingw-w64 prefix
set(CMAKE_C_COMPILER ${CMAKE_C_COMPILER_TARGET}-w64-mingw32-gcc)
set(CMAKE_CXX_COMPILER ${CMAKE_C_COMPILER_TARGET}-w64-mingw32-g++)
set(CMAKE_RC_COMPILER ${CMAKE_C_COMPILER_TARGET}-w64-mingw32-windres)

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR wine)

set(CMAKE_FIND_ROOT_PATH /usr/${CMAKE_C_COMPILER_TARGET}-w64-mingw32)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)

View File

@ -0,0 +1,25 @@
# Cross-compile toolchain: Linux on 32-bit PowerPC, using the
# powerpc-linux-gnu GCC cross compiler and qemu-ppc as the emulator.
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR powerpc)
set(CMAKE_SYSTEM_NAME Linux)

# C and C++ share one target triple
set(CMAKE_C_COMPILER_TARGET "powerpc-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are looked up outside the find root; libraries, headers and
# packages only inside it
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)

# The C cross compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

View File

@ -0,0 +1,25 @@
# Cross-compile toolchain: Linux on 64-bit PowerPC, using the
# powerpc64-linux-gnu GCC cross compiler and qemu-ppc64 as the emulator.
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR ppc64)
set(CMAKE_SYSTEM_NAME Linux)

# C and C++ share one target triple
set(CMAKE_C_COMPILER_TARGET "powerpc64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are looked up outside the find root; libraries, headers and
# packages only inside it
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)

# The C cross compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

View File

@ -0,0 +1,25 @@
# Cross-compile toolchain: Linux on little-endian 64-bit PowerPC, using the
# powerpc64le-linux-gnu GCC cross compiler and qemu-ppc64le as the emulator.
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR ppc64le)
set(CMAKE_SYSTEM_NAME Linux)

# C and C++ share one target triple
set(CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are looked up outside the find root; libraries, headers and
# packages only inside it
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)

# The C cross compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()

View File

@ -0,0 +1,25 @@
# CMake toolchain file: cross-compile for the s390x-linux-gnu target and run
# the resulting binaries through qemu-s390x.

# Target platform description.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR s390x)
set(CMAKE_SYSTEM_VERSION 1)

# Target triple; also used below to derive the cross-compiler names and the
# directory handed to the emulator.
set(CMAKE_C_COMPILER_TARGET "s390x-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "s390x-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
# Command (plus arguments) CMake uses to run target executables on the host.
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-s390x -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are searched on the host only; libraries, headers and packages are
# searched in the target root only.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# The C cross-compiler is mandatory.
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
  message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()
set(CMAKE_C_COMPILER "${C_COMPILER_FULL_PATH}")

# The C++ cross-compiler is optional; look it up through the C++ target triple
# so the two triples can be changed independently.
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_CXX_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
  set(CMAKE_CXX_COMPILER "${CXX_COMPILER_FULL_PATH}")
endif()

View File

@ -0,0 +1,25 @@
# CMake toolchain file: cross-compile for the sparc64-linux-gnu target and run
# the resulting binaries through qemu-sparc64.

# Target platform description.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR sparc64)
set(CMAKE_SYSTEM_VERSION 1)

# Target triple; also used below to derive the cross-compiler names and the
# directory handed to the emulator.
set(CMAKE_C_COMPILER_TARGET "sparc64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "sparc64-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
# Command (plus arguments) CMake uses to run target executables on the host.
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-sparc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are searched on the host only; libraries, headers and packages are
# searched in the target root only.
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# The C cross-compiler is mandatory.
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
  message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()
set(CMAKE_C_COMPILER "${C_COMPILER_FULL_PATH}")

# The C++ cross-compiler is optional; look it up through the C++ target triple
# so the two triples can be changed independently.
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_CXX_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
  set(CMAKE_CXX_COMPILER "${CXX_COMPILER_FULL_PATH}")
endif()

186
libs/zlibng/compare258.c Normal file
View File

@ -0,0 +1,186 @@
/* compare258.c -- aligned and unaligned versions of compare258
* Copyright (C) 2020 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "zutil.h"
#include "fallback_builtins.h"
/* ALIGNED, byte comparison */
/* Byte-wise comparison of up to 256 bytes.
 *
 * Returns the number of leading bytes that are equal in src0 and src1,
 * i.e. the index of the first mismatch, or 256 when all 256 bytes match.
 * No byte past the first mismatch is read.
 */
static inline uint32_t compare256_c_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t matched = 0;

    while (matched < 256) {
        int step;

        /* Eight byte comparisons per pass, mirroring an 8x unrolled loop. */
        for (step = 0; step < 8; step++) {
            if (src0[matched] != src1[matched])
                return matched;
            matched++;
        }
    }
    return 256;
}
/* Byte-wise comparison of up to 258 bytes.
 *
 * The first two bytes are checked individually; the remaining 256 bytes are
 * delegated to compare256_c_static(). Returns the count of equal leading
 * bytes (0..258).
 */
static inline uint32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
    if (src0[0] != src1[0])
        return 0;
    if (src0[1] != src1[1])
        return 1;
    return 2 + compare256_c_static(src0 + 2, src1 + 2);
}
/* Non-inline entry point for the byte-wise 258-byte comparison; forwards
 * directly to compare258_c_static(). */
Z_INTERNAL uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
return compare258_c_static(src0, src1);
}
/* Names consumed by match_tpl.h, included right below. Presumably the template
 * expands to a function named LONGEST_MATCH built on the COMPARE256/COMPARE258
 * helpers -- NOTE(review): confirm against match_tpl.h. */
#define LONGEST_MATCH longest_match_c
#define COMPARE256 compare256_c_static
#define COMPARE258 compare258_c_static
#include "match_tpl.h"
#ifdef UNALIGNED_OK
/* UNALIGNED_OK, 16-bit integer comparison */
/* Comparison of up to 256 bytes using 16-bit loads.
 *
 * The buffers are read two bytes at a time through uint16_t pointers, with no
 * alignment adjustment. Returns the number of equal leading bytes (0..256).
 */
static inline uint32_t compare256_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t pos;

    for (pos = 0; pos < 256; pos += 2) {
        if (*(uint16_t *)(src0 + pos) != *(uint16_t *)(src1 + pos)) {
            /* The pair differs: either the first byte already mismatches
             * (+0) or only the second one does (+1). */
            return pos + ((src0[pos] == src1[pos]) ? 1 : 0);
        }
    }
    return 256;
}
/* Comparison of up to 258 bytes: one 16-bit load covers the first two bytes,
 * the remaining 256 are handled by compare256_unaligned_16_static(). */
static inline uint32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
    const int head_matches = (*(uint16_t *)src0 == *(uint16_t *)src1);

    if (head_matches)
        return 2 + compare256_unaligned_16_static(src0 + 2, src1 + 2);
    /* Mismatch within the first pair: 0 or 1 matching bytes. */
    return (src0[0] == src1[0]) ? 1 : 0;
}
/* Non-inline entry point for the 16-bit-load comparison; forwards directly to
 * compare258_unaligned_16_static(). */
Z_INTERNAL uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
return compare258_unaligned_16_static(src0, src1);
}
/* Names consumed by match_tpl.h, included right below. Presumably the template
 * expands to a function named LONGEST_MATCH built on the COMPARE256/COMPARE258
 * helpers -- NOTE(review): confirm against match_tpl.h. */
#define LONGEST_MATCH longest_match_unaligned_16
#define COMPARE256 compare256_unaligned_16_static
#define COMPARE258 compare258_unaligned_16_static
#include "match_tpl.h"
#ifdef HAVE_BUILTIN_CTZ
/* UNALIGNED_OK, 32-bit integer comparison */
/* Comparison of up to 256 bytes using 32-bit loads.
 *
 * Each pass XORs one 32-bit word from each buffer; a non-zero result means
 * the words differ and the position of the first differing byte is derived
 * from the bit pattern. Returns the number of equal leading bytes (0..256).
 */
static inline uint32_t compare256_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t len = 0;

    do {
        uint32_t sv = *(uint32_t *)src0;
        uint32_t mv = *(uint32_t *)src1;
        uint32_t diff = sv ^ mv;

        if (diff) {
            /* The first byte in memory is the least significant byte of the
             * loaded word on little-endian targets (count trailing zeros) and
             * the most significant byte on big-endian targets (count leading
             * zeros). */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            uint32_t match_byte = __builtin_clz(diff) / 8;
#else
            uint32_t match_byte = __builtin_ctz(diff) / 8;
#endif
            return len + match_byte;
        }

        src0 += 4, src1 += 4, len += 4;
    } while (len < 256);

    return 256;
}
/* Comparison of up to 258 bytes: one 16-bit load covers the first two bytes,
 * the remaining 256 are handled by compare256_unaligned_32_static(). */
static inline uint32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
    const int head_matches = (*(uint16_t *)src0 == *(uint16_t *)src1);

    if (head_matches)
        return 2 + compare256_unaligned_32_static(src0 + 2, src1 + 2);
    /* Mismatch within the first pair: 0 or 1 matching bytes. */
    return (src0[0] == src1[0]) ? 1 : 0;
}
/* Non-inline entry point for the 32-bit-load comparison; forwards directly to
 * compare258_unaligned_32_static(). */
Z_INTERNAL uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
return compare258_unaligned_32_static(src0, src1);
}
/* Names consumed by match_tpl.h, included right below. Presumably the template
 * expands to a function named LONGEST_MATCH built on the COMPARE256/COMPARE258
 * helpers -- NOTE(review): confirm against match_tpl.h. */
#define LONGEST_MATCH longest_match_unaligned_32
#define COMPARE256 compare256_unaligned_32_static
#define COMPARE258 compare258_unaligned_32_static
#include "match_tpl.h"
#endif
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
/* UNALIGNED64_OK, 64-bit integer comparison */
/* Comparison of up to 256 bytes using 64-bit loads.
 *
 * Each pass XORs one 64-bit word from each buffer; a non-zero result means
 * the words differ and the position of the first differing byte is derived
 * from the bit pattern. Returns the number of equal leading bytes (0..256).
 */
static inline uint32_t compare256_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t len = 0;

    do {
        uint64_t sv = *(uint64_t *)src0;
        uint64_t mv = *(uint64_t *)src1;
        uint64_t diff = sv ^ mv;

        if (diff) {
            /* The first byte in memory is the least significant byte of the
             * loaded word on little-endian targets (count trailing zeros) and
             * the most significant byte on big-endian targets (count leading
             * zeros). */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            uint64_t match_byte = __builtin_clzll(diff) / 8;
#else
            uint64_t match_byte = __builtin_ctzll(diff) / 8;
#endif
            return len + (uint32_t)match_byte;
        }

        src0 += 8, src1 += 8, len += 8;
    } while (len < 256);

    return 256;
}
/* Comparison of up to 258 bytes: one 16-bit load covers the first two bytes,
 * the remaining 256 are handled by compare256_unaligned_64_static(). */
static inline uint32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
    const int head_matches = (*(uint16_t *)src0 == *(uint16_t *)src1);

    if (head_matches)
        return 2 + compare256_unaligned_64_static(src0 + 2, src1 + 2);
    /* Mismatch within the first pair: 0 or 1 matching bytes. */
    return (src0[0] == src1[0]) ? 1 : 0;
}
/* Non-inline entry point for the 64-bit-load comparison; forwards directly to
 * compare258_unaligned_64_static(). */
Z_INTERNAL uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
return compare258_unaligned_64_static(src0, src1);
}
/* Names consumed by match_tpl.h, included right below. Presumably the template
 * expands to a function named LONGEST_MATCH built on the COMPARE256/COMPARE258
 * helpers -- NOTE(review): confirm against match_tpl.h. */
#define LONGEST_MATCH longest_match_unaligned_64
#define COMPARE256 compare256_unaligned_64_static
#define COMPARE258 compare258_unaligned_64_static
#include "match_tpl.h"
#endif
#endif

View File

@ -3,8 +3,6 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#define ZLIB_INTERNAL
#include "zbuild.h"
#if defined(ZLIB_COMPAT)
@ -24,7 +22,7 @@
memory, Z_BUF_ERROR if there was not enough room in the output buffer,
Z_STREAM_ERROR if the level parameter is invalid.
*/
int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source,
int Z_EXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source,
z_size_t sourceLen, int level) {
PREFIX3(stream) stream;
int err;
@ -44,7 +42,7 @@ int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsi
stream.next_out = dest;
stream.avail_out = 0;
stream.next_in = (const unsigned char *)source;
stream.next_in = (z_const unsigned char *)source;
stream.avail_in = 0;
do {
@ -66,7 +64,7 @@ int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsi
/* ===========================================================================
*/
int ZEXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) {
int Z_EXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) {
return PREFIX(compress2)(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
}
@ -74,6 +72,12 @@ int ZEXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsig
If the default memLevel or windowBits for deflateInit() is changed, then
this function needs to be updated.
*/
z_size_t ZEXPORT PREFIX(compressBound)(z_size_t sourceLen) {
z_size_t Z_EXPORT PREFIX(compressBound)(z_size_t sourceLen) {
#ifndef NO_QUICK_STRATEGY
/* Quick deflate strategy worst case is 9 bits per literal, rounded to nearest byte,
plus the size of block & gzip headers and footers */
return sourceLen + ((sourceLen + 13 + 7) >> 3) + 18;
#else
return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + (sourceLen >> 25) + 13;
#endif
}

560
libs/zlibng/configure vendored
View File

@ -23,9 +23,11 @@ BUILDDIR=$(pwd)
# set command prefix for cross-compilation
if [ -n "${CHOST}" ]; then
uname="`echo "${CHOST}" | sed -e 's/^[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)-.*$/\1/'`"
# normalize the chost before parsing it
NORM_CHOST=$(sh "$SRCDIR"/tools/config.sub $CHOST)
uname="$(echo "${NORM_CHOST}" | sed -e 's/^[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)$/\1/' -e 's/^[^-]*-[^-]*-\([^-]*\)-.*$/\1/')"
CROSS_PREFIX="${CHOST}-"
ARCH="`echo "${CHOST}" | sed -e 's/-.*//'`"
ARCH="$(echo "${NORM_CHOST}" | sed -e 's/-.*//')"
else
ARCH="`uname -m`"
fi
@ -68,8 +70,8 @@ fi
# set defaults before processing command line options
LDCONFIG=${LDCONFIG-"ldconfig"}
LDFLAGS=${LDFLAGS-"-L."}
LDSHAREDLIBC="${LDSHAREDLIBC--lc}"
LDFLAGS=${LDFLAGS}
LDSHAREDLIBC="${LDSHAREDLIBC}"
DEFFILE=
RC=
RCFLAGS=
@ -85,7 +87,7 @@ includedir=${includedir-'${prefix}/include'}
mandir=${mandir-'${prefix}/share/man'}
shared_ext='.so'
shared=1
gzfileops=0
gzfileops=1
compat=0
cover=0
build32=0
@ -94,16 +96,19 @@ buildacle=1
buildneon=1
builddfltccdeflate=0
builddfltccinflate=0
with_sanitizers=0
with_msan=0
with_sanitizer=""
with_fuzzers=0
floatabi=
native=0
forcesse2=0
avx2flag="-mavx2"
sse2flag="-msse2"
ssse3flag="-mssse3"
sse4flag="-msse4"
sse42flag="-msse4.2"
pclmulflag="-mpclmul"
acleflag=
neonflag=
without_optimizations=0
without_new_strategies=0
gcc=0
@ -143,7 +148,7 @@ case "$1" in
echo ' [--warn] Enables extra compiler warnings' | tee -a configure.log
echo ' [--debug] Enables extra debug prints during operation' | tee -a configure.log
echo ' [--zlib-compat] Compiles for zlib-compatible API instead of zlib-ng API' | tee -a configure.log
echo ' [--with-gzfileops] Compiles with the gzfile parts of the API enabled' | tee -a configure.log
echo ' [--without-gzfileops] Compiles without the gzfile parts of the API' | tee -a configure.log
echo ' [--without-optimizations] Compiles without support for optional instruction sets' | tee -a configure.log
echo ' [--without-new-strategies] Compiles without using new additional deflate strategies' | tee -a configure.log
echo ' [--without-acle] Compiles without ARM C Language Extensions' | tee -a configure.log
@ -151,9 +156,9 @@ case "$1" in
echo ' [--with-dfltcc-deflate] Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log
echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log
echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log
echo ' [--with-sanitizers] Build with address sanitizer and all supported sanitizers other than memory sanitizer (disabled by default)' | tee -a configure.log
echo ' [--with-msan] Build with memory sanitizer (disabled by default)' | tee -a configure.log
echo ' [--with-sanitizer] Build with sanitizer (memory, address, undefined)' | tee -a configure.log
echo ' [--with-fuzzers] Build test/fuzz (disabled by default)' | tee -a configure.log
echo ' [--native] Compiles with full instruction set supported on this host' | tee -a configure.log
exit 0 ;;
-p*=* | --prefix=*) prefix=`echo $1 | sed 's/.*=//'`; shift ;;
-e*=* | --eprefix=*) exec_prefix=`echo $1 | sed 's/.*=//'`; shift ;;
@ -168,7 +173,7 @@ case "$1" in
-s* | --shared | --enable-shared) shared=1; shift ;;
-t | --static) shared=0; shift ;;
--zlib-compat) compat=1; shift ;;
--with-gzfileops) gzfileops=1; shift ;;
--without-gzfileops) gzfileops=0; shift ;;
--cover) cover=1; shift ;;
-3* | --32) build32=1; shift ;;
-6* | --64) build64=1; shift ;;
@ -185,8 +190,7 @@ case "$1" in
-oldstrat | --without-new-strategies) without_new_strategies=1; shift;;
-w* | --warn) warn=1; shift ;;
-d* | --debug) debug=1; shift ;;
--with-sanitizers) with_sanitizers=1; shift ;;
--with-msan) with_msan=1; shift ;;
--with-sanitizer=*) with_sanitizer=`echo $1 | sed 's/.*=//'`; shift ;;
--with-fuzzers) with_fuzzers=1; shift ;;
*)
@ -325,6 +329,12 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
else
ARCH=native
fi ;;
powerpc | ppc)
ARCH=powerpc ;;
powerpc64 | ppc64)
ARCH=powerpc64 ;;
powerpc64le | ppc64le)
ARCH=powerpc64le ;;
esac
CFLAGS="-O2 ${CFLAGS}"
if test -n "${ARCHS}"; then
@ -332,13 +342,19 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
LDFLAGS="${LDFLAGS} ${ARCHS}"
fi
CFLAGS="${CFLAGS} -Wall"
SFLAGS="-O2 ${CFLAGS} -fPIC"
SFLAGS="${CFLAGS} -fPIC"
if test $native -eq 1; then
CFLAGS="${CFLAGS} -march=native"
SFLAGS="${SFLAGS} -march=native"
case $ARCH in
powerpc*)
NATIVE_FLAG="-mcpu=native" ;;
*)
NATIVE_FLAG="-march=native" ;;
esac
CFLAGS="${CFLAGS} ${NATIVE_FLAG}"
SFLAGS="${SFLAGS} ${NATIVE_FLAG}"
fi
if test "$warn" -eq 1; then
CFLAGS="${CFLAGS} -Wextra -Wpedantic"
CFLAGS="${CFLAGS} -Wextra -Wpedantic -Wno-implicit-fallthrough"
fi
if test $debug -eq 1; then
CFLAGS="${CFLAGS} -DZLIB_DEBUG"
@ -350,10 +366,10 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
case "$uname" in
Linux* | linux* | GNU | GNU/* | solaris*)
LDSHARED=${LDSHARED-"$cc"}
LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.1,--version-script,${SRCDIR}/${MAPNAME}" ;;
LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1},--version-script,${SRCDIR}/${MAPNAME}" ;;
*BSD | *bsd* | DragonFly)
LDSHARED=${LDSHARED-"$cc"}
LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.1,--version-script,${SRCDIR}/${MAPNAME}"
LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1},--version-script,${SRCDIR}/${MAPNAME}"
LDCONFIG="ldconfig -m" ;;
CYGWIN* | Cygwin* | cygwin*)
ARFLAGS="rcs"
@ -429,7 +445,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
QNX*) # This is for QNX6. I suppose that the QNX rule below is for QNX2,QNX4
# (alain.bonnefoy@icbt.com)
LDSHARED=${LDSHARED-"$cc"}
LDSHAREDFLAGS="-shared -Wl,-h${LIBNAME}.so.1" ;;
LDSHAREDFLAGS="-shared -Wl,-h${LIBNAME}.so.${VER1}" ;;
HP-UX*)
LDSHARED=${LDSHARED-"$cc"}
LDSHAREDFLAGS="-shared"
@ -457,7 +473,7 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
ARFLAGS="-o" ;;
aarch64)
LDSHARED=${LDSHARED-"$cc"}
LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.1 -Wl,--version-script,${SRCDIR}/${MAPNAME}"
LDSHAREDFLAGS="-shared -Wl,-soname,${LIBNAME}.so.${VER1} -Wl,--version-script,${SRCDIR}/${MAPNAME}"
LDSHAREDLIBC="-Wl,--start-group -lc -lrdimon -Wl,--end-group" ;;
*)
LDSHARED=${LDSHARED-"$cc"}
@ -555,14 +571,10 @@ fi
echo >> configure.log
if test $with_sanitizers -eq 1; then
if test $with_msan -eq 1; then
echo "Error: --with-sanitizers and --with-msan cannot be used together"
exit 1
fi
echo -n "Checking for sanitizers ASan/UBSan... " | tee -a configure.log
if test "$with_sanitizer" = "address"; then
echo -n "Checking for address sanitizer... " | tee -a configure.log
sanitizers=""
for san in bool address array-bounds float-divide-by-zero function integer-divide-by-zero return shift signed-integer-overflow undefined unsigned-integer-overflow vla-bound vptr; do
for san in address pointer-compare pointer-subtract; do
if try $CC -c $CFLAGS $test.c -fsanitize=$san ; then
if test -n "$sanitizers"; then
sanitizers="$sanitizers,$san"
@ -581,11 +593,21 @@ if test $with_sanitizers -eq 1; then
echo No | tee -a configure.log
fi
echo -n "Checking for leak sanitizer... " | tee -a configure.log
if try $CC -c $CFLAGS $test.c -fsanitize=leak; then
echo "-fsanitize=leak" | tee -a configure.log
CFLAGS="$CFLAGS -fsanitize=leak"
SFLAGS="$SFLAGS -fsanitize=leak"
LDFLAGS="$LDFLAGS -fsanitize=leak"
else
echo No | tee -a configure.log
fi
echo >> configure.log
fi
if test $with_msan -eq 1; then
echo -n "Checking for MSan... " | tee -a configure.log
if test "$with_sanitizer" = "memory"; then
echo -n "Checking for memory sanitizer... " | tee -a configure.log
if try $CC -c $CFLAGS $test.c -fsanitize=memory ; then
echo "-fsanitize=memory" | tee -a configure.log
CFLAGS="$CFLAGS -fsanitize=memory"
@ -598,6 +620,31 @@ if test $with_msan -eq 1; then
echo >> configure.log
fi
if test "$with_sanitizer" = "undefined"; then
echo -n "Checking for undefined behavior sanitizer... " | tee -a configure.log
sanitizers=""
for san in array-bounds bool bounds builtin enum float-cast-overflow float-divide-by-zero function integer-divide-by-zero local-bounds null nonnull-attribute object-size pointer-overflow return returns-nonnull-attribute shift shift-base shift-exponent signed-integer-overflow undefined unsigned-integer-overflow unsigned-shift-base vla-bound vptr; do
if try $CC -c $CFLAGS $test.c -fsanitize=$san; then
if test -n "$sanitizers"; then
sanitizers="$sanitizers,$san"
else
sanitizers="$san"
fi
fi
done
if test -n "$sanitizers"; then
echo "-fsanitize=$sanitizers" | tee -a configure.log
CFLAGS="$CFLAGS -fsanitize=$sanitizers"
SFLAGS="$SFLAGS -fsanitize=$sanitizers"
LDFLAGS="$LDFLAGS -fsanitize=$sanitizers"
else
echo No | tee -a configure.log
fi
echo >> configure.log
fi
# see if shared library build supported
cat > $test.c <<EOF
extern int getchar();
@ -643,8 +690,6 @@ EOF
if try $CC -c $CFLAGS -D_LARGEFILE64_SOURCE=1 $test.c; then
CFLAGS="${CFLAGS} -D_LARGEFILE64_SOURCE=1"
SFLAGS="${SFLAGS} -D_LARGEFILE64_SOURCE=1"
ALL="${ALL} all64"
TEST="${TEST} test64"
echo "Checking for off64_t... Yes." | tee -a configure.log
echo "Checking for fseeko... Yes." | tee -a configure.log
else
@ -659,8 +704,6 @@ int main() {
EOF
if try $CC $CFLAGS -o $test $test.c $LDSHAREDLIBC; then
echo "Checking for _off64_t... Yes." | tee -a configure.log
ALL="${ALL} all64"
TEST="${TEST} test64"
else
echo "Checking for _off64_t... No." | tee -a configure.log
fi
@ -813,17 +856,29 @@ else
leave 1
fi
# Check for -fno-semantic-interposition compiler support
echo "" > test.c
cat > $test.c <<EOF
int main() { return 0; }
EOF
if test "$gcc" -eq 1 && ($cc $CFLAGS -fno-semantic-interposition -c $test.c) >> configure.log 2>&1; then
echo "Checking for -no-semantic-interposition... Yes." | tee -a configure.log
SFLAGS="$SFLAGS -fno-semantic-interposition"
else
echo "Checking for -no-semantic-interposition... No." | tee -a configure.log
fi
# see if we can hide zlib internal symbols that are linked between separate source files using hidden
if test "$gcc" -eq 1; then
echo >> configure.log
cat > $test.c <<EOF
#define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
int ZLIB_INTERNAL foo;
#define Z_INTERNAL __attribute__((visibility ("hidden")))
int Z_INTERNAL foo;
int main() { return 0; }
EOF
if tryboth $CC -c $CFLAGS $test.c; then
CFLAGS="$CFLAGS -DHAVE_HIDDEN"
SFLAGS="$SFLAGS -DHAVE_HIDDEN"
CFLAGS="$CFLAGS -DHAVE_VISIBILITY_HIDDEN"
SFLAGS="$SFLAGS -DHAVE_VISIBILITY_HIDDEN"
echo >> configure.log
echo "Checking for attribute(visibility(hidden)) support... Yes." | tee -a configure.log
else
@ -836,13 +891,13 @@ fi
if test "$gcc" -eq 1; then
echo >> configure.log
cat > $test.c <<EOF
#define ZLIB_INTERNAL __attribute__((visibility ("internal")))
int ZLIB_INTERNAL foo;
#define Z_INTERNAL __attribute__((visibility ("internal")))
int Z_INTERNAL foo;
int main() { return 0; }
EOF
if tryboth $CC -c $CFLAGS $test.c; then
CFLAGS="$CFLAGS -DHAVE_INTERNAL"
SFLAGS="$SFLAGS -DHAVE_INTERNAL"
CFLAGS="$CFLAGS -DHAVE_VISIBILITY_INTERNAL"
SFLAGS="$SFLAGS -DHAVE_VISIBILITY_INTERNAL"
echo >> configure.log
echo "Checking for attribute(visibility(internal)) support... Yes." | tee -a configure.log
else
@ -851,26 +906,43 @@ EOF
fi
fi
# Check for __builtin_ctzl() support in compiler
# Check for __builtin_ctz() support in compiler
cat > $test.c << EOF
int main(void) {
unsigned int zero = 0;
long test = __builtin_ctzl(zero);
long test = __builtin_ctz(zero);
(void)test;
return 0;
}
EOF
if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then
echo "Checking for __builtin_ctzl ... Yes." | tee -a configure.log
CFLAGS="$CFLAGS -DHAVE_BUILTIN_CTZL"
SFLAGS="$SFLAGS -DHAVE_BUILTIN_CTZL"
echo "Checking for __builtin_ctz ... Yes." | tee -a configure.log
CFLAGS="$CFLAGS -DHAVE_BUILTIN_CTZ"
SFLAGS="$SFLAGS -DHAVE_BUILTIN_CTZ"
else
echo "Checking for __builtin_ctzl ... No." | tee -a configure.log
echo "Checking for __builtin_ctz ... No." | tee -a configure.log
fi
# Check for __builtin_ctzll() support in compiler
cat > $test.c << EOF
int main(void) {
unsigned long long zero = 0;
long test = __builtin_ctzll(zero);
(void)test;
return 0;
}
EOF
if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then
echo "Checking for __builtin_ctzll ... Yes." | tee -a configure.log
CFLAGS="$CFLAGS -DHAVE_BUILTIN_CTZLL"
SFLAGS="$SFLAGS -DHAVE_BUILTIN_CTZLL"
else
echo "Checking for __builtin_ctzll ... No." | tee -a configure.log
fi
# Check for SSE2 intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686)
i386 | i486 | i586 | i686 | x86_64)
cat > $test.c << EOF
#include <immintrin.h>
int main(void) {
@ -889,6 +961,48 @@ EOF
;;
esac
# Check for SSSE3 intrinsics
cat > $test.c << EOF
#include <x86intrin.h>
int main(void)
{
__m128i u, v, w;
u = _mm_set1_epi32(1);
v = _mm_set1_epi32(2);
w = _mm_hadd_epi32(u, v);
(void)w;
return 0;
}
EOF
if try ${CC} ${CFLAGS} ${ssse3flag} $test.c; then
echo "Checking for SSSE3 intrinsics ... Yes." | tee -a configure.log
HAVE_SSSE3_INTRIN=1
else
echo "Checking for SSSE3 intrinsics ... No." | tee -a configure.log
HAVE_SSSE3_INTRIN=0
fi
# Check for SSE4.2 CRC inline assembly
case "${ARCH}" in
i386 | i486 | i586 | i686 | x86_64)
cat > $test.c << EOF
int main(void) {
unsigned val = 0, h = 0;
__asm__ __volatile__ ( "crc32 %1,%0" : "+r" (h) : "r" (val) );
return (int) h;
}
EOF
if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
echo "Checking for SSE4.2 CRC inline assembly ... Yes." | tee -a configure.log
HAVE_SSE42CRC_INLINE_ASM=1
else
echo "Checking for SSE4.2 CRC inline assembly ... No." | tee -a configure.log
HAVE_SSE42CRC_INLINE_ASM=0
fi
;;
esac
# Check for SSE4.2 CRC intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686 | x86_64)
@ -911,6 +1025,31 @@ EOF
;;
esac
# Check for SSE4.2 compare string intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686 | x86_64)
cat > $test.c << EOF
#include <immintrin.h>
int main(void)
{
unsigned char a[64] = { 0 };
unsigned char b[64] = { 0 };
__m128i xmm_src0, xmm_src1;
xmm_src0 = _mm_loadu_si128((__m128i *)(char *)a);
xmm_src1 = _mm_loadu_si128((__m128i *)(char *)b);
return _mm_cmpestri(xmm_src0, 16, xmm_src1, 16, 0);
}
EOF
if try ${CC} ${CFLAGS} ${sse42flag} $test.c; then
echo "Checking for SSE4.2 compare string intrinsics ... Yes." | tee -a configure.log
HAVE_SSE42CMPSTR_INTRIN=1
else
echo "Checking for SSE4.2 compare string intrinsics ... No." | tee -a configure.log
HAVE_SSE42CMPSTR_INTRIN=0
fi
;;
esac
# Check for PCLMULQDQ intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686 | x86_64)
@ -933,6 +1072,11 @@ EOF
HAVE_PCLMULQDQ_INTRIN=0
fi
# Enable deflate_quick at level 1
if test $without_new_strategies -eq 1; then
CFLAGS="${CFLAGS} -DNO_QUICK_STRATEGY"
SFLAGS="${SFLAGS} -DNO_QUICK_STRATEGY"
fi
# Enable deflate_medium at level 4-6
if test $without_new_strategies -eq 1; then
CFLAGS="${CFLAGS} -DNO_MEDIUM_STRATEGY"
@ -941,6 +1085,30 @@ EOF
;;
esac
# Check for AVX2 intrinsics
case "${ARCH}" in
i386 | i486 | i586 | i686 | x86_64)
cat > $test.c << EOF
#include <immintrin.h>
int main(void) {
__m256i x = _mm256_set1_epi16(2);
const __m256i y = _mm256_set1_epi16(1);
x = _mm256_subs_epu16(x, y);
(void)x;
return 0;
}
EOF
if try ${CC} ${CFLAGS} ${avx2flag} $test.c; then
echo "Checking for AVX2 intrinsics ... Yes." | tee -a configure.log
HAVE_AVX2_INTRIN=1
else
echo "Checking for AVX2 intrinsics ... No." | tee -a configure.log
HAVE_AVX2_INTRIN=0
fi
;;
esac
# Check whether -mfpu=neon is available on ARM processors.
case "${ARCH}" in
arm*)
@ -957,6 +1125,22 @@ EOF
;;
esac
# Check whether features needed by POWER optimisations are available
case "${ARCH}" in
powerpc*)
cat > $test.c << EOF
#include <sys/auxv.h>
int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
EOF
if try $CC -c $CFLAGS -mcpu=power8 $test.c; then
HAVE_POWER8=1
echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
else
HAVE_POWER8=0
echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
fi
esac
# Check whether sys/sdt.h is available
cat > $test.c << EOF
#include <sys/sdt.h>
@ -976,81 +1160,66 @@ ARCH_SHARED_OBJS=''
# Set ARCH specific FLAGS
case "${ARCH}" in
# x86 specific optimizations
i386 | i486 | i586 | i686)
# x86/amd64 specific optimizations
i386 | i486 | i586 | i686 |x86_64)
ARCHDIR=arch/x86
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
# Enable arch-specific optimizations?
# Enable arch-specific optimizations
if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DX86_CPUID"
SFLAGS="${SFLAGS} -DX86_CPUID"
CFLAGS="${CFLAGS} -DX86_FEATURES"
SFLAGS="${SFLAGS} -DX86_FEATURES"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo"
if test ${HAVE_AVX2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_AVX2 -DX86_AVX2_ADLER32 -DX86_AVX_CHUNKSET"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} slide_avx.o chunkset_avx.o compare258_avx.o adler32_avx.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} slide_avx.lo chunkset_avx.lo compare258_avx.lo adler32_avx.lo"
fi
if test ${HAVE_SSE42CRC_INTRIN} -eq 1 || test ${HAVE_SSE42CRC_INLINE_ASM} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CRC_HASH"
SFLAGS="${SFLAGS} -DX86_SSE42_CRC_HASH"
if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CRC_INTRIN"
SFLAGS="${SFLAGS} -DX86_SSE42_CRC_INTRIN"
fi
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
fi
if test ${HAVE_SSE42CMPSTR_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE42_CMP_STR"
SFLAGS="${SFLAGS} -DX86_SSE42_CMP_STR"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} compare258_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} compare258_sse.lo"
fi
if test ${HAVE_SSE2_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE2"
SFLAGS="${SFLAGS} -DX86_SSE2"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_sse.lo"
CFLAGS="${CFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
SFLAGS="${SFLAGS} -DX86_SSE2 -DX86_SSE2_CHUNKSET"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} chunkset_sse.o slide_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} chunkset_sse.lo slide_sse.lo"
if test $forcesse2 -eq 1; then
CFLAGS="${CFLAGS} -DX86_NOCHECK_SSE2"
SFLAGS="${SFLAGS} -DX86_NOCHECK_SSE2"
fi
# Enable deflate_quick at level 1?
# requires SSE2: code uses fill_window_sse
if test $without_new_strategies -eq 0; then
CFLAGS="${CFLAGS} -DX86_QUICK_STRATEGY"
SFLAGS="${SFLAGS} -DX86_QUICK_STRATEGY"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} deflate_quick.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} deflate_quick.lo"
fi
fi
if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_INTRIN"
SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_INTRIN"
fi
CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_HASH"
SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_HASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} insert_string_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} insert_string_sse.lo"
if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_PCLMULQDQ_CRC"
SFLAGS="${SFLAGS} -DX86_PCLMULQDQ_CRC"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo"
fi
fi
;;
# x86_64 specific optimizations
x86_64)
ARCHDIR=arch/x86
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
# Enable arch-specific optimizations?
if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE4_2_CRC_HASH"
SFLAGS="${SFLAGS} -DX86_CPUID -DX86_SSE2 -DX86_SSE4_2_CRC_HASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} x86.o fill_window_sse.o insert_string_sse.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} x86.lo fill_window_sse.lo insert_string_sse.lo"
if test ${HAVE_SSE42CRC_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSE4_2_CRC_INTRIN"
SFLAGS="${SFLAGS} -DX86_SSE4_2_CRC_INTRIN"
if test ${HAVE_SSSE3_INTRIN} -eq 1; then
CFLAGS="${CFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
SFLAGS="${SFLAGS} -DX86_SSSE3 -DX86_SSSE3_ADLER32"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_ssse3.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_ssse3.lo"
fi
if test ${HAVE_PCLMULQDQ_INTRIN} -eq 1; then
@ -1059,24 +1228,20 @@ case "${ARCH}" in
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo"
fi
# Enable deflate_quick at level 1?
if test $without_new_strategies -eq 0; then
CFLAGS="${CFLAGS} -DX86_QUICK_STRATEGY"
SFLAGS="${SFLAGS} -DX86_QUICK_STRATEGY"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} deflate_quick.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} deflate_quick.lo"
fi
fi
;;
# ARM specific optimizations
arm | armv[3467]l | armv4b | armv4tl | armv5tel | armv5tejl | armv[67]hl | armv7hnl | armv[78]-a | armv8-a+* | armv8.[1234]-a | armv8.[1234]-a+*)
arm*)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=arm
ARCHDIR=arch/arm
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DARM_FEATURES"
SFLAGS="${SFLAGS} -DARM_FEATURES"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo"
fi
GCC_MACHINE=$(${CC} -dumpmachine)
case "${GCC_MACHINE}" in
@ -1085,9 +1250,12 @@ case "${ARCH}" in
*gnueabi)
floatabi="-mfloat-abi=softfp" ;;
esac
CFLAGS="${CFLAGS} ${floatabi}"
SFLAGS="${SFLAGS} ${floatabi}"
case "${ARCH}" in
armv[345]*)
if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
echo ACLE support not available
fi
@ -1095,11 +1263,13 @@ case "${ARCH}" in
if test $buildneon -eq 1; then
echo NEON support not available
fi
fi
;;
armv6l | armv6hl)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
echo ACLE support not available
fi
@ -1107,68 +1277,76 @@ case "${ARCH}" in
if test $buildneon -eq 1; then
echo NEON support not available
fi
fi
;;
arm | armv7*)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
echo ACLE support not available
fi
if test $buildneon -eq 1; then
CFLAGS="${CFLAGS} ${floatabi} -mfpu=neon -DARM_NEON_ADLER32"
SFLAGS="${SFLAGS} ${floatabi} -mfpu=neon -DARM_NEON_ADLER32"
if test $MFPU_NEON_AVAILABLE -eq 1;then
neonflag="-mfpu=neon"
fi
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo"
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
;;
armv8-a | armv8-a+simd)
CFLAGS="${CFLAGS} -DUNALIGNED_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK"
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
if test $without_optimizations -eq 0; then
if test $buildacle -eq 1; then
echo ACLE support not available
fi
if test $buildneon -eq 1; then
CFLAGS="${CFLAGS} ${floatabi}"
SFLAGS="${SFLAGS} ${floatabi}"
if test $MFPU_NEON_AVAILABLE -eq 1;then
CFLAGS="${CFLAGS} -mfpu=neon"
SFLAGS="${SFLAGS} -mfpu=neon"
neonflag="-mfpu=neon"
fi
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32"
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
;;
armv8-a+crc | armv8-a+crc+simd | armv8.[1234]-a | armv8.[1234]-a+simd)
CFLAGS="-march=${ARCH} ${CFLAGS} -DARM_ACLE_CRC_HASH -DUNALIGNED_OK"
SFLAGS="-march=${ARCH} ${SFLAGS} -DARM_ACLE_CRC_HASH -DUNALIGNED_OK"
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
acleflag="-march=${ARCH}"
if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DARM_ACLE_CRC_HASH"
SFLAGS="${SFLAGS} -DARM_ACLE_CRC_HASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
if test $buildneon -eq 1; then
CFLAGS="${CFLAGS} ${floatabi}"
SFLAGS="${SFLAGS} ${floatabi}"
if test $MFPU_NEON_AVAILABLE -eq 1;then
CFLAGS="${CFLAGS} -mfpu=neon"
SFLAGS="${SFLAGS} -mfpu=neon"
neonflag="-mfpu=neon"
fi
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32"
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
fi
;;
esac
@ -1178,14 +1356,19 @@ case "${ARCH}" in
aarch64)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=aarch64
ARCHDIR=arch/arm
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
if test $native -eq 0; then
ARCH="armv8-a"
else
ARCH="native"
fi
if test $without_optimizations -eq 0; then
CFLAGS="${CFLAGS} -DARM_FEATURES"
SFLAGS="${SFLAGS} -DARM_FEATURES"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo"
if test $buildacle -eq 1; then
if test $native -eq 0; then
ARCH="${ARCH}+crc"
@ -1195,31 +1378,61 @@ case "${ARCH}" in
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo"
fi
if test $buildneon -eq 1; then
if test $native -eq 0; then
ARCH="${ARCH}+simd"
fi
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo"
CFLAGS="${CFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
SFLAGS="${SFLAGS} -DARM_NEON_ADLER32 -DARM_NEON_CHUNKSET -DARM_NEON_SLIDEHASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} adler32_neon.o chunkset_neon.o slide_neon.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} adler32_neon.lo chunkset_neon.lo slide_neon.lo"
fi
CFLAGS="-march=${ARCH} ${CFLAGS} -DUNALIGNED_OK"
SFLAGS="-march=${ARCH} ${SFLAGS} -DUNALIGNED_OK"
fi
neonflag="-march=${ARCH}"
acleflag="-march=${ARCH}"
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
;;
powerpc*)
case "${ARCH}" in
powerpc)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc
;;
powerpc64)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64
;;
powerpc64le)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le
CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNALIGNED64_OK"
;;
esac
ARCHDIR=arch/power
if test $without_optimizations -eq 0; then
if test $HAVE_POWER8 -eq 1; then
CFLAGS="${CFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_SLIDEHASH"
SFLAGS="${SFLAGS} -DPOWER8 -DPOWER_FEATURES -DPOWER8_VSX_ADLER32 -DPOWER8_VSX_SLIDEHASH"
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o adler32_power8.o slide_hash_power8.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo adler32_power8.lo slide_hash_power8.lo"
fi
fi
;;
s390x)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=s390x
ARCHDIR=arch/s390
if test $without_optimizations -eq 0; then
if test $builddfltccdeflate -eq 1 -o $builddfltccinflate -eq 1; then
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} dfltcc_common.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_common.lo"
fi
if test $builddfltccdeflate -eq 1; then
CFLAGS="${CFLAGS} -DS390_DFLTCC_DEFLATE"
SFLAGS="${SFLAGS} -DS390_DFLTCC_DEFLATE"
@ -1227,6 +1440,7 @@ case "${ARCH}" in
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_deflate.lo"
ARCH="${ARCH}+dfltcc-deflate"
fi
if test $builddfltccinflate -eq 1; then
CFLAGS="${CFLAGS} -DS390_DFLTCC_INFLATE"
SFLAGS="${SFLAGS} -DS390_DFLTCC_INFLATE"
@ -1234,6 +1448,7 @@ case "${ARCH}" in
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} dfltcc_inflate.lo"
ARCH="${ARCH}+dfltcc-inflate"
fi
fi
;;
*)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=$ARCH
@ -1286,12 +1501,24 @@ echo prefix = $prefix >> configure.log
echo sharedlibdir = $sharedlibdir >> configure.log
echo uname = $uname >> configure.log
echo sse2flag = $sse2flag >> configure.log
echo ssse3flag = $ssse3flag >> configure.log
echo sse4flag = $sse4flag >> configure.log
echo pclmulflag = $pclmulflag >> configure.log
echo acleflag = $acleflag >> configure.log
echo neonflag = $neonflag >> configure.log
echo ARCHDIR = ${ARCHDIR} >> configure.log
echo ARCH_STATIC_OBJS = ${ARCH_STATIC_OBJS} >> configure.log
echo ARCH_SHARED_OBJS = ${ARCH_SHARED_OBJS} >> configure.log
# replace_in_file EXPR FILE
#   Run the sed expression EXPR over FILE, editing it in place with a ".tmp"
#   backup suffix.  How the suffix is handed to -i depends on $OS: when it is
#   Darwin the suffix is a separate argument, otherwise it is attached to -i.
replace_in_file() {
    case "$OS" in
        Darwin) sed -i '.tmp' -e "$1" "$2" ;;
        *)      sed -i'.tmp' -e "$1" "$2" ;;
    esac
}
# update Makefile with the configure results
INCLUDES="-I$SRCDIR"
@ -1347,7 +1574,7 @@ sed < $SRCDIR/Makefile.in "
" > Makefile
# Append header files dependences.
for file in $(ls -1 $SRCDIR/*.c $SRCDIR/test/*.c $SRCDIR/test/fuzz/*.c $SRCDIR/$ARCHDIR/*.c); do
for file in $(ls -1 $SRCDIR/*.c $SRCDIR/test/*.c $SRCDIR/test/fuzz/*.c $SRCDIR/$ARCHDIR/*.c $SRCDIR/tools/*.c); do
short_name=$(echo $file | sed -e "s#$SRCDIR/##g")
incs=$(grep -h include $file | sed -n 's/# *\include *"\(.*\.h\)".*/\1/p' | sort | uniq)
includes=$(for i in $incs; do
@ -1366,17 +1593,7 @@ for file in $(ls -1 $SRCDIR/*.c $SRCDIR/test/*.c $SRCDIR/test/fuzz/*.c $SRCDIR/$
if grep -q "^$obj:" Makefile; then
# Replace the existing line with a line with all dependences.
sed -i "s#$obj:.*#$obj: \$(SRCDIR)/$short_name $includes#g" Makefile
# Special case example64 and minigzip64.
case "$obj" in
example.o)
sed -i "s#example64.o:.*#example64.o: \$(SRCDIR)/$short_name $includes#g" Makefile
;;
minigzip.o)
sed -i "s#minigzip64.o:.*#minigzip64.o: \$(SRCDIR)/$short_name $includes#g" Makefile
;;
esac
$(replace_in_file "s#$obj:.*#$obj: \$(SRCDIR)/$short_name $includes#g" Makefile)
else
# Append at the end of Makefile a new line with the header dependences.
echo "$obj: \$(SRCDIR)/$short_name $includes" >> Makefile
@ -1391,7 +1608,7 @@ for file in $(ls -1 $SRCDIR/*.c $SRCDIR/test/*.c $SRCDIR/test/fuzz/*.c $SRCDIR/$
if grep -q "^$lobj:" Makefile; then
# Replace the existing line with a line with all dependences.
sed -i "s#$lobj:.*#$lobj: \$(SRCDIR)/$short_name $includes#g" Makefile
$(replace_in_file "s#$lobj:.*#$lobj: \$(SRCDIR)/$short_name $includes#g" Makefile)
else
# Append at the end of Makefile a new line with the header dependences.
echo "$lobj: \$(SRCDIR)/$short_name $includes" >> Makefile
@ -1414,9 +1631,13 @@ sed < $SRCDIR/$ARCHDIR/Makefile.in "
/^SRCDIR *=/s#=.*#=$SRCDIR/$ARCHDIR#
/^SRCTOP *=/s#=.*#=$SRCDIR#
/^TOPDIR *=/s#=.*#=$BUILDDIR#
/^AVX2FLAG *=/s#=.*#=$avx2flag#
/^SSE2FLAG *=/s#=.*#=$sse2flag#
/^SSSE3FLAG *=/s#=.*#=$ssse3flag#
/^SSE4FLAG *=/s#=.*#=$sse4flag#
/^PCLMULFLAG *=/s#=.*#=$pclmulflag#
/^ACLEFLAG *=/s#=.*#=$acleflag#
/^NEONFLAG *=/s#=.*#=$neonflag#
" > $ARCHDIR/Makefile
# Append header files dependences.
@ -1438,7 +1659,7 @@ for file in $(ls -1 $SRCDIR/$ARCHDIR/*.c); do
short_name=$(basename $file)
if grep -q "^$obj:" $ARCHDIR/Makefile; then
# Replace the existing line with a line with all dependences.
sed -i "s#$obj:.*#$obj: \$(SRCDIR)/$short_name $includes#g" $ARCHDIR/Makefile
$(replace_in_file "s#$obj:.*#$obj: \$(SRCDIR)/$short_name $includes#g" $ARCHDIR/Makefile)
else
# Append at the end of Makefile a new line with the header dependences.
echo "$obj: \$(SRCDIR)/$short_name $includes" >> $ARCHDIR/Makefile
@ -1446,7 +1667,7 @@ for file in $(ls -1 $SRCDIR/$ARCHDIR/*.c); do
if grep -q "^$lobj:" $ARCHDIR/Makefile; then
# Replace the existing line with a line with all dependences.
sed -i "s#$lobj:.*#$lobj: \$(SRCDIR)/$short_name $includes#g" $ARCHDIR/Makefile
$(replace_in_file "s#$lobj:.*#$lobj: \$(SRCDIR)/$short_name $includes#g" $ARCHDIR/Makefile)
else
# Append at the end of Makefile a new line with the header dependences.
echo "$lobj: \$(SRCDIR)/$short_name $includes" >> $ARCHDIR/Makefile
@ -1468,6 +1689,7 @@ sed < $SRCDIR/test/Makefile.in "
/^COMPATTESTS *=/s#=.*#=$COMPATTESTS#
/^QEMU_RUN *=/s#=.*#=$QEMU_RUN#
/^WITH_FUZZERS *=/s#=.*#=$with_fuzzers#
/^LIBNAME *=/s#=.*#=$LIBNAME#
" > test/Makefile
# create zlib.pc with the configure results

View File

@ -9,251 +9,40 @@
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
*/
/* @(#) $Id$ */
#include "zbuild.h"
# include "gzendian.h"
#include "zendian.h"
#include <inttypes.h>
/*
Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
protection on the static variables used to control the first-use generation
of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
first call get_crc_table() to initialize the tables before allowing more than
one thread to use crc32().
DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. A main()
routine is also produced, so that this one source file can be compiled to an
executable.
*/
#ifdef MAKECRCH
# include <stdio.h>
# ifndef DYNAMIC_CRC_TABLE
# define DYNAMIC_CRC_TABLE
# endif /* !DYNAMIC_CRC_TABLE */
#endif /* MAKECRCH */
#include "deflate.h"
#include "functable.h"
/* Local functions for crc concatenation */
#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
static uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec);
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
/* ========================================================================= */
/* Multiply the GF(2) matrix `mat` by the bit vector `vec`: XOR together the
 * rows of `mat` selected by the set bits of `vec`, where bit 0 selects mat[0].
 * Rows above the highest set bit of `vec` are never read. */
static uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec) {
    uint32_t acc = 0;
    const uint32_t *row;

    for (row = mat; vec != 0; ++row, vec >>= 1) {
        if ((vec & 1) != 0)
            acc ^= *row;
    }
    return acc;
}
#ifdef DYNAMIC_CRC_TABLE
volatile int crc_table_empty = 1;
static uint32_t crc_table[8][256];
static uint32_t crc_comb[GF2_DIM][GF2_DIM];
void make_crc_table(void);
static void gf2_matrix_square(uint32_t *square, const uint32_t *mat);
#ifdef MAKECRCH
static void write_table(FILE *, const uint32_t *, int);
#endif /* MAKECRCH */
/* ========================================================================= */
/* Write the square of the GF(2) matrix `mat` into `square`: each of the
 * GF2_DIM output rows is `mat` multiplied by the corresponding row of `mat`. */
static void gf2_matrix_square(uint32_t *square, const uint32_t *mat) {
    int row = 0;

    while (row < GF2_DIM) {
        square[row] = gf2_matrix_times(mat, mat[row]);
        row++;
    }
}
/*
Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
Polynomials over GF(2) are represented in binary, one bit per coefficient,
with the lowest powers in the most significant bit. Then adding polynomials
is just exclusive-or, and multiplying a polynomial by x is a right shift by
one. If we call the above polynomial p, and represent a byte as the
polynomial q, also with the lowest power in the most significant bit (so the
byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
where a mod b means the remainder after dividing a by b.
This calculation is done using the shift-register method of multiplying and
taking the remainder. The register is initialized to zero, and for each
incoming bit, x^32 is added mod p to the register if the bit is a one (where
x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
x (which is shifting right by one and adding x^32 mod p if the bit shifted
out is a one). We start with the highest power (least significant bit) of
q and repeat for all eight bits of q.
The first table is simply the CRC of all possible eight bit values. This is
all the information needed to generate CRCs on data a byte at a time for all
combinations of CRC register values and incoming bytes. The remaining tables
allow for word-at-a-time CRC calculation for both big-endian and little-
endian machines, where a word is four bytes.
*/
/*
 * Fill the static crc_table[8][256] and crc_comb[GF2_DIM][GF2_DIM] arrays.
 * The first caller builds the tables and then clears crc_table_empty; any
 * later caller spins until crc_table_empty is cleared. When MAKECRCH is
 * defined, every call additionally writes both tables to "crc32.h" in the
 * current directory (and returns silently if that file cannot be opened).
 */
void make_crc_table() {
uint32_t c;
int n, k;
uint32_t poly; /* polynomial exclusive-or pattern */
/* terms of polynomial defining this crc (except x^32): */
static volatile int first = 1; /* flag to limit concurrent making */
static const unsigned char p[] = {0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26};
/* See if another task is already doing this (not thread-safe, but better
than nothing -- significantly reduces duration of vulnerability in
case the advice about DYNAMIC_CRC_TABLE is ignored) */
if (first) {
first = 0;
/* make exclusive-or pattern from polynomial (0xedb88320) */
poly = 0;
for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
poly |= (uint32_t)1 << (31 - p[n]);
/* generate a crc for every 8-bit value */
for (n = 0; n < 256; n++) {
c = (uint32_t)n;
for (k = 0; k < 8; k++)
c = c & 1 ? poly ^ (c >> 1) : c >> 1;
crc_table[0][n] = c;
}
/* generate crc for each value followed by one, two, and three zeros,
and then the byte reversal of those as well as the first table */
for (n = 0; n < 256; n++) {
c = crc_table[0][n];
/* tables 4..7 are the byte-swapped copies of tables 0..3 */
crc_table[4][n] = ZSWAP32(c);
for (k = 1; k < 4; k++) {
c = crc_table[0][c & 0xff] ^ (c >> 8);
crc_table[k][n] = c;
crc_table[k + 4][n] = ZSWAP32(c);
}
}
/* generate zero operators table for crc32_combine() */
/* generate the operator to apply a single zero bit to a CRC -- the
first row adds the polynomial if the low bit is a 1, and the
remaining rows shift the CRC right one bit */
/* the single-bit operator is stored in row k; rows k + 1 and k + 2 receive
its successive squares below, and the 8-zero operator lands in row 0 */
k = GF2_DIM - 3;
crc_comb[k][0] = 0xedb88320UL; /* CRC-32 polynomial */
uint32_t row = 1;
for (n = 1; n < GF2_DIM; n++) {
crc_comb[k][n] = row;
row <<= 1;
}
/* generate operators that apply 2, 4, and 8 zeros to a CRC, putting
the last one, the operator for one zero byte, at the 0 position */
gf2_matrix_square(crc_comb[k + 1], crc_comb[k]);
gf2_matrix_square(crc_comb[k + 2], crc_comb[k + 1]);
gf2_matrix_square(crc_comb[0], crc_comb[k + 2]);
/* generate operators for applying 2^n zero bytes to a CRC, filling out
the remainder of the table -- the operators repeat after GF2_DIM
values of n, so the table only needs GF2_DIM entries, regardless of
the size of the length being processed */
for (n = 1; n < k; n++)
gf2_matrix_square(crc_comb[n], crc_comb[n - 1]);
/* mark tables as complete, in case someone else is waiting */
crc_table_empty = 0;
} else { /* not first */
/* wait for the other guy to finish (not efficient, but rare) */
while (crc_table_empty)
{}
}
#ifdef MAKECRCH
/* this section sits after the if/else above, so it runs on every call */
{
FILE *out;
out = fopen("crc32.h", "w");
/* no diagnostic is emitted when the output file cannot be created */
if (out == NULL) return;
/* write out CRC table to crc32.h */
fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
fprintf(out, "static const uint32_t ");
fprintf(out, "crc_table[8][256] =\n{\n {\n");
write_table(out, crc_table[0], 256);
for (k = 1; k < 8; k++) {
fprintf(out, " },\n {\n");
write_table(out, crc_table[k], 256);
}
fprintf(out, " }\n};\n");
/* write out zero operator table to crc32.h */
fprintf(out, "\nstatic const uint32_t ");
fprintf(out, "crc_comb[%d][%d] =\n{\n {\n", GF2_DIM, GF2_DIM);
write_table(out, crc_comb[0], GF2_DIM);
for (k = 1; k < GF2_DIM; k++) {
fprintf(out, " },\n {\n");
write_table(out, crc_comb[k], GF2_DIM);
}
fprintf(out, " }\n};\n");
fclose(out);
}
#endif /* MAKECRCH */
}
#ifdef MAKECRCH
/* Print the first `k` entries of `table` to `out` as comma-separated
 * 0x%08x literals, five per line. The first entry on each line gets a
 * leading space, and the final entry ends with a newline and no comma. */
static void write_table(FILE *out, const uint32_t *table, int k) {
    int idx;

    for (idx = 0; idx < k; idx++) {
        const char *lead = (idx % 5 == 0) ? " " : "";
        const char *trail;

        if (idx == k - 1)
            trail = "\n";
        else if (idx % 5 == 4)
            trail = ",\n";
        else
            trail = ", ";

        fprintf(out, "%s0x%08" PRIx32 "%s", lead, (uint32_t)(table[idx]), trail);
    }
}
/* Entry point of the MAKECRCH build: generate the tables once and exit
 * with status 0. */
int main() {
    make_crc_table();

    return 0;
}
#endif /* MAKECRCH */
#else /* !DYNAMIC_CRC_TABLE */
/* ========================================================================
* Tables of CRC-32s of all single-byte values, made by make_crc_table(),
* and tables of zero operator matrices for crc32_combine().
*/
#include "crc32.h"
#endif /* DYNAMIC_CRC_TABLE */
#include "crc32_tbl.h"
/* =========================================================================
* This function can be used by asm versions of crc32()
*/
const uint32_t * ZEXPORT PREFIX(get_crc_table)(void) {
#ifdef DYNAMIC_CRC_TABLE
if (crc_table_empty)
make_crc_table();
#endif /* DYNAMIC_CRC_TABLE */
const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) {
return (const uint32_t *)crc_table;
}
uint32_t ZEXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) {
if (buf == NULL) return 0;
return (unsigned long)functable.crc32((uint32_t)crc, buf, len);
}
#else
uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
if (buf == NULL) return 0;
return functable.crc32(crc, buf, len);
}
#endif
/* ========================================================================= */
#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
#define DO4 DO1; DO1; DO1; DO1
/* ========================================================================= */
ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len)
{
Z_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len) {
crc = crc ^ 0xffffffff;
#ifdef UNROLL_MORE
@ -274,9 +63,15 @@ ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uin
return crc ^ 0xffffffff;
}
uint32_t ZEXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) {
return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len);
}
#else
uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
return PREFIX(crc32_z)(crc, buf, len);
}
#endif
/*
This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
@ -298,9 +93,9 @@ uint32_t ZEXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t
#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
/* ========================================================================= */
ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
register uint32_t c;
register const uint32_t *buf4;
Z_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint32_t *buf4;
c = crc;
c = ~c;
@ -340,9 +135,9 @@ ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint
#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
/* ========================================================================= */
ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
register uint32_t c;
register const uint32_t *buf4;
Z_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint32_t *buf4;
c = ZSWAP32(crc);
c = ~c;
@ -374,45 +169,19 @@ ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_
}
#endif /* BYTE_ORDER == BIG_ENDIAN */
/* ========================================================================= */
/* Combine two CRC-32 values: advance crc1 over len2 zero bytes using the
 * crc_comb operators (one per set bit of len2), then XOR in crc2.
 * A len2 of zero or less leaves crc1 unadvanced. */
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    int slot = 0;

#ifdef DYNAMIC_CRC_TABLE
    if (crc_table_empty)
        make_crc_table();
#endif /* DYNAMIC_CRC_TABLE */

    if (len2 <= 0)
        return crc1 ^ crc2;

    /* operator for 2^n zeros repeats every GF2_DIM n values */
    while (len2) {
        if (len2 & 1)
            crc1 = gf2_matrix_times(crc_comb[slot], crc1);
        slot = (slot + 1) % GF2_DIM;
        len2 >>= 1;
    }
    return crc1 ^ crc2;
}
/* ========================================================================= */
/* Exported entry point taking a z_off_t length; forwards to crc32_combine_(). */
uint32_t ZEXPORT PREFIX(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off_t len2) {
    const uint32_t combined = crc32_combine_(crc1, crc2, len2);

    return combined;
}

/* Exported entry point taking a z_off64_t length; forwards to crc32_combine_(). */
uint32_t ZEXPORT PREFIX(crc32_combine64)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    const uint32_t combined = crc32_combine_(crc1, crc2, len2);

    return combined;
}
#ifdef X86_PCLMULQDQ_CRC
#include "arch/x86/x86.h"
#include "arch/x86/crc_folding.h"
ZLIB_INTERNAL void crc_finalize(deflate_state *const s) {
Z_INTERNAL void crc_finalize(deflate_state *const s) {
if (x86_cpu_has_pclmulqdq)
s->strm->adler = crc_fold_512to32(s);
}
#endif
ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
Z_INTERNAL void crc_reset(deflate_state *const s) {
#ifdef X86_PCLMULQDQ_CRC
x86_check_features();
if (x86_cpu_has_pclmulqdq) {
crc_fold_init(s);
return;
@ -421,7 +190,7 @@ ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
s->strm->adler = PREFIX(crc32)(0L, NULL, 0);
}
ZLIB_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
Z_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
#ifdef X86_PCLMULQDQ_CRC
if (x86_cpu_has_pclmulqdq) {
crc_fold_copy(strm->state, dst, strm->next_in, size);
@ -431,68 +200,3 @@ ZLIB_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsi
memcpy(dst, strm->next_in, size);
strm->adler = PREFIX(crc32)(strm->adler, dst, size);
}
/* ========================================================================= */
/* Build in op[0..GF2_DIM-1] the operator matrix that advances a CRC over
 * len2 zero bytes, for later use with crc32_combine_op(). */
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2)
{
    unsigned slot = 0;
    int col;

#ifdef DYNAMIC_CRC_TABLE
    if (crc_table_empty)
        make_crc_table();
#endif /* DYNAMIC_CRC_TABLE */

    /* if len2 is zero or negative, return the identity matrix */
    if (len2 <= 0) {
        for (col = 0; col < GF2_DIM; col++)
            op[col] = (uint32_t)1 << col;
        return;
    }

    /* at least one bit in len2 is set -- skip the clear low bits to find it,
       then copy the operator corresponding to that position into op */
    while ((len2 & 1) == 0) {
        len2 >>= 1;
        slot = (slot + 1) % GF2_DIM;
    }
    for (col = 0; col < GF2_DIM; col++)
        op[col] = crc_comb[slot][col];

    /* for each remaining bit set in len2 (if any), multiply op by the
       operator corresponding to that position */
    len2 >>= 1;
    slot = (slot + 1) % GF2_DIM;
    while (len2 != 0) {
        if (len2 & 1) {
            for (col = 0; col < GF2_DIM; col++)
                op[col] = gf2_matrix_times(crc_comb[slot], op[col]);
        }
        len2 >>= 1;
        slot = (slot + 1) % GF2_DIM;
    }
}
/* ========================================================================= */
/* Exported entry point taking a z_off_t length; forwards to crc32_combine_gen_(). */
void ZEXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2) {
    crc32_combine_gen_(op, len2);
    return;
}

/* Exported entry point taking a z_off64_t length; forwards to crc32_combine_gen_(). */
void ZEXPORT PREFIX(crc32_combine_gen64)(uint32_t *op, z_off64_t len2) {
    crc32_combine_gen_(op, len2);
    return;
}
/* ========================================================================= */
/* Apply the operator matrix `op` to crc1 and XOR the result with crc2. */
uint32_t ZEXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op) {
    const uint32_t advanced = gf2_matrix_times(op, crc1);

    return advanced ^ crc2;
}

108
libs/zlibng/crc32_comb.c Normal file
View File

@ -0,0 +1,108 @@
/* crc32_comb.c -- combine the CRC-32 check values of concatenated data streams
* Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*
* Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
* CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
* tables for updating the shift register in one step with three exclusive-ors
* instead of four steps with four exclusive-ors. This results in about a
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
*/
#include "zbuild.h"
#include <inttypes.h>
#include "deflate.h"
#include "crc32_p.h"
#include "crc32_comb_tbl.h"
/* Local functions for crc concatenation */
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
/* ========================================================================= */
/* Combine two CRC-32 values: advance crc1 over len2 zero bytes using the
 * crc_comb operators (one per set bit of len2), then XOR in crc2.
 * A len2 of zero or less leaves crc1 unadvanced. */
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    int slot = 0;

    if (len2 <= 0)
        return crc1 ^ crc2;

    /* operator for 2^n zeros repeats every GF2_DIM n values */
    while (len2) {
        if (len2 & 1)
            crc1 = gf2_matrix_times(crc_comb[slot], crc1);
        slot = (slot + 1) % GF2_DIM;
        len2 >>= 1;
    }
    return crc1 ^ crc2;
}
/* ========================================================================= */
/* Exported crc32_combine entry points. With ZLIB_COMPAT the checksums are
 * unsigned long and both a z_off_t and a z_off64_t variant are provided;
 * otherwise a single uint32_t / z_off64_t variant is provided. Each one
 * forwards to crc32_combine_(). */
#ifdef ZLIB_COMPAT
unsigned long Z_EXPORT PREFIX(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off_t len2) {
    const uint32_t first = (uint32_t)crc1;
    const uint32_t second = (uint32_t)crc2;

    return (unsigned long)crc32_combine_(first, second, len2);
}

unsigned long Z_EXPORT PREFIX4(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off64_t len2) {
    const uint32_t first = (uint32_t)crc1;
    const uint32_t second = (uint32_t)crc2;

    return (unsigned long)crc32_combine_(first, second, len2);
}
#else
uint32_t Z_EXPORT PREFIX4(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    const uint32_t combined = crc32_combine_(crc1, crc2, len2);

    return combined;
}
#endif
/* ========================================================================= */
/* Build in op[0..GF2_DIM-1] the operator matrix that advances a CRC over
 * len2 zero bytes, for later use with crc32_combine_op(). */
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2) {
    unsigned slot = 0;
    int col;

    /* if len2 is zero or negative, return the identity matrix */
    if (len2 <= 0) {
        for (col = 0; col < GF2_DIM; col++)
            op[col] = (uint32_t)1 << col;
        return;
    }

    /* at least one bit in len2 is set -- skip the clear low bits to find it,
       then copy the operator corresponding to that position into op */
    while ((len2 & 1) == 0) {
        len2 >>= 1;
        slot = (slot + 1) % GF2_DIM;
    }
    for (col = 0; col < GF2_DIM; col++)
        op[col] = crc_comb[slot][col];

    /* for each remaining bit set in len2 (if any), multiply op by the
       operator corresponding to that position */
    len2 >>= 1;
    slot = (slot + 1) % GF2_DIM;
    while (len2 != 0) {
        if (len2 & 1) {
            for (col = 0; col < GF2_DIM; col++)
                op[col] = gf2_matrix_times(crc_comb[slot], op[col]);
        }
        len2 >>= 1;
        slot = (slot + 1) % GF2_DIM;
    }
}
/* ========================================================================= */
/* Exported crc32_combine_gen entry points. The z_off_t variant exists only
 * with ZLIB_COMPAT; the z_off64_t variant is always built. Both forward to
 * crc32_combine_gen_(). */
#ifdef ZLIB_COMPAT
void Z_EXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2) {
    crc32_combine_gen_(op, len2);
    return;
}
#endif

void Z_EXPORT PREFIX4(crc32_combine_gen)(uint32_t *op, z_off64_t len2) {
    crc32_combine_gen_(op, len2);
    return;
}
/* ========================================================================= */
/* Apply the operator matrix `op` to crc1 and XOR the result with crc2. */
uint32_t Z_EXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op) {
    const uint32_t advanced = gf2_matrix_times(op, crc1);

    return advanced ^ crc2;
}

View File

@ -0,0 +1,300 @@
#ifndef CRC32_COMB_TBL_H_
#define CRC32_COMB_TBL_H_
/* crc32_comb_tbl.h -- zero operators table for CRC combine
* Generated automatically by makecrct.c
*/
static const uint32_t crc_comb[32][32] =
{
{
0x77073096, 0xee0e612c, 0x076dc419, 0x0edb8832, 0x1db71064,
0x3b6e20c8, 0x76dc4190, 0xedb88320, 0x00000001, 0x00000002,
0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040,
0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000,
0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000,
0x00400000, 0x00800000
},
{
0x191b3141, 0x32366282, 0x646cc504, 0xc8d98a08, 0x4ac21251,
0x958424a2, 0xf0794f05, 0x3b83984b, 0x77073096, 0xee0e612c,
0x076dc419, 0x0edb8832, 0x1db71064, 0x3b6e20c8, 0x76dc4190,
0xedb88320, 0x00000001, 0x00000002, 0x00000004, 0x00000008,
0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100,
0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000,
0x00004000, 0x00008000
},
{
0xb8bc6765, 0xaa09c88b, 0x8f629757, 0xc5b428ef, 0x5019579f,
0xa032af3e, 0x9b14583d, 0xed59b63b, 0x01c26a37, 0x0384d46e,
0x0709a8dc, 0x0e1351b8, 0x1c26a370, 0x384d46e0, 0x709a8dc0,
0xe1351b80, 0x191b3141, 0x32366282, 0x646cc504, 0xc8d98a08,
0x4ac21251, 0x958424a2, 0xf0794f05, 0x3b83984b, 0x77073096,
0xee0e612c, 0x076dc419, 0x0edb8832, 0x1db71064, 0x3b6e20c8,
0x76dc4190, 0xedb88320
},
{
0xccaa009e, 0x4225077d, 0x844a0efa, 0xd3e51bb5, 0x7cbb312b,
0xf9766256, 0x299dc2ed, 0x533b85da, 0xa6770bb4, 0x979f1129,
0xf44f2413, 0x33ef4e67, 0x67de9cce, 0xcfbd399c, 0x440b7579,
0x8816eaf2, 0xcb5cd3a5, 0x4dc8a10b, 0x9b914216, 0xec53826d,
0x03d6029b, 0x07ac0536, 0x0f580a6c, 0x1eb014d8, 0x3d6029b0,
0x7ac05360, 0xf580a6c0, 0x30704bc1, 0x60e09782, 0xc1c12f04,
0x58f35849, 0xb1e6b092
},
{
0xae689191, 0x87a02563, 0xd4314c87, 0x73139f4f, 0xe6273e9e,
0x173f7b7d, 0x2e7ef6fa, 0x5cfdedf4, 0xb9fbdbe8, 0xa886b191,
0x8a7c6563, 0xcf89cc87, 0x44629f4f, 0x88c53e9e, 0xcafb7b7d,
0x4e87f0bb, 0x9d0fe176, 0xe16ec4ad, 0x19ac8f1b, 0x33591e36,
0x66b23c6c, 0xcd6478d8, 0x41b9f7f1, 0x8373efe2, 0xdd96d985,
0x605cb54b, 0xc0b96a96, 0x5a03d36d, 0xb407a6da, 0xb37e4bf5,
0xbd8d91ab, 0xa06a2517
},
{
0xf1da05aa, 0x38c50d15, 0x718a1a2a, 0xe3143454, 0x1d596ee9,
0x3ab2ddd2, 0x7565bba4, 0xeacb7748, 0x0ee7e8d1, 0x1dcfd1a2,
0x3b9fa344, 0x773f4688, 0xee7e8d10, 0x078c1c61, 0x0f1838c2,
0x1e307184, 0x3c60e308, 0x78c1c610, 0xf1838c20, 0x38761e01,
0x70ec3c02, 0xe1d87804, 0x18c1f649, 0x3183ec92, 0x6307d924,
0xc60fb248, 0x576e62d1, 0xaedcc5a2, 0x86c88d05, 0xd6e01c4b,
0x76b13ed7, 0xed627dae
},
{
0x8f352d95, 0xc51b5d6b, 0x5147bc97, 0xa28f792e, 0x9e6ff41d,
0xe7aeee7b, 0x142cdab7, 0x2859b56e, 0x50b36adc, 0xa166d5b8,
0x99bcad31, 0xe8085c23, 0x0b61be07, 0x16c37c0e, 0x2d86f81c,
0x5b0df038, 0xb61be070, 0xb746c6a1, 0xb5fc8b03, 0xb0881047,
0xba6126cf, 0xafb34bdf, 0x841791ff, 0xd35e25bf, 0x7dcd4d3f,
0xfb9a9a7e, 0x2c4432bd, 0x5888657a, 0xb110caf4, 0xb95093a9,
0xa9d02113, 0x88d14467
},
{
0x33fff533, 0x67ffea66, 0xcfffd4cc, 0x448eafd9, 0x891d5fb2,
0xc94bb925, 0x49e6740b, 0x93cce816, 0xfce8d66d, 0x22a0aa9b,
0x45415536, 0x8a82aa6c, 0xce745299, 0x4799a373, 0x8f3346e6,
0xc5178b8d, 0x515e115b, 0xa2bc22b6, 0x9e09432d, 0xe763801b,
0x15b60677, 0x2b6c0cee, 0x56d819dc, 0xadb033b8, 0x80116131,
0xdb53c423, 0x6dd68e07, 0xdbad1c0e, 0x6c2b3e5d, 0xd8567cba,
0x6bddff35, 0xd7bbfe6a
},
{
0xce3371cb, 0x4717e5d7, 0x8e2fcbae, 0xc72e911d, 0x552c247b,
0xaa5848f6, 0x8fc197ad, 0xc4f2291b, 0x52955477, 0xa52aa8ee,
0x9124579d, 0xf939a97b, 0x290254b7, 0x5204a96e, 0xa40952dc,
0x9363a3f9, 0xfdb641b3, 0x201d8527, 0x403b0a4e, 0x8076149c,
0xdb9d2f79, 0x6c4b58b3, 0xd896b166, 0x6a5c648d, 0xd4b8c91a,
0x72009475, 0xe40128ea, 0x13735795, 0x26e6af2a, 0x4dcd5e54,
0x9b9abca8, 0xec447f11
},
{
0x1072db28, 0x20e5b650, 0x41cb6ca0, 0x8396d940, 0xdc5cb4c1,
0x63c86fc3, 0xc790df86, 0x5450b94d, 0xa8a1729a, 0x8a33e375,
0xcf16c0ab, 0x455c8717, 0x8ab90e2e, 0xce031a1d, 0x4777327b,
0x8eee64f6, 0xc6adcfad, 0x562a991b, 0xac553236, 0x83db622d,
0xdcc7c21b, 0x62fe8277, 0xc5fd04ee, 0x508b0f9d, 0xa1161f3a,
0x995d3835, 0xe9cb762b, 0x08e7ea17, 0x11cfd42e, 0x239fa85c,
0x473f50b8, 0x8e7ea170
},
{
0xf891f16f, 0x2a52e49f, 0x54a5c93e, 0xa94b927c, 0x89e622b9,
0xc8bd4333, 0x4a0b8027, 0x9417004e, 0xf35f06dd, 0x3dcf0bfb,
0x7b9e17f6, 0xf73c2fec, 0x35095999, 0x6a12b332, 0xd4256664,
0x733bca89, 0xe6779512, 0x179e2c65, 0x2f3c58ca, 0x5e78b194,
0xbcf16328, 0xa293c011, 0x9e568663, 0xe7dc0a87, 0x14c9134f,
0x2992269e, 0x53244d3c, 0xa6489a78, 0x97e032b1, 0xf4b16323,
0x3213c007, 0x6427800e
},
{
0x88b6ba63, 0xca1c7287, 0x4f49e34f, 0x9e93c69e, 0xe6568b7d,
0x17dc10bb, 0x2fb82176, 0x5f7042ec, 0xbee085d8, 0xa6b00df1,
0x96111da3, 0xf7533d07, 0x35d77c4f, 0x6baef89e, 0xd75df13c,
0x75cae439, 0xeb95c872, 0x0c5a96a5, 0x18b52d4a, 0x316a5a94,
0x62d4b528, 0xc5a96a50, 0x5023d2e1, 0xa047a5c2, 0x9bfe4dc5,
0xec8d9dcb, 0x026a3dd7, 0x04d47bae, 0x09a8f75c, 0x1351eeb8,
0x26a3dd70, 0x4d47bae0
},
{
0x5ad8a92c, 0xb5b15258, 0xb013a2f1, 0xbb5643a3, 0xaddd8107,
0x80ca044f, 0xdae50edf, 0x6ebb1bff, 0xdd7637fe, 0x619d69bd,
0xc33ad37a, 0x5d04a0b5, 0xba09416a, 0xaf638495, 0x85b60f6b,
0xd01d1897, 0x7b4b376f, 0xf6966ede, 0x365ddbfd, 0x6cbbb7fa,
0xd9776ff4, 0x699fd9a9, 0xd33fb352, 0x7d0e60e5, 0xfa1cc1ca,
0x2f4885d5, 0x5e910baa, 0xbd221754, 0xa13528e9, 0x991b5793,
0xe947a967, 0x09fe548f
},
{
0xb566f6e2, 0xb1bceb85, 0xb808d14b, 0xab60a4d7, 0x8db04fef,
0xc011999f, 0x5b52357f, 0xb6a46afe, 0xb639d3bd, 0xb702a13b,
0xb5744437, 0xb1998e2f, 0xb8421a1f, 0xabf5327f, 0x8c9b62bf,
0xc247c33f, 0x5ffe803f, 0xbffd007e, 0xa48b06bd, 0x92670b3b,
0xffbf1037, 0x240f262f, 0x481e4c5e, 0x903c98bc, 0xfb083739,
0x2d616833, 0x5ac2d066, 0xb585a0cc, 0xb07a47d9, 0xbb8589f3,
0xac7a15a7, 0x83852d0f
},
{
0x9d9129bf, 0xe053553f, 0x1bd7ac3f, 0x37af587e, 0x6f5eb0fc,
0xdebd61f8, 0x660bc5b1, 0xcc178b62, 0x435e1085, 0x86bc210a,
0xd6094455, 0x77638eeb, 0xeec71dd6, 0x06ff3ded, 0x0dfe7bda,
0x1bfcf7b4, 0x37f9ef68, 0x6ff3ded0, 0xdfe7bda0, 0x64be7d01,
0xc97cfa02, 0x4988f245, 0x9311e48a, 0xfd52cf55, 0x21d498eb,
0x43a931d6, 0x875263ac, 0xd5d5c119, 0x70da8473, 0xe1b508e6,
0x181b178d, 0x30362f1a
},
{
0x2ee43a2c, 0x5dc87458, 0xbb90e8b0, 0xac50d721, 0x83d0a803,
0xdcd05647, 0x62d1aacf, 0xc5a3559e, 0x5037ad7d, 0xa06f5afa,
0x9bafb3b5, 0xec2e612b, 0x032dc417, 0x065b882e, 0x0cb7105c,
0x196e20b8, 0x32dc4170, 0x65b882e0, 0xcb7105c0, 0x4d930dc1,
0x9b261b82, 0xed3d3145, 0x010b64cb, 0x0216c996, 0x042d932c,
0x085b2658, 0x10b64cb0, 0x216c9960, 0x42d932c0, 0x85b26580,
0xd015cd41, 0x7b5a9cc3
},
{
0x1b4511ee, 0x368a23dc, 0x6d1447b8, 0xda288f70, 0x6f2018a1,
0xde403142, 0x67f164c5, 0xcfe2c98a, 0x44b49555, 0x89692aaa,
0xc9a35315, 0x4837a06b, 0x906f40d6, 0xfbaf87ed, 0x2c2e099b,
0x585c1336, 0xb0b8266c, 0xba014a99, 0xaf739373, 0x859620a7,
0xd05d470f, 0x7bcb885f, 0xf79710be, 0x345f273d, 0x68be4e7a,
0xd17c9cf4, 0x79883fa9, 0xf3107f52, 0x3d51f8e5, 0x7aa3f1ca,
0xf547e394, 0x31fec169
},
{
0xbce15202, 0xa2b3a245, 0x9e1642cb, 0xe75d83d7, 0x15ca01ef,
0x2b9403de, 0x572807bc, 0xae500f78, 0x87d118b1, 0xd4d33723,
0x72d76807, 0xe5aed00e, 0x102ca65d, 0x20594cba, 0x40b29974,
0x816532e8, 0xd9bb6391, 0x6807c163, 0xd00f82c6, 0x7b6e03cd,
0xf6dc079a, 0x36c90975, 0x6d9212ea, 0xdb2425d4, 0x6d394de9,
0xda729bd2, 0x6f9431e5, 0xdf2863ca, 0x6521c1d5, 0xca4383aa,
0x4ff60115, 0x9fec022a
},
{
0xff08e5ef, 0x2560cd9f, 0x4ac19b3e, 0x9583367c, 0xf0776ab9,
0x3b9fd333, 0x773fa666, 0xee7f4ccc, 0x078f9fd9, 0x0f1f3fb2,
0x1e3e7f64, 0x3c7cfec8, 0x78f9fd90, 0xf1f3fb20, 0x3896f001,
0x712de002, 0xe25bc004, 0x1fc68649, 0x3f8d0c92, 0x7f1a1924,
0xfe343248, 0x271962d1, 0x4e32c5a2, 0x9c658b44, 0xe3ba10c9,
0x1c0527d3, 0x380a4fa6, 0x70149f4c, 0xe0293e98, 0x1b237b71,
0x3646f6e2, 0x6c8dedc4
},
{
0x6f76172e, 0xdeec2e5c, 0x66a95af9, 0xcd52b5f2, 0x41d46da5,
0x83a8db4a, 0xdc20b0d5, 0x633067eb, 0xc660cfd6, 0x57b099ed,
0xaf6133da, 0x85b361f5, 0xd017c5ab, 0x7b5e8d17, 0xf6bd1a2e,
0x360b321d, 0x6c16643a, 0xd82cc874, 0x6b2896a9, 0xd6512d52,
0x77d35ce5, 0xefa6b9ca, 0x043c75d5, 0x0878ebaa, 0x10f1d754,
0x21e3aea8, 0x43c75d50, 0x878ebaa0, 0xd46c7301, 0x73a9e043,
0xe753c086, 0x15d6874d
},
{
0x56f5cab9, 0xadeb9572, 0x80a62ca5, 0xda3d5f0b, 0x6f0bb857,
0xde1770ae, 0x675fe71d, 0xcebfce3a, 0x460e9a35, 0x8c1d346a,
0xc34b6e95, 0x5de7db6b, 0xbbcfb6d6, 0xacee6bed, 0x82add19b,
0xde2aa577, 0x67244caf, 0xce48995e, 0x47e034fd, 0x8fc069fa,
0xc4f1d5b5, 0x5292ad2b, 0xa5255a56, 0x913bb2ed, 0xf906639b,
0x297dc177, 0x52fb82ee, 0xa5f705dc, 0x909f0df9, 0xfa4f1db3,
0x2fef3d27, 0x5fde7a4e
},
{
0x385993ac, 0x70b32758, 0xe1664eb0, 0x19bd9b21, 0x337b3642,
0x66f66c84, 0xcdecd908, 0x40a8b451, 0x815168a2, 0xd9d3d705,
0x68d6a84b, 0xd1ad5096, 0x782ba76d, 0xf0574eda, 0x3bdf9bf5,
0x77bf37ea, 0xef7e6fd4, 0x058dd9e9, 0x0b1bb3d2, 0x163767a4,
0x2c6ecf48, 0x58dd9e90, 0xb1bb3d20, 0xb8077c01, 0xab7ffe43,
0x8d8efac7, 0xc06cf3cf, 0x5ba8e1df, 0xb751c3be, 0xb5d2813d,
0xb0d4043b, 0xbad90e37
},
{
0xb4247b20, 0xb339f001, 0xbd02e643, 0xa174cac7, 0x999893cf,
0xe84021df, 0x0bf145ff, 0x17e28bfe, 0x2fc517fc, 0x5f8a2ff8,
0xbf145ff0, 0xa559b9a1, 0x91c27503, 0xf8f5ec47, 0x2a9adecf,
0x5535bd9e, 0xaa6b7b3c, 0x8fa7f039, 0xc43ee633, 0x530cca27,
0xa619944e, 0x97422edd, 0xf5f55bfb, 0x309bb1b7, 0x6137636e,
0xc26ec6dc, 0x5fac8bf9, 0xbf5917f2, 0xa5c329a5, 0x90f7550b,
0xfa9fac57, 0x2e4e5eef
},
{
0x695186a7, 0xd2a30d4e, 0x7e371cdd, 0xfc6e39ba, 0x23ad7535,
0x475aea6a, 0x8eb5d4d4, 0xc61aafe9, 0x57445993, 0xae88b326,
0x8660600d, 0xd7b1c65b, 0x74128af7, 0xe82515ee, 0x0b3b2d9d,
0x16765b3a, 0x2cecb674, 0x59d96ce8, 0xb3b2d9d0, 0xbc14b5e1,
0xa3586d83, 0x9dc1dd47, 0xe0f2bccf, 0x1a947fdf, 0x3528ffbe,
0x6a51ff7c, 0xd4a3fef8, 0x7236fbb1, 0xe46df762, 0x13aae885,
0x2755d10a, 0x4eaba214
},
{
0x66bc001e, 0xcd78003c, 0x41810639, 0x83020c72, 0xdd751ea5,
0x619b3b0b, 0xc3367616, 0x5d1dea6d, 0xba3bd4da, 0xaf06aff5,
0x857c59ab, 0xd189b517, 0x78626c6f, 0xf0c4d8de, 0x3af8b7fd,
0x75f16ffa, 0xebe2dff4, 0x0cb4b9a9, 0x19697352, 0x32d2e6a4,
0x65a5cd48, 0xcb4b9a90, 0x4de63361, 0x9bcc66c2, 0xece9cbc5,
0x02a291cb, 0x05452396, 0x0a8a472c, 0x15148e58, 0x2a291cb0,
0x54523960, 0xa8a472c0
},
{
0xb58b27b3, 0xb0674927, 0xbbbf940f, 0xac0e2e5f, 0x836d5aff,
0xddabb3bf, 0x6026613f, 0xc04cc27e, 0x5be882bd, 0xb7d1057a,
0xb4d30cb5, 0xb2d71f2b, 0xbedf3817, 0xa6cf766f, 0x96efea9f,
0xf6aed37f, 0x362ca0bf, 0x6c59417e, 0xd8b282fc, 0x6a1403b9,
0xd4280772, 0x732108a5, 0xe642114a, 0x17f524d5, 0x2fea49aa,
0x5fd49354, 0xbfa926a8, 0xa4234b11, 0x93379063, 0xfd1e2687,
0x214d4b4f, 0x429a969e
},
{
0xfe273162, 0x273f6485, 0x4e7ec90a, 0x9cfd9214, 0xe28a2269,
0x1e654293, 0x3cca8526, 0x79950a4c, 0xf32a1498, 0x3d252f71,
0x7a4a5ee2, 0xf494bdc4, 0x32587dc9, 0x64b0fb92, 0xc961f724,
0x49b2e809, 0x9365d012, 0xfdbaa665, 0x20044a8b, 0x40089516,
0x80112a2c, 0xdb535219, 0x6dd7a273, 0xdbaf44e6, 0x6c2f8f8d,
0xd85f1f1a, 0x6bcf3875, 0xd79e70ea, 0x744de795, 0xe89bcf2a,
0x0a469815, 0x148d302a
},
{
0xd3c98813, 0x7ce21667, 0xf9c42cce, 0x28f95fdd, 0x51f2bfba,
0xa3e57f74, 0x9cbbf8a9, 0xe206f713, 0x1f7ce867, 0x3ef9d0ce,
0x7df3a19c, 0xfbe74338, 0x2cbf8031, 0x597f0062, 0xb2fe00c4,
0xbe8d07c9, 0xa66b09d3, 0x97a715e7, 0xf43f2d8f, 0x330f5d5f,
0x661ebabe, 0xcc3d757c, 0x430becb9, 0x8617d972, 0xd75eb4a5,
0x75cc6f0b, 0xeb98de16, 0x0c40ba6d, 0x188174da, 0x3102e9b4,
0x6205d368, 0xc40ba6d0
},
{
0xf7d6deb4, 0x34dcbb29, 0x69b97652, 0xd372eca4, 0x7d94df09,
0xfb29be12, 0x2d227a65, 0x5a44f4ca, 0xb489e994, 0xb262d569,
0xbfb4ac93, 0xa4185f67, 0x9341b88f, 0xfdf2775f, 0x2095e8ff,
0x412bd1fe, 0x8257a3fc, 0xdfde41b9, 0x64cd8533, 0xc99b0a66,
0x4847128d, 0x908e251a, 0xfa6d4c75, 0x2fab9eab, 0x5f573d56,
0xbeae7aac, 0xa62df319, 0x972ae073, 0xf524c6a7, 0x31388b0f,
0x6271161e, 0xc4e22c3c
},
{
0xedb88320, 0x00000001, 0x00000002, 0x00000004, 0x00000008,
0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100,
0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000,
0x00004000, 0x00008000, 0x00010000, 0x00020000, 0x00040000,
0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
0x20000000, 0x40000000
},
{
0x76dc4190, 0xedb88320, 0x00000001, 0x00000002, 0x00000004,
0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080,
0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000,
0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000,
0x00800000, 0x01000000, 0x02000000, 0x04000000, 0x08000000,
0x10000000, 0x20000000
},
{
0x1db71064, 0x3b6e20c8, 0x76dc4190, 0xedb88320, 0x00000001,
0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400,
0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000,
0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000,
0x00200000, 0x00400000, 0x00800000, 0x01000000, 0x02000000,
0x04000000, 0x08000000
}
};
#endif /* CRC32_COMB_TBL_H_ */

19
libs/zlibng/crc32_p.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef CRC32_P_H_
#define CRC32_P_H_
#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
/* Multiply the GF(2) matrix mat by the bit vector vec.
 *
 * The product is the XOR of mat[k] for every bit k that is set in vec.
 * Scanning stops once no set bits remain, so entries of mat above the
 * highest set bit of vec are never read.
 */
static inline uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec) {
    uint32_t product = 0;
    unsigned col;

    for (col = 0; vec != 0; col++, vec >>= 1) {
        if ((vec & 1u) != 0)
            product ^= mat[col];
    }
    return product;
}
#endif /* CRC32_P_H_ */

View File

@ -1,8 +1,8 @@
#ifndef CRC32_H_
#define CRC32_H_
#ifndef CRC32_TBL_H_
#define CRC32_TBL_H_
/* crc32.h -- tables for rapid CRC calculation
* Generated automatically by crc32.c
/* crc32_tbl.h -- tables for rapid CRC calculation
* Generated automatically by makecrct.c
*/
static const uint32_t crc_table[8][256] =
@ -441,295 +441,4 @@ static const uint32_t crc_table[8][256] =
}
};
static const uint32_t crc_comb[32][32] =
{
{
0x77073096UL, 0xee0e612cUL, 0x076dc419UL, 0x0edb8832UL, 0x1db71064UL,
0x3b6e20c8UL, 0x76dc4190UL, 0xedb88320UL, 0x00000001UL, 0x00000002UL,
0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL,
0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL,
0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL,
0x00400000UL, 0x00800000UL
},
{
0x191b3141UL, 0x32366282UL, 0x646cc504UL, 0xc8d98a08UL, 0x4ac21251UL,
0x958424a2UL, 0xf0794f05UL, 0x3b83984bUL, 0x77073096UL, 0xee0e612cUL,
0x076dc419UL, 0x0edb8832UL, 0x1db71064UL, 0x3b6e20c8UL, 0x76dc4190UL,
0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL,
0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL,
0x00004000UL, 0x00008000UL
},
{
0xb8bc6765UL, 0xaa09c88bUL, 0x8f629757UL, 0xc5b428efUL, 0x5019579fUL,
0xa032af3eUL, 0x9b14583dUL, 0xed59b63bUL, 0x01c26a37UL, 0x0384d46eUL,
0x0709a8dcUL, 0x0e1351b8UL, 0x1c26a370UL, 0x384d46e0UL, 0x709a8dc0UL,
0xe1351b80UL, 0x191b3141UL, 0x32366282UL, 0x646cc504UL, 0xc8d98a08UL,
0x4ac21251UL, 0x958424a2UL, 0xf0794f05UL, 0x3b83984bUL, 0x77073096UL,
0xee0e612cUL, 0x076dc419UL, 0x0edb8832UL, 0x1db71064UL, 0x3b6e20c8UL,
0x76dc4190UL, 0xedb88320UL
},
{
0xccaa009eUL, 0x4225077dUL, 0x844a0efaUL, 0xd3e51bb5UL, 0x7cbb312bUL,
0xf9766256UL, 0x299dc2edUL, 0x533b85daUL, 0xa6770bb4UL, 0x979f1129UL,
0xf44f2413UL, 0x33ef4e67UL, 0x67de9cceUL, 0xcfbd399cUL, 0x440b7579UL,
0x8816eaf2UL, 0xcb5cd3a5UL, 0x4dc8a10bUL, 0x9b914216UL, 0xec53826dUL,
0x03d6029bUL, 0x07ac0536UL, 0x0f580a6cUL, 0x1eb014d8UL, 0x3d6029b0UL,
0x7ac05360UL, 0xf580a6c0UL, 0x30704bc1UL, 0x60e09782UL, 0xc1c12f04UL,
0x58f35849UL, 0xb1e6b092UL
},
{
0xae689191UL, 0x87a02563UL, 0xd4314c87UL, 0x73139f4fUL, 0xe6273e9eUL,
0x173f7b7dUL, 0x2e7ef6faUL, 0x5cfdedf4UL, 0xb9fbdbe8UL, 0xa886b191UL,
0x8a7c6563UL, 0xcf89cc87UL, 0x44629f4fUL, 0x88c53e9eUL, 0xcafb7b7dUL,
0x4e87f0bbUL, 0x9d0fe176UL, 0xe16ec4adUL, 0x19ac8f1bUL, 0x33591e36UL,
0x66b23c6cUL, 0xcd6478d8UL, 0x41b9f7f1UL, 0x8373efe2UL, 0xdd96d985UL,
0x605cb54bUL, 0xc0b96a96UL, 0x5a03d36dUL, 0xb407a6daUL, 0xb37e4bf5UL,
0xbd8d91abUL, 0xa06a2517UL
},
{
0xf1da05aaUL, 0x38c50d15UL, 0x718a1a2aUL, 0xe3143454UL, 0x1d596ee9UL,
0x3ab2ddd2UL, 0x7565bba4UL, 0xeacb7748UL, 0x0ee7e8d1UL, 0x1dcfd1a2UL,
0x3b9fa344UL, 0x773f4688UL, 0xee7e8d10UL, 0x078c1c61UL, 0x0f1838c2UL,
0x1e307184UL, 0x3c60e308UL, 0x78c1c610UL, 0xf1838c20UL, 0x38761e01UL,
0x70ec3c02UL, 0xe1d87804UL, 0x18c1f649UL, 0x3183ec92UL, 0x6307d924UL,
0xc60fb248UL, 0x576e62d1UL, 0xaedcc5a2UL, 0x86c88d05UL, 0xd6e01c4bUL,
0x76b13ed7UL, 0xed627daeUL
},
{
0x8f352d95UL, 0xc51b5d6bUL, 0x5147bc97UL, 0xa28f792eUL, 0x9e6ff41dUL,
0xe7aeee7bUL, 0x142cdab7UL, 0x2859b56eUL, 0x50b36adcUL, 0xa166d5b8UL,
0x99bcad31UL, 0xe8085c23UL, 0x0b61be07UL, 0x16c37c0eUL, 0x2d86f81cUL,
0x5b0df038UL, 0xb61be070UL, 0xb746c6a1UL, 0xb5fc8b03UL, 0xb0881047UL,
0xba6126cfUL, 0xafb34bdfUL, 0x841791ffUL, 0xd35e25bfUL, 0x7dcd4d3fUL,
0xfb9a9a7eUL, 0x2c4432bdUL, 0x5888657aUL, 0xb110caf4UL, 0xb95093a9UL,
0xa9d02113UL, 0x88d14467UL
},
{
0x33fff533UL, 0x67ffea66UL, 0xcfffd4ccUL, 0x448eafd9UL, 0x891d5fb2UL,
0xc94bb925UL, 0x49e6740bUL, 0x93cce816UL, 0xfce8d66dUL, 0x22a0aa9bUL,
0x45415536UL, 0x8a82aa6cUL, 0xce745299UL, 0x4799a373UL, 0x8f3346e6UL,
0xc5178b8dUL, 0x515e115bUL, 0xa2bc22b6UL, 0x9e09432dUL, 0xe763801bUL,
0x15b60677UL, 0x2b6c0ceeUL, 0x56d819dcUL, 0xadb033b8UL, 0x80116131UL,
0xdb53c423UL, 0x6dd68e07UL, 0xdbad1c0eUL, 0x6c2b3e5dUL, 0xd8567cbaUL,
0x6bddff35UL, 0xd7bbfe6aUL
},
{
0xce3371cbUL, 0x4717e5d7UL, 0x8e2fcbaeUL, 0xc72e911dUL, 0x552c247bUL,
0xaa5848f6UL, 0x8fc197adUL, 0xc4f2291bUL, 0x52955477UL, 0xa52aa8eeUL,
0x9124579dUL, 0xf939a97bUL, 0x290254b7UL, 0x5204a96eUL, 0xa40952dcUL,
0x9363a3f9UL, 0xfdb641b3UL, 0x201d8527UL, 0x403b0a4eUL, 0x8076149cUL,
0xdb9d2f79UL, 0x6c4b58b3UL, 0xd896b166UL, 0x6a5c648dUL, 0xd4b8c91aUL,
0x72009475UL, 0xe40128eaUL, 0x13735795UL, 0x26e6af2aUL, 0x4dcd5e54UL,
0x9b9abca8UL, 0xec447f11UL
},
{
0x1072db28UL, 0x20e5b650UL, 0x41cb6ca0UL, 0x8396d940UL, 0xdc5cb4c1UL,
0x63c86fc3UL, 0xc790df86UL, 0x5450b94dUL, 0xa8a1729aUL, 0x8a33e375UL,
0xcf16c0abUL, 0x455c8717UL, 0x8ab90e2eUL, 0xce031a1dUL, 0x4777327bUL,
0x8eee64f6UL, 0xc6adcfadUL, 0x562a991bUL, 0xac553236UL, 0x83db622dUL,
0xdcc7c21bUL, 0x62fe8277UL, 0xc5fd04eeUL, 0x508b0f9dUL, 0xa1161f3aUL,
0x995d3835UL, 0xe9cb762bUL, 0x08e7ea17UL, 0x11cfd42eUL, 0x239fa85cUL,
0x473f50b8UL, 0x8e7ea170UL
},
{
0xf891f16fUL, 0x2a52e49fUL, 0x54a5c93eUL, 0xa94b927cUL, 0x89e622b9UL,
0xc8bd4333UL, 0x4a0b8027UL, 0x9417004eUL, 0xf35f06ddUL, 0x3dcf0bfbUL,
0x7b9e17f6UL, 0xf73c2fecUL, 0x35095999UL, 0x6a12b332UL, 0xd4256664UL,
0x733bca89UL, 0xe6779512UL, 0x179e2c65UL, 0x2f3c58caUL, 0x5e78b194UL,
0xbcf16328UL, 0xa293c011UL, 0x9e568663UL, 0xe7dc0a87UL, 0x14c9134fUL,
0x2992269eUL, 0x53244d3cUL, 0xa6489a78UL, 0x97e032b1UL, 0xf4b16323UL,
0x3213c007UL, 0x6427800eUL
},
{
0x88b6ba63UL, 0xca1c7287UL, 0x4f49e34fUL, 0x9e93c69eUL, 0xe6568b7dUL,
0x17dc10bbUL, 0x2fb82176UL, 0x5f7042ecUL, 0xbee085d8UL, 0xa6b00df1UL,
0x96111da3UL, 0xf7533d07UL, 0x35d77c4fUL, 0x6baef89eUL, 0xd75df13cUL,
0x75cae439UL, 0xeb95c872UL, 0x0c5a96a5UL, 0x18b52d4aUL, 0x316a5a94UL,
0x62d4b528UL, 0xc5a96a50UL, 0x5023d2e1UL, 0xa047a5c2UL, 0x9bfe4dc5UL,
0xec8d9dcbUL, 0x026a3dd7UL, 0x04d47baeUL, 0x09a8f75cUL, 0x1351eeb8UL,
0x26a3dd70UL, 0x4d47bae0UL
},
{
0x5ad8a92cUL, 0xb5b15258UL, 0xb013a2f1UL, 0xbb5643a3UL, 0xaddd8107UL,
0x80ca044fUL, 0xdae50edfUL, 0x6ebb1bffUL, 0xdd7637feUL, 0x619d69bdUL,
0xc33ad37aUL, 0x5d04a0b5UL, 0xba09416aUL, 0xaf638495UL, 0x85b60f6bUL,
0xd01d1897UL, 0x7b4b376fUL, 0xf6966edeUL, 0x365ddbfdUL, 0x6cbbb7faUL,
0xd9776ff4UL, 0x699fd9a9UL, 0xd33fb352UL, 0x7d0e60e5UL, 0xfa1cc1caUL,
0x2f4885d5UL, 0x5e910baaUL, 0xbd221754UL, 0xa13528e9UL, 0x991b5793UL,
0xe947a967UL, 0x09fe548fUL
},
{
0xb566f6e2UL, 0xb1bceb85UL, 0xb808d14bUL, 0xab60a4d7UL, 0x8db04fefUL,
0xc011999fUL, 0x5b52357fUL, 0xb6a46afeUL, 0xb639d3bdUL, 0xb702a13bUL,
0xb5744437UL, 0xb1998e2fUL, 0xb8421a1fUL, 0xabf5327fUL, 0x8c9b62bfUL,
0xc247c33fUL, 0x5ffe803fUL, 0xbffd007eUL, 0xa48b06bdUL, 0x92670b3bUL,
0xffbf1037UL, 0x240f262fUL, 0x481e4c5eUL, 0x903c98bcUL, 0xfb083739UL,
0x2d616833UL, 0x5ac2d066UL, 0xb585a0ccUL, 0xb07a47d9UL, 0xbb8589f3UL,
0xac7a15a7UL, 0x83852d0fUL
},
{
0x9d9129bfUL, 0xe053553fUL, 0x1bd7ac3fUL, 0x37af587eUL, 0x6f5eb0fcUL,
0xdebd61f8UL, 0x660bc5b1UL, 0xcc178b62UL, 0x435e1085UL, 0x86bc210aUL,
0xd6094455UL, 0x77638eebUL, 0xeec71dd6UL, 0x06ff3dedUL, 0x0dfe7bdaUL,
0x1bfcf7b4UL, 0x37f9ef68UL, 0x6ff3ded0UL, 0xdfe7bda0UL, 0x64be7d01UL,
0xc97cfa02UL, 0x4988f245UL, 0x9311e48aUL, 0xfd52cf55UL, 0x21d498ebUL,
0x43a931d6UL, 0x875263acUL, 0xd5d5c119UL, 0x70da8473UL, 0xe1b508e6UL,
0x181b178dUL, 0x30362f1aUL
},
{
0x2ee43a2cUL, 0x5dc87458UL, 0xbb90e8b0UL, 0xac50d721UL, 0x83d0a803UL,
0xdcd05647UL, 0x62d1aacfUL, 0xc5a3559eUL, 0x5037ad7dUL, 0xa06f5afaUL,
0x9bafb3b5UL, 0xec2e612bUL, 0x032dc417UL, 0x065b882eUL, 0x0cb7105cUL,
0x196e20b8UL, 0x32dc4170UL, 0x65b882e0UL, 0xcb7105c0UL, 0x4d930dc1UL,
0x9b261b82UL, 0xed3d3145UL, 0x010b64cbUL, 0x0216c996UL, 0x042d932cUL,
0x085b2658UL, 0x10b64cb0UL, 0x216c9960UL, 0x42d932c0UL, 0x85b26580UL,
0xd015cd41UL, 0x7b5a9cc3UL
},
{
0x1b4511eeUL, 0x368a23dcUL, 0x6d1447b8UL, 0xda288f70UL, 0x6f2018a1UL,
0xde403142UL, 0x67f164c5UL, 0xcfe2c98aUL, 0x44b49555UL, 0x89692aaaUL,
0xc9a35315UL, 0x4837a06bUL, 0x906f40d6UL, 0xfbaf87edUL, 0x2c2e099bUL,
0x585c1336UL, 0xb0b8266cUL, 0xba014a99UL, 0xaf739373UL, 0x859620a7UL,
0xd05d470fUL, 0x7bcb885fUL, 0xf79710beUL, 0x345f273dUL, 0x68be4e7aUL,
0xd17c9cf4UL, 0x79883fa9UL, 0xf3107f52UL, 0x3d51f8e5UL, 0x7aa3f1caUL,
0xf547e394UL, 0x31fec169UL
},
{
0xbce15202UL, 0xa2b3a245UL, 0x9e1642cbUL, 0xe75d83d7UL, 0x15ca01efUL,
0x2b9403deUL, 0x572807bcUL, 0xae500f78UL, 0x87d118b1UL, 0xd4d33723UL,
0x72d76807UL, 0xe5aed00eUL, 0x102ca65dUL, 0x20594cbaUL, 0x40b29974UL,
0x816532e8UL, 0xd9bb6391UL, 0x6807c163UL, 0xd00f82c6UL, 0x7b6e03cdUL,
0xf6dc079aUL, 0x36c90975UL, 0x6d9212eaUL, 0xdb2425d4UL, 0x6d394de9UL,
0xda729bd2UL, 0x6f9431e5UL, 0xdf2863caUL, 0x6521c1d5UL, 0xca4383aaUL,
0x4ff60115UL, 0x9fec022aUL
},
{
0xff08e5efUL, 0x2560cd9fUL, 0x4ac19b3eUL, 0x9583367cUL, 0xf0776ab9UL,
0x3b9fd333UL, 0x773fa666UL, 0xee7f4cccUL, 0x078f9fd9UL, 0x0f1f3fb2UL,
0x1e3e7f64UL, 0x3c7cfec8UL, 0x78f9fd90UL, 0xf1f3fb20UL, 0x3896f001UL,
0x712de002UL, 0xe25bc004UL, 0x1fc68649UL, 0x3f8d0c92UL, 0x7f1a1924UL,
0xfe343248UL, 0x271962d1UL, 0x4e32c5a2UL, 0x9c658b44UL, 0xe3ba10c9UL,
0x1c0527d3UL, 0x380a4fa6UL, 0x70149f4cUL, 0xe0293e98UL, 0x1b237b71UL,
0x3646f6e2UL, 0x6c8dedc4UL
},
{
0x6f76172eUL, 0xdeec2e5cUL, 0x66a95af9UL, 0xcd52b5f2UL, 0x41d46da5UL,
0x83a8db4aUL, 0xdc20b0d5UL, 0x633067ebUL, 0xc660cfd6UL, 0x57b099edUL,
0xaf6133daUL, 0x85b361f5UL, 0xd017c5abUL, 0x7b5e8d17UL, 0xf6bd1a2eUL,
0x360b321dUL, 0x6c16643aUL, 0xd82cc874UL, 0x6b2896a9UL, 0xd6512d52UL,
0x77d35ce5UL, 0xefa6b9caUL, 0x043c75d5UL, 0x0878ebaaUL, 0x10f1d754UL,
0x21e3aea8UL, 0x43c75d50UL, 0x878ebaa0UL, 0xd46c7301UL, 0x73a9e043UL,
0xe753c086UL, 0x15d6874dUL
},
{
0x56f5cab9UL, 0xadeb9572UL, 0x80a62ca5UL, 0xda3d5f0bUL, 0x6f0bb857UL,
0xde1770aeUL, 0x675fe71dUL, 0xcebfce3aUL, 0x460e9a35UL, 0x8c1d346aUL,
0xc34b6e95UL, 0x5de7db6bUL, 0xbbcfb6d6UL, 0xacee6bedUL, 0x82add19bUL,
0xde2aa577UL, 0x67244cafUL, 0xce48995eUL, 0x47e034fdUL, 0x8fc069faUL,
0xc4f1d5b5UL, 0x5292ad2bUL, 0xa5255a56UL, 0x913bb2edUL, 0xf906639bUL,
0x297dc177UL, 0x52fb82eeUL, 0xa5f705dcUL, 0x909f0df9UL, 0xfa4f1db3UL,
0x2fef3d27UL, 0x5fde7a4eUL
},
{
0x385993acUL, 0x70b32758UL, 0xe1664eb0UL, 0x19bd9b21UL, 0x337b3642UL,
0x66f66c84UL, 0xcdecd908UL, 0x40a8b451UL, 0x815168a2UL, 0xd9d3d705UL,
0x68d6a84bUL, 0xd1ad5096UL, 0x782ba76dUL, 0xf0574edaUL, 0x3bdf9bf5UL,
0x77bf37eaUL, 0xef7e6fd4UL, 0x058dd9e9UL, 0x0b1bb3d2UL, 0x163767a4UL,
0x2c6ecf48UL, 0x58dd9e90UL, 0xb1bb3d20UL, 0xb8077c01UL, 0xab7ffe43UL,
0x8d8efac7UL, 0xc06cf3cfUL, 0x5ba8e1dfUL, 0xb751c3beUL, 0xb5d2813dUL,
0xb0d4043bUL, 0xbad90e37UL
},
{
0xb4247b20UL, 0xb339f001UL, 0xbd02e643UL, 0xa174cac7UL, 0x999893cfUL,
0xe84021dfUL, 0x0bf145ffUL, 0x17e28bfeUL, 0x2fc517fcUL, 0x5f8a2ff8UL,
0xbf145ff0UL, 0xa559b9a1UL, 0x91c27503UL, 0xf8f5ec47UL, 0x2a9adecfUL,
0x5535bd9eUL, 0xaa6b7b3cUL, 0x8fa7f039UL, 0xc43ee633UL, 0x530cca27UL,
0xa619944eUL, 0x97422eddUL, 0xf5f55bfbUL, 0x309bb1b7UL, 0x6137636eUL,
0xc26ec6dcUL, 0x5fac8bf9UL, 0xbf5917f2UL, 0xa5c329a5UL, 0x90f7550bUL,
0xfa9fac57UL, 0x2e4e5eefUL
},
{
0x695186a7UL, 0xd2a30d4eUL, 0x7e371cddUL, 0xfc6e39baUL, 0x23ad7535UL,
0x475aea6aUL, 0x8eb5d4d4UL, 0xc61aafe9UL, 0x57445993UL, 0xae88b326UL,
0x8660600dUL, 0xd7b1c65bUL, 0x74128af7UL, 0xe82515eeUL, 0x0b3b2d9dUL,
0x16765b3aUL, 0x2cecb674UL, 0x59d96ce8UL, 0xb3b2d9d0UL, 0xbc14b5e1UL,
0xa3586d83UL, 0x9dc1dd47UL, 0xe0f2bccfUL, 0x1a947fdfUL, 0x3528ffbeUL,
0x6a51ff7cUL, 0xd4a3fef8UL, 0x7236fbb1UL, 0xe46df762UL, 0x13aae885UL,
0x2755d10aUL, 0x4eaba214UL
},
{
0x66bc001eUL, 0xcd78003cUL, 0x41810639UL, 0x83020c72UL, 0xdd751ea5UL,
0x619b3b0bUL, 0xc3367616UL, 0x5d1dea6dUL, 0xba3bd4daUL, 0xaf06aff5UL,
0x857c59abUL, 0xd189b517UL, 0x78626c6fUL, 0xf0c4d8deUL, 0x3af8b7fdUL,
0x75f16ffaUL, 0xebe2dff4UL, 0x0cb4b9a9UL, 0x19697352UL, 0x32d2e6a4UL,
0x65a5cd48UL, 0xcb4b9a90UL, 0x4de63361UL, 0x9bcc66c2UL, 0xece9cbc5UL,
0x02a291cbUL, 0x05452396UL, 0x0a8a472cUL, 0x15148e58UL, 0x2a291cb0UL,
0x54523960UL, 0xa8a472c0UL
},
{
0xb58b27b3UL, 0xb0674927UL, 0xbbbf940fUL, 0xac0e2e5fUL, 0x836d5affUL,
0xddabb3bfUL, 0x6026613fUL, 0xc04cc27eUL, 0x5be882bdUL, 0xb7d1057aUL,
0xb4d30cb5UL, 0xb2d71f2bUL, 0xbedf3817UL, 0xa6cf766fUL, 0x96efea9fUL,
0xf6aed37fUL, 0x362ca0bfUL, 0x6c59417eUL, 0xd8b282fcUL, 0x6a1403b9UL,
0xd4280772UL, 0x732108a5UL, 0xe642114aUL, 0x17f524d5UL, 0x2fea49aaUL,
0x5fd49354UL, 0xbfa926a8UL, 0xa4234b11UL, 0x93379063UL, 0xfd1e2687UL,
0x214d4b4fUL, 0x429a969eUL
},
{
0xfe273162UL, 0x273f6485UL, 0x4e7ec90aUL, 0x9cfd9214UL, 0xe28a2269UL,
0x1e654293UL, 0x3cca8526UL, 0x79950a4cUL, 0xf32a1498UL, 0x3d252f71UL,
0x7a4a5ee2UL, 0xf494bdc4UL, 0x32587dc9UL, 0x64b0fb92UL, 0xc961f724UL,
0x49b2e809UL, 0x9365d012UL, 0xfdbaa665UL, 0x20044a8bUL, 0x40089516UL,
0x80112a2cUL, 0xdb535219UL, 0x6dd7a273UL, 0xdbaf44e6UL, 0x6c2f8f8dUL,
0xd85f1f1aUL, 0x6bcf3875UL, 0xd79e70eaUL, 0x744de795UL, 0xe89bcf2aUL,
0x0a469815UL, 0x148d302aUL
},
{
0xd3c98813UL, 0x7ce21667UL, 0xf9c42cceUL, 0x28f95fddUL, 0x51f2bfbaUL,
0xa3e57f74UL, 0x9cbbf8a9UL, 0xe206f713UL, 0x1f7ce867UL, 0x3ef9d0ceUL,
0x7df3a19cUL, 0xfbe74338UL, 0x2cbf8031UL, 0x597f0062UL, 0xb2fe00c4UL,
0xbe8d07c9UL, 0xa66b09d3UL, 0x97a715e7UL, 0xf43f2d8fUL, 0x330f5d5fUL,
0x661ebabeUL, 0xcc3d757cUL, 0x430becb9UL, 0x8617d972UL, 0xd75eb4a5UL,
0x75cc6f0bUL, 0xeb98de16UL, 0x0c40ba6dUL, 0x188174daUL, 0x3102e9b4UL,
0x6205d368UL, 0xc40ba6d0UL
},
{
0xf7d6deb4UL, 0x34dcbb29UL, 0x69b97652UL, 0xd372eca4UL, 0x7d94df09UL,
0xfb29be12UL, 0x2d227a65UL, 0x5a44f4caUL, 0xb489e994UL, 0xb262d569UL,
0xbfb4ac93UL, 0xa4185f67UL, 0x9341b88fUL, 0xfdf2775fUL, 0x2095e8ffUL,
0x412bd1feUL, 0x8257a3fcUL, 0xdfde41b9UL, 0x64cd8533UL, 0xc99b0a66UL,
0x4847128dUL, 0x908e251aUL, 0xfa6d4c75UL, 0x2fab9eabUL, 0x5f573d56UL,
0xbeae7aacUL, 0xa62df319UL, 0x972ae073UL, 0xf524c6a7UL, 0x31388b0fUL,
0x6271161eUL, 0xc4e22c3cUL
},
{
0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL,
0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL,
0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL, 0x00040000UL,
0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, 0x10000000UL,
0x20000000UL, 0x40000000UL
},
{
0x76dc4190UL, 0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL,
0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL,
0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL,
0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL,
0x00800000UL, 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
0x10000000UL, 0x20000000UL
},
{
0x1db71064UL, 0x3b6e20c8UL, 0x76dc4190UL, 0xedb88320UL, 0x00000001UL,
0x00000002UL, 0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL,
0x00000040UL, 0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL,
0x00000800UL, 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL,
0x00200000UL, 0x00400000UL, 0x00800000UL, 0x01000000UL, 0x02000000UL,
0x04000000UL, 0x08000000UL
}
};
#endif /* CRC32_H_ */
#endif /* CRC32_TBL_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -10,10 +10,8 @@
subject to change. Applications should only use zlib.h.
*/
/* @(#) $Id$ */
#include "zutil.h"
#include "gzendian.h"
#include "zendian.h"
/* define NO_GZIP when compiling if you want to disable gzip header and
trailer creation by deflate(). NO_GZIP would be used to avoid linking in
@ -23,10 +21,6 @@
# define GZIP
#endif
#define NIL 0
/* Tail of hash chains */
/* ===========================================================================
* Internal compression state.
*/
@ -52,7 +46,7 @@
#define MAX_BITS 15
/* All codes must not exceed MAX_BITS bits */
#define Buf_size 16
#define BIT_BUF_SIZE 64
/* size of bit buffer in bi_buf */
#define END_BLOCK 256
@ -70,6 +64,10 @@
#define FINISH_STATE 666 /* stream complete */
/* Stream status */
#define HASH_BITS 16u /* log2(HASH_SIZE) */
#define HASH_SIZE 65536u /* number of elements in hash table */
#define HASH_MASK (HASH_SIZE - 1u) /* HASH_SIZE-1 */
/* Data structure describing a single value and its code string. */
typedef struct ct_data_s {
@ -97,34 +95,47 @@ typedef struct tree_desc_s {
} tree_desc;
typedef uint16_t Pos;
typedef unsigned IPos;
/* A Pos is an index in the character window. We use short instead of int to
* save space in the various tables. IPos is used only for parameter passing.
* save space in the various tables.
*/
typedef struct internal_state {
PREFIX3(stream) *strm; /* pointer back to this zlib stream */
int status; /* as the name implies */
unsigned char *pending_buf; /* output still pending */
unsigned long pending_buf_size; /* size of pending_buf */
unsigned char *pending_out; /* next pending byte to output to the stream */
uint32_t pending_buf_size; /* size of pending_buf */
uint32_t pending; /* nb of bytes in the pending buffer */
int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
PREFIX(gz_headerp) gzhead; /* gzip header information to write */
uint32_t gzindex; /* where in extra, name, or comment */
unsigned char method; /* can only be DEFLATED */
PREFIX(gz_headerp) gzhead; /* gzip header information to write */
int status; /* as the name implies */
int last_flush; /* value of flush param for previous deflate call */
int reproducible; /* Whether reproducible compression results are required. */
#ifdef X86_PCLMULQDQ_CRC
unsigned crc0[4 * 5];
#endif
int block_open;
/* Whether or not a block is currently open for the QUICK deflation scheme.
* This is set to 1 if there is an active block, or 0 if the block was just closed.
*/
/* used by deflate.c: */
unsigned int w_size; /* LZ77 window size (32K by default) */
unsigned int w_bits; /* log2(w_size) (8..16) */
unsigned int w_mask; /* w_size - 1 */
unsigned int lookahead; /* number of valid bytes ahead in window */
unsigned int high_water;
/* High water mark offset in window for initialized bytes -- bytes above
* this are set to zero in order to avoid memory check warnings when
* longest match routines access bytes past the input. This is then
* updated to the new high water mark.
*/
unsigned int window_size;
/* Actual size of window: 2*wSize, except when the user input buffer
* is directly used as sliding window.
*/
unsigned char *window;
/* Sliding window. Input bytes are read into the second half of the window,
@ -136,44 +147,24 @@ typedef struct internal_state {
* To do: use the user input buffer as sliding window.
*/
unsigned long window_size;
/* Actual size of window: 2*wSize, except when the user input buffer
* is directly used as sliding window.
*/
Pos *prev;
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Pos *head; /* Heads of the hash chains or NIL. */
Pos *head; /* Heads of the hash chains or 0. */
unsigned int ins_h; /* hash index of string to be inserted */
unsigned int hash_size; /* number of elements in hash table */
unsigned int hash_bits; /* log2(hash_size) */
unsigned int hash_mask; /* hash_size-1 */
#if !defined(__x86_64__) && !defined(_M_X64) && !defined(__i386) && !defined(_M_IX86)
unsigned int hash_shift;
#endif
/* Number of bits by which ins_h must be shifted at each input
* step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* hash_shift * MIN_MATCH >= hash_bits
*/
long block_start;
int block_start;
/* Window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
unsigned int match_length; /* length of best match */
IPos prev_match; /* previous match */
Pos prev_match; /* previous match */
int match_available; /* set if previous match exists */
unsigned int strstart; /* start of string to insert */
unsigned int match_start; /* start of matching string */
unsigned int lookahead; /* number of valid bytes ahead in window */
unsigned int prev_length;
/* Length of the best match at previous step. Matches not greater than this
@ -181,15 +172,13 @@ typedef struct internal_state {
*/
unsigned int max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this
* length. A higher limit improves compression ratio but degrades the
* speed.
/* To speed up deflation, hash chains are never searched beyond this length.
* A higher limit improves compression ratio but degrades the speed.
*/
unsigned int max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value. This mechanism is used only for compression
* levels >= 4.
/* Attempt to find a better match only when the current match is strictly smaller
* than this value. This mechanism is used only for compression levels >= 4.
*/
# define max_insert_length max_lazy_match
/* Insert new strings in the hash table only if the match length is not
@ -205,6 +194,11 @@ typedef struct internal_state {
int nice_match; /* Stop searching when current match exceeds this */
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
/* Only used if X86_PCLMULQDQ_CRC is defined */
unsigned crc0[4 * 5];
#endif
/* used by trees.c: */
/* Didn't use ct_data typedef below to suppress compiler warning */
struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
@ -229,8 +223,6 @@ typedef struct internal_state {
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/
unsigned char *sym_buf; /* buffer for distances and literals/lengths */
unsigned int lit_bufsize;
/* Size of match buffer for literals/lengths. There are 4 reasons for
* limiting lit_bufsize to 64K:
@ -251,6 +243,7 @@ typedef struct internal_state {
* - I can't count above 4
*/
unsigned char *sym_buf; /* buffer for distances and literals/lengths */
unsigned int sym_next; /* running index in sym_buf */
unsigned int sym_end; /* symbol table full when sym_next reaches this */
@ -259,33 +252,22 @@ typedef struct internal_state {
unsigned int matches; /* number of string matches in current block */
unsigned int insert; /* bytes at end of window left to insert */
#ifdef ZLIB_DEBUG
/* compressed_len and bits_sent are only used if ZLIB_DEBUG is defined */
unsigned long compressed_len; /* total bit length of compressed file mod 2^32 */
unsigned long bits_sent; /* bit length of compressed data sent mod 2^32 */
#endif
uint16_t bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least
* significant bits).
*/
int bi_valid;
/* Number of valid bits in bi_buf. All bits above the last valid bit
* are always zero.
*/
/* Reserved for future use and alignment purposes */
char *reserved_p;
unsigned long high_water;
/* High water mark offset in window for initialized bytes -- bytes above
* this are set to zero in order to avoid memory check warnings when
* longest match routines access bytes past the input. This is then
* updated to the new high water mark.
*/
int block_open;
/* Whether or not a block is currently open for the QUICK deflation scheme.
* This is set to 1 if there is an active block, or 0 if the block was just
* closed.
*/
uint64_t bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least significant bits). */
} deflate_state;
int32_t bi_valid;
/* Number of valid bits in bi_buf. All bits above the last valid bit are always zero. */
/* Reserved for future use and alignment purposes */
int32_t reserved[11];
} ALIGNED_(8) deflate_state;
typedef enum {
need_more, /* block not completed, need more input or more output */
@ -297,18 +279,88 @@ typedef enum {
/* Output a byte on the stream.
* IN assertion: there is enough room in pending_buf.
*/
#define put_byte(s, c) {s->pending_buf[s->pending++] = (unsigned char)(c);}
#define put_byte(s, c) { \
s->pending_buf[s->pending++] = (unsigned char)(c); \
}
/* ===========================================================================
* Output a short LSB first on the stream.
* IN assertion: there is enough room in pendingBuf.
* IN assertion: there is enough room in pending_buf.
*/
static inline void put_short(deflate_state *s, uint16_t w) {
#if BYTE_ORDER == BIG_ENDIAN
w = ZSWAP16(w);
#endif
memcpy(&(s->pending_buf[s->pending]), &w, sizeof(uint16_t));
#if defined(UNALIGNED_OK)
*(uint16_t *)(&s->pending_buf[s->pending]) = w;
s->pending += 2;
#else
put_byte(s, (w & 0xff));
put_byte(s, ((w >> 8) & 0xff));
#endif
}
/* ===========================================================================
 * Append a 16-bit value to the pending output, most significant byte first.
 * Two bytes are written through put_byte(), so s->pending advances by 2.
 * IN assertion: there is enough room in pending_buf.
 */
static inline void put_short_msb(deflate_state *s, uint16_t w) {
    const unsigned int hi = (w >> 8) & 0xff;
    const unsigned int lo = w & 0xff;

    put_byte(s, hi);    /* high-order byte goes out first */
    put_byte(s, lo);
}
/* ===========================================================================
 * Append a 32-bit unsigned value to the pending output, least significant
 * byte first. s->pending advances by 4.
 * IN assertion: there is enough room in pending_buf.
 *
 * NOTE(review): the UNALIGNED_OK path stores the value in host byte order,
 * which is LSB-first only on little-endian hosts -- confirm UNALIGNED_OK is
 * never defined for big-endian targets.
 */
static inline void put_uint32(deflate_state *s, uint32_t dw) {
#if defined(UNALIGNED_OK)
    /* Single (possibly unaligned) 32-bit store straight into pending_buf. */
    uint32_t *dst = (uint32_t *)(&s->pending_buf[s->pending]);

    *dst = dw;
    s->pending += 4;
#else
    /* Portable path: emit one byte at a time, lowest byte first. */
    unsigned int shift;

    for (shift = 0; shift < 32; shift += 8) {
        put_byte(s, ((dw >> shift) & 0xff));
    }
#endif
}
/* ===========================================================================
 * Append a 32-bit unsigned value to the pending output, most significant
 * byte first. s->pending advances by 4.
 * IN assertion: there is enough room in pending_buf.
 *
 * NOTE(review): the UNALIGNED_OK path byte-swaps unconditionally with
 * ZSWAP32 before a host-order store, which yields MSB-first output only on
 * little-endian hosts -- confirm UNALIGNED_OK is never defined for
 * big-endian targets.
 */
static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
#if defined(UNALIGNED_OK)
    /* Swap to big-endian layout, then do one (possibly unaligned) store. */
    uint32_t *dst = (uint32_t *)(&s->pending_buf[s->pending]);

    *dst = ZSWAP32(dw);
    s->pending += 4;
#else
    /* Portable path: emit one byte at a time, highest byte first. */
    int shift;

    for (shift = 24; shift >= 0; shift -= 8) {
        put_byte(s, ((dw >> shift) & 0xff));
    }
#endif
}
/* ===========================================================================
 * Append a 64-bit unsigned value to the pending output, least significant
 * byte first. s->pending advances by 8.
 * IN assertion: there is enough room in pending_buf.
 *
 * NOTE(review): both unaligned-store paths write in host byte order, which
 * is LSB-first only on little-endian hosts -- confirm UNALIGNED64_OK and
 * UNALIGNED_OK are never defined for big-endian targets.
 */
static inline void put_uint64(deflate_state *s, uint64_t lld) {
#if defined(UNALIGNED64_OK)
    /* One (possibly unaligned) 64-bit store. */
    uint64_t *dst = (uint64_t *)(&s->pending_buf[s->pending]);

    *dst = lld;
    s->pending += 8;
#elif defined(UNALIGNED_OK)
    /* Two consecutive 32-bit stores: low half first, then high half. */
    uint32_t *dst = (uint32_t *)(&s->pending_buf[s->pending]);

    dst[0] = lld & 0xffffffff;
    dst[1] = (lld >> 32) & 0xffffffff;
    s->pending += 8;
#else
    /* Portable path: emit one byte at a time, lowest byte first. */
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8) {
        put_byte(s, ((lld >> shift) & 0xff));
    }
#endif
}
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
@ -326,120 +378,34 @@ static inline void put_short(deflate_state *s, uint16_t w) {
memory checker errors from longest match routines */
void ZLIB_INTERNAL fill_window_c(deflate_state *s);
void Z_INTERNAL fill_window(deflate_state *s);
void Z_INTERNAL slide_hash_c(deflate_state *s);
/* in trees.c */
void ZLIB_INTERNAL _tr_init(deflate_state *s);
int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, char *buf, unsigned long stored_len, int last);
void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
void ZLIB_INTERNAL _tr_align(deflate_state *s);
void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, char *buf, unsigned long stored_len, int last);
void ZLIB_INTERNAL bi_windup(deflate_state *s);
unsigned ZLIB_INTERNAL bi_reverse(unsigned code, int len);
void ZLIB_INTERNAL flush_pending(PREFIX3(streamp) strm);
#define d_code(dist) ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
void Z_INTERNAL zng_tr_init(deflate_state *s);
void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_len, int last);
void Z_INTERNAL zng_tr_flush_bits(deflate_state *s);
void Z_INTERNAL zng_tr_align(deflate_state *s);
void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored_len, int last);
unsigned Z_INTERNAL bi_reverse(unsigned code, int len);
void Z_INTERNAL flush_pending(PREFIX3(streamp) strm);
#define d_code(dist) ((dist) < 256 ? zng_dist_code[dist] : zng_dist_code[256+((dist)>>7)])
/* Mapping from a distance to a distance code. dist is the distance - 1 and
* must not have side effects. _dist_code[256] and _dist_code[257] are never
* must not have side effects. zng_dist_code[256] and zng_dist_code[257] are never
* used.
*/
#ifndef ZLIB_DEBUG
/* Inline versions of _tr_tally for speed: */
# if defined(GEN_TREES_H)
extern unsigned char ZLIB_INTERNAL _length_code[];
extern unsigned char ZLIB_INTERNAL _dist_code[];
# else
extern const unsigned char ZLIB_INTERNAL _length_code[];
extern const unsigned char ZLIB_INTERNAL _dist_code[];
# endif
# define _tr_tally_lit(s, c, flush) \
{ unsigned char cc = (c); \
s->sym_buf[s->sym_next++] = 0; \
s->sym_buf[s->sym_next++] = 0; \
s->sym_buf[s->sym_next++] = cc; \
s->dyn_ltree[cc].Freq++; \
flush = (s->sym_next == s->sym_end); \
}
# define _tr_tally_dist(s, distance, length, flush) \
{ unsigned char len = (unsigned char)(length); \
uint16_t dist = (uint16_t)(distance); \
s->sym_buf[s->sym_next++] = dist; \
s->sym_buf[s->sym_next++] = dist >> 8; \
s->sym_buf[s->sym_next++] = len; \
dist--; \
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
s->dyn_dtree[d_code(dist)].Freq++; \
flush = (s->sym_next == s->sym_end); \
}
#else
# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
# define _tr_tally_dist(s, distance, length, flush) \
flush = _tr_tally(s, (unsigned)(distance), (unsigned)(length))
#endif
/* ===========================================================================
* Update a hash value with the given input byte
* IN assertion: all calls to UPDATE_HASH are made with consecutive
* input characters, so that a running hash key can be computed from the
* previous key instead of complete recalculation each time.
*/
#ifdef NOT_TWEAK_COMPILER
#define TRIGGER_LEVEL 6
#else
#define TRIGGER_LEVEL 5
#endif
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
#define UPDATE_HASH(s, h, i) \
do {\
if (s->level < TRIGGER_LEVEL) \
h = (3483 * (s->window[i]) +\
23081* (s->window[i+1]) +\
6954 * (s->window[i+2]) +\
20947* (s->window[i+3])) & s->hash_mask;\
else\
h = (25881* (s->window[i]) +\
24674* (s->window[i+1]) +\
25811* (s->window[i+2])) & s->hash_mask;\
} while (0)
#else
# define UPDATE_HASH(s, h, i) (h = (((h) << s->hash_shift) ^ (s->window[i + (MIN_MATCH-1)])) & s->hash_mask)
#endif
#ifndef ZLIB_DEBUG
# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
/* Send a code of the given tree. c and tree must not have side effects */
#else /* ZLIB_DEBUG */
# define send_code(s, c, tree) \
{ if (z_verbose > 2) { \
fprintf(stderr, "\ncd %3d ", (c)); \
} \
send_bits(s, tree[c].Code, tree[c].Len); \
}
#endif
/* Bit buffer and compress bits calculation debugging */
#ifdef ZLIB_DEBUG
void send_bits(deflate_state *s, int value, int length);
# define cmpr_bits_add(s, len) s->compressed_len += (len)
# define cmpr_bits_align(s) s->compressed_len = (s->compressed_len + 7) & ~7L
# define sent_bits_add(s, bits) s->bits_sent += (bits)
# define sent_bits_align(s) s->bits_sent = (s->bits_sent + 7) & ~7L
#else
#define send_bits(s, value, length) \
{ int len = length;\
if (s->bi_valid > (int)Buf_size - len) {\
int val = (int)value;\
s->bi_buf |= (uint16_t)val << s->bi_valid;\
put_short(s, s->bi_buf);\
s->bi_buf = (uint16_t)val >> (Buf_size - s->bi_valid);\
s->bi_valid += len - Buf_size;\
} else {\
s->bi_buf |= (uint16_t)(value) << s->bi_valid;\
s->bi_valid += len;\
}\
}
# define cmpr_bits_add(s, len) (void)(len)
# define cmpr_bits_align(s)
# define sent_bits_add(s, bits) (void)(bits)
# define sent_bits_align(s)
#endif
#endif /* DEFLATE_H_ */

View File

@ -7,7 +7,6 @@
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "match_p.h"
#include "functable.h"
/* ===========================================================================
@ -17,9 +16,11 @@
* new strings in the dictionary only for unmatched strings or for short
* matches. It is used only for the fast compression options.
*/
ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
IPos hash_head; /* head of the hash chain */
int bflush; /* set if current block must be flushed */
Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
Pos hash_head; /* head of the hash chain */
int bflush = 0; /* set if current block must be flushed */
int64_t dist;
uint32_t match_len = 0;
for (;;) {
/* Make sure that we always have enough lookahead, except
@ -28,93 +29,78 @@ ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
* string following the next match.
*/
if (s->lookahead < MIN_LOOKAHEAD) {
functable.fill_window(s);
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
fill_window(s);
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
return need_more;
}
if (s->lookahead == 0)
if (UNLIKELY(s->lookahead == 0))
break; /* flush the current block */
}
/* Insert the string window[strstart .. strstart+2] in the
* dictionary, and set hash_head to the head of the hash chain:
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
hash_head = functable.insert_string(s, s->strstart, 1);
}
hash_head = functable.quick_insert_string(s, s->strstart);
dist = (int64_t)s->strstart - hash_head;
/* Find the longest match, discarding those <= prev_length.
* At this point we have always match_length < MIN_MATCH
* At this point we have always match length < MIN_MATCH
*/
if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
if (dist <= MAX_DIST(s) && dist > 0) {
/* To simplify the code, we prevent matches with the string
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
s->match_length = longest_match(s, hash_head);
match_len = functable.longest_match(s, hash_head);
/* longest_match() sets match_start */
}
if (s->match_length >= MIN_MATCH) {
check_match(s, s->strstart, s->match_start, s->match_length);
}
_tr_tally_dist(s, s->strstart - s->match_start, s->match_length - MIN_MATCH, bflush);
if (match_len >= MIN_MATCH) {
check_match(s, s->strstart, s->match_start, match_len);
s->lookahead -= s->match_length;
bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - MIN_MATCH);
s->lookahead -= match_len;
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
if (s->match_length <= s->max_insert_length && s->lookahead >= MIN_MATCH) {
s->match_length--; /* string at strstart already in table */
if (match_len <= s->max_insert_length && s->lookahead >= MIN_MATCH) {
match_len--; /* string at strstart already in table */
s->strstart++;
#ifdef NOT_TWEAK_COMPILER
do {
functable.insert_string(s, s->strstart, 1);
s->strstart++;
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
*/
} while (--s->match_length != 0);
#else
{
functable.insert_string(s, s->strstart, s->match_length);
s->strstart += s->match_length;
s->match_length = 0;
}
#endif
functable.insert_string(s, s->strstart, match_len);
s->strstart += match_len;
} else {
s->strstart += s->match_length;
s->match_length = 0;
s->ins_h = s->window[s->strstart];
#ifndef NOT_TWEAK_COMPILER
s->strstart += match_len;
#if MIN_MATCH != 3
functable.insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
#else
functable.insert_string(s, s->strstart + 2 - MIN_MATCH, 1);
#if MIN_MATCH != 3
#warning Call insert_string() MIN_MATCH-3 more times
#endif
functable.quick_insert_string(s, s->strstart + 2 - MIN_MATCH);
#endif
/* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
* matter since it will be recomputed at next deflate call.
*/
}
match_len = 0;
} else {
/* No match, output a literal byte */
Tracevv((stderr, "%c", s->window[s->strstart]));
_tr_tally_lit(s, s->window[s->strstart], bflush);
bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
s->lookahead--;
s->strstart++;
}
if (bflush)
if (UNLIKELY(bflush))
FLUSH_BLOCK(s, 0);
}
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
if (flush == Z_FINISH) {
if (UNLIKELY(flush == Z_FINISH)) {
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->sym_next)
if (UNLIKELY(s->sym_next))
FLUSH_BLOCK(s, 0);
return block_done;
}

View File

@ -7,72 +7,50 @@
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef NO_MEDIUM_STRATEGY
#include <stdint.h>
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "match_p.h"
#include "functable.h"
struct match {
unsigned int match_start;
unsigned int match_length;
unsigned int strstart;
unsigned int orgstart;
uint16_t match_start;
uint16_t match_length;
uint16_t strstart;
uint16_t orgstart;
};
#define MAX_DIST2 ((1 << MAX_WBITS) - MIN_LOOKAHEAD)
static int tr_tally_dist(deflate_state *s, int distance, int length) {
return _tr_tally(s, distance, length);
}
static int tr_tally_lit(deflate_state *s, int c) {
return _tr_tally(s, 0, c);
}
static int emit_match(deflate_state *s, struct match match) {
int flush = 0;
int bflush = 0;
/* matches that are not long enough we need to emit as literals */
if (match.match_length < MIN_MATCH) {
while (match.match_length) {
flush += tr_tally_lit(s, s->window[match.strstart]);
bflush += zng_tr_tally_lit(s, s->window[match.strstart]);
s->lookahead--;
match.strstart++;
match.match_length--;
}
return flush;
return bflush;
}
check_match(s, match.strstart, match.match_start, match.match_length);
flush += tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
bflush += zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
s->lookahead -= match.match_length;
return flush;
return bflush;
}
static void insert_match(deflate_state *s, struct match match) {
if (unlikely(s->lookahead <= match.match_length + MIN_MATCH))
if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + MIN_MATCH)))
return;
/* matches that are not long enough we need to emit as literals */
if (match.match_length < MIN_MATCH) {
#ifdef NOT_TWEAK_COMPILER
while (match.match_length) {
if (LIKELY(match.match_length < MIN_MATCH)) {
match.strstart++;
match.match_length--;
if (match.match_length) {
if (match.strstart >= match.orgstart) {
functable.insert_string(s, match.strstart, 1);
}
}
}
#else
match.strstart++;
match.match_length--;
if (match.match_length > 0) {
if (UNLIKELY(match.match_length > 0)) {
if (match.strstart >= match.orgstart) {
if (match.strstart + match.match_length - 1 >= match.orgstart) {
functable.insert_string(s, match.strstart, match.match_length);
@ -83,7 +61,6 @@ static void insert_match(deflate_state *s, struct match match) {
match.match_length = 0;
}
}
#endif
return;
}
@ -93,39 +70,26 @@ static void insert_match(deflate_state *s, struct match match) {
if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) {
match.match_length--; /* string at strstart already in table */
match.strstart++;
#ifdef NOT_TWEAK_COMPILER
do {
if (likely(match.strstart >= match.orgstart)) {
functable.insert_string(s, match.strstart, 1);
}
match.strstart++;
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
*/
} while (--match.match_length != 0);
#else
if (likely(match.strstart >= match.orgstart)) {
if (likely(match.strstart + match.match_length - 1 >= match.orgstart)) {
if (LIKELY(match.strstart >= match.orgstart)) {
if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
functable.insert_string(s, match.strstart, match.match_length);
} else {
functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
}
} else if (match.orgstart < match.strstart + match.match_length) {
functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
}
match.strstart += match.match_length;
match.match_length = 0;
#endif
} else {
match.strstart += match.match_length;
match.match_length = 0;
s->ins_h = s->window[match.strstart];
if (match.strstart >= (MIN_MATCH - 2))
#ifndef NOT_TWEAK_COMPILER
#if MIN_MATCH != 3
functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
#else
functable.insert_string(s, match.strstart + 2 - MIN_MATCH, 1);
#if MIN_MATCH != 3
#warning Call insert_string() MIN_MATCH-3 more times
#endif
functable.quick_insert_string(s, match.strstart + 2 - MIN_MATCH);
#endif
/* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
* matter since it will be recomputed at next deflate call.
@ -134,7 +98,7 @@ static void insert_match(deflate_state *s, struct match match) {
}
static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
IPos limit;
Pos limit;
unsigned char *match, *orig;
int changed = 0;
struct match c, n;
@ -143,36 +107,36 @@ static void fizzle_matches(deflate_state *s, struct match *current, struct match
if (current->match_length <= 1)
return;
if (unlikely(current->match_length > 1 + next->match_start))
if (UNLIKELY(current->match_length > 1 + next->match_start))
return;
if (unlikely(current->match_length > 1 + next->strstart))
if (UNLIKELY(current->match_length > 1 + next->strstart))
return;
match = s->window - current->match_length + 1 + next->match_start;
orig = s->window - current->match_length + 1 + next->strstart;
/* quick exit check.. if this fails then don't bother with anything else */
if (likely(*match != *orig))
if (LIKELY(*match != *orig))
return;
c = *current;
n = *next;
/* step one: try to move the "next" match to the left as much as possible */
limit = next->strstart > MAX_DIST2 ? next->strstart - MAX_DIST2 : 0;
limit = next->strstart > MAX_DIST(s) ? next->strstart - (Pos)MAX_DIST(s) : 0;
match = s->window + n.match_start - 1;
orig = s->window + n.strstart - 1;
while (*match == *orig) {
if (c.match_length < 1)
if (UNLIKELY(c.match_length < 1))
break;
if (n.strstart <= limit)
if (UNLIKELY(n.strstart <= limit))
break;
if (n.match_length >= 256)
if (UNLIKELY(n.match_length >= 256))
break;
if (n.match_start <= 1)
if (UNLIKELY(n.match_start <= 1))
break;
n.strstart--;
@ -196,15 +160,18 @@ static void fizzle_matches(deflate_state *s, struct match *current, struct match
}
}
ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
struct match current_match, next_match;
Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
/* Align the first struct to start on a new cacheline, this allows us to fit both structs in one cacheline */
ALIGNED_(16) struct match current_match;
struct match next_match;
memset(&current_match, 0, sizeof(struct match));
memset(&next_match, 0, sizeof(struct match));
for (;;) {
IPos hash_head = 0; /* head of the hash chain */
int bflush; /* set if current block must be flushed */
Pos hash_head = 0; /* head of the hash chain */
int bflush = 0; /* set if current block must be flushed */
int64_t dist;
/* Make sure that we always have enough lookahead, except
* at the end of the input file. We need MAX_MATCH bytes
@ -212,15 +179,14 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
* string following the next current_match.
*/
if (s->lookahead < MIN_LOOKAHEAD) {
functable.fill_window(s);
fill_window(s);
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
return need_more;
}
if (s->lookahead == 0)
if (UNLIKELY(s->lookahead == 0))
break; /* flush the current block */
next_match.match_length = 0;
}
s->prev_length = 2;
/* Insert the string window[strstart .. strstart+2] in the
* dictionary, and set hash_head to the head of the hash chain:
@ -230,63 +196,63 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
if (next_match.match_length > 0) {
current_match = next_match;
next_match.match_length = 0;
} else {
hash_head = 0;
if (s->lookahead >= MIN_MATCH) {
hash_head = functable.insert_string(s, s->strstart, 1);
hash_head = functable.quick_insert_string(s, s->strstart);
}
/* set up the initial match to be a 1 byte literal */
current_match.match_start = 0;
current_match.match_length = 1;
current_match.strstart = s->strstart;
current_match.strstart = (uint16_t)s->strstart;
current_match.orgstart = current_match.strstart;
/* Find the longest match, discarding those <= prev_length.
* At this point we have always match_length < MIN_MATCH
*/
if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
dist = (int64_t)s->strstart - hash_head;
if (dist <= MAX_DIST(s) && dist > 0) {
/* To simplify the code, we prevent matches with the string
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
current_match.match_length = longest_match(s, hash_head);
current_match.match_start = s->match_start;
if (current_match.match_length < MIN_MATCH)
current_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
current_match.match_start = (uint16_t)s->match_start;
if (UNLIKELY(current_match.match_length < MIN_MATCH))
current_match.match_length = 1;
if (current_match.match_start >= current_match.strstart) {
if (UNLIKELY(current_match.match_start >= current_match.strstart)) {
/* this can happen due to some restarts */
current_match.match_length = 1;
}
} else {
/* Set up the match to be a 1 byte literal */
current_match.match_start = 0;
current_match.match_length = 1;
}
}
insert_match(s, current_match);
/* now, look ahead one */
if (s->lookahead > MIN_LOOKAHEAD && (current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD)) {
if (LIKELY(s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
s->strstart = current_match.strstart + current_match.match_length;
hash_head = functable.insert_string(s, s->strstart, 1);
hash_head = functable.quick_insert_string(s, s->strstart);
/* set up the initial match to be a 1 byte literal */
next_match.match_start = 0;
next_match.match_length = 1;
next_match.strstart = s->strstart;
next_match.strstart = (uint16_t)s->strstart;
next_match.orgstart = next_match.strstart;
/* Find the longest match, discarding those <= prev_length.
* At this point we have always match_length < MIN_MATCH
*/
if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
dist = (int64_t)s->strstart - hash_head;
if (dist <= MAX_DIST(s) && dist > 0) {
/* To simplify the code, we prevent matches with the string
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
next_match.match_length = longest_match(s, hash_head);
next_match.match_start = s->match_start;
if (next_match.match_start >= next_match.strstart) {
next_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
next_match.match_start = (uint16_t)s->match_start;
if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
/* this can happen due to some restarts */
next_match.match_length = 1;
}
@ -294,13 +260,13 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
next_match.match_length = 1;
else
fizzle_matches(s, &current_match, &next_match);
} else {
/* Set up the match to be a 1 byte literal */
next_match.match_start = 0;
next_match.match_length = 1;
}
/* short matches with a very long distance are rarely a good idea encoding wise */
if (next_match.match_length == 3 && (next_match.strstart - next_match.match_start) > 12000)
next_match.match_length = 1;
s->strstart = current_match.strstart;
} else {
next_match.match_length = 0;
}
@ -311,7 +277,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
/* move the "cursor" forward */
s->strstart += current_match.match_length;
if (bflush)
if (UNLIKELY(bflush))
FLUSH_BLOCK(s, 0);
}
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
@ -319,7 +285,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->sym_next)
if (UNLIKELY(s->sym_next))
FLUSH_BLOCK(s, 0);
return block_done;

View File

@ -12,39 +12,45 @@
/* Forward declare common non-inlined functions declared in deflate.c */
#ifdef ZLIB_DEBUG
void check_match(deflate_state *s, IPos start, IPos match, int length);
void check_match(deflate_state *s, Pos start, Pos match, int length);
#else
#define check_match(s, start, match, length)
#endif
void flush_pending(PREFIX3(stream) *strm);
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
* IN assertion: all calls to INSERT_STRING are made with consecutive
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
* Save the match info and tally the frequency counts. Return true if
* the current block must be flushed.
*/
static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigned int count) {
Pos ret = 0;
unsigned int idx;
extern const unsigned char Z_INTERNAL zng_length_code[];
extern const unsigned char Z_INTERNAL zng_dist_code[];
for (idx = 0; idx < count; idx++) {
UPDATE_HASH(s, s->ins_h, str+idx);
static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
    /* Record the unmatched byte c as a three-byte symbol entry: two zero
     * bytes followed by the literal itself, then count it in the literal
     * tree. Returns non-zero once sym_next has reached sym_end.
     */
    unsigned char *entry = &s->sym_buf[s->sym_next];

    entry[0] = 0;
    entry[1] = 0;
    entry[2] = c;
    s->sym_next += 3;

    s->dyn_ltree[c].Freq++;
    Tracevv((stderr, "%c", c));
    Assert(c <= (MAX_MATCH-MIN_MATCH), "zng_tr_tally: bad literal");

    return (s->sym_next == s->sym_end);
}
Pos head = s->head[s->ins_h];
if (head != str+idx) {
s->prev[(str+idx) & s->w_mask] = head;
s->head[s->ins_h] = str+idx;
if (idx == count - 1)
ret = head;
} else if (idx == count - 1) {
ret = str + idx;
}
}
return ret;
static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) {
    /* dist: distance of matched string
     * len:  match length - MIN_MATCH
     *
     * Record the match as a three-byte symbol entry (distance low byte,
     * distance high byte, length), count the match, and update the
     * length and distance frequency tables. Returns non-zero once sym_next
     * has reached sym_end.
     */
    unsigned char *entry = &s->sym_buf[s->sym_next];

    entry[0] = (uint8_t)(dist);
    entry[1] = (uint8_t)(dist >> 8);
    entry[2] = (uint8_t)len;
    s->sym_next += 3;
    s->matches++;

    /* From here on dist holds "distance - 1", the form d_code() expects. */
    dist--;
    Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES,
        "zng_tr_tally: bad match");

    s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++;
    s->dyn_dtree[d_code(dist)].Freq++;

    return (s->sym_next == s->sym_end);
}
/* ===========================================================================
@ -52,14 +58,13 @@ static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigne
* IN assertion: strstart is set to the end of the current match.
*/
#define FLUSH_BLOCK_ONLY(s, last) { \
_tr_flush_block(s, (s->block_start >= 0L ? \
zng_tr_flush_block(s, (s->block_start >= 0 ? \
(char *)&s->window[(unsigned)s->block_start] : \
NULL), \
(unsigned long)((long)s->strstart - s->block_start), \
(uint32_t)((int)s->strstart - s->block_start), \
(last)); \
s->block_start = s->strstart; \
s->block_start = (int)s->strstart; \
flush_pending(s->strm); \
Tracev((stderr, "[FLUSH]")); \
}
/* Same but force premature exit if necessary. */

121
libs/zlibng/deflate_quick.c Normal file
View File

@ -0,0 +1,121 @@
/*
* The deflate_quick deflate strategy, designed to be used when cycles are
* at a premium.
*
* Copyright (C) 2013 Intel Corporation. All rights reserved.
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
* Jim Guilford <james.guilford@intel.com>
* Vinodh Gopal <vinodh.gopal@intel.com>
* Erdinc Ozturk <erdinc.ozturk@intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* Portions are Copyright (C) 2016 12Sided Technology, LLC.
* Author:
* Phil Vachon <pvachon@12sidedtech.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
#include "trees_emit.h"
extern const ct_data static_ltree[L_CODES+2];
extern const ct_data static_dtree[D_CODES];
/* Open a new block for deflate_quick: calls zng_tr_emit_tree() with
 * STATIC_TREES, records the block as open (block_open becomes 1, or 2 when
 * `last` is 1) and sets block_start to the current strstart.
 * Both macro parameters are parenthesized at every use so that expression
 * arguments (e.g. `*p` or `a ? b : c`) expand with the intended precedence.
 */
#define QUICK_START_BLOCK(s, last) { \
    zng_tr_emit_tree((s), STATIC_TREES, (last)); \
    (s)->block_open = 1 + (int)(last); \
    (s)->block_start = (int)(s)->strstart; \
}
/* Close the currently open block, if any: emits the end-of-block symbol from
 * static_ltree, clears block_open, moves block_start to strstart and flushes
 * pending output.
 * NOTE: this macro contains a `return` -- when no output space is left after
 * the flush it returns from the ENCLOSING function with finish_started
 * (if `last` is nonzero) or need_more.
 * Macro parameters are parenthesized at every use so that expression
 * arguments expand with the intended precedence.
 */
#define QUICK_END_BLOCK(s, last) { \
    if ((s)->block_open) { \
        zng_tr_emit_end_block((s), static_ltree, (last)); \
        (s)->block_open = 0; \
        (s)->block_start = (int)(s)->strstart; \
        flush_pending((s)->strm); \
        if ((s)->strm->avail_out == 0) \
            return (last) ? finish_started : need_more; \
    } \
}
/* Compress using the "quick" strategy: at each position take the single
 * candidate returned by quick_insert_string (no chain walk, no lazy
 * evaluation) and emit literals/matches immediately with the static trees.
 *
 *   s:     deflate state to advance
 *   flush: zlib flush mode; Z_FINISH marks the block being written as last
 *
 * Returns need_more, block_done, finish_started or finish_done. Note that
 * QUICK_END_BLOCK can itself return from this function when the output
 * buffer is full.
 */
Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
Pos hash_head;
int64_t dist;
unsigned match_len, last;
last = (flush == Z_FINISH) ? 1 : 0;
/* block_open is set to 1 + last by QUICK_START_BLOCK, so a value other than 2
 * means whatever is open (if anything) was not started as the last block. */
if (UNLIKELY(last && s->block_open != 2)) {
/* Emit end of previous block */
QUICK_END_BLOCK(s, 0);
/* Emit start of last block */
QUICK_START_BLOCK(s, last);
} else if (UNLIKELY(s->block_open == 0 && s->lookahead > 0)) {
/* Start new block only when we have lookahead data, so that if no
input data is given an empty block will not be written */
QUICK_START_BLOCK(s, last);
}
for (;;) {
/* Flush when the pending bytes plus one bit buffer's worth of bytes would
 * reach the size of the pending buffer. */
if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) {
flush_pending(s->strm);
if (s->strm->avail_out == 0) {
return (last && s->strm->avail_in == 0) ? finish_started : need_more;
}
}
/* Refill the window when lookahead runs low. */
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD)) {
fill_window(s);
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
return need_more;
}
if (UNLIKELY(s->lookahead == 0))
break;
if (UNLIKELY(s->block_open == 0)) {
/* Start new block when we have lookahead data, so that if no
input data is given an empty block will not be written */
QUICK_START_BLOCK(s, last);
}
}
if (LIKELY(s->lookahead >= MIN_MATCH)) {
hash_head = functable.quick_insert_string(s, s->strstart);
/* Signed 64-bit difference: a candidate at or after strstart yields a
 * non-positive value and is rejected by the dist > 0 test below. */
dist = (int64_t)s->strstart - hash_head;
if (dist <= MAX_DIST(s) && dist > 0) {
match_len = functable.compare258(s->window + s->strstart, s->window + hash_head);
if (match_len >= MIN_MATCH) {
/* Never emit a match longer than the data actually available. */
if (UNLIKELY(match_len > s->lookahead))
match_len = s->lookahead;
check_match(s, s->strstart, hash_head, match_len);
zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - MIN_MATCH, (uint32_t)dist);
s->lookahead -= match_len;
s->strstart += match_len;
continue;
}
}
}
/* No usable match: emit the current byte as a literal and advance by one. */
zng_tr_emit_lit(s, static_ltree, s->window[s->strstart]);
s->strstart++;
s->lookahead--;
}
/* Reached only via the break above, i.e. when lookahead is 0 after fill_window. */
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
if (UNLIKELY(last)) {
QUICK_END_BLOCK(s, 1);
return finish_done;
}
QUICK_END_BLOCK(s, 0);
return block_done;
}

View File

@ -7,26 +7,18 @@
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "match_p.h"
#include "functable.h"
/* ===========================================================================
* Local data
*/
#ifndef TOO_FAR
# define TOO_FAR 4096
#endif
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
/* ===========================================================================
* Same as deflate_medium, but achieves better compression. We use a lazy
* evaluation for matches: a match is finally adopted only if there is
* no better match at the next window position.
*/
ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
IPos hash_head; /* head of hash chain */
Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
Pos hash_head; /* head of hash chain */
int bflush; /* set if current block must be flushed */
int64_t dist;
uint32_t match_len;
/* Process the input block. */
for (;;) {
@ -36,57 +28,53 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
* string following the next match.
*/
if (s->lookahead < MIN_LOOKAHEAD) {
functable.fill_window(s);
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
fill_window(s);
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
return need_more;
}
if (s->lookahead == 0)
if (UNLIKELY(s->lookahead == 0))
break; /* flush the current block */
}
/* Insert the string window[strstart .. strstart+2] in the
* dictionary, and set hash_head to the head of the hash chain:
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
hash_head = functable.insert_string(s, s->strstart, 1);
hash_head = 0;
if (LIKELY(s->lookahead >= MIN_MATCH)) {
hash_head = functable.quick_insert_string(s, s->strstart);
}
/* Find the longest match, discarding those <= prev_length.
*/
s->prev_length = s->match_length, s->prev_match = s->match_start;
s->match_length = MIN_MATCH-1;
s->prev_match = (Pos)s->match_start;
match_len = MIN_MATCH-1;
dist = (int64_t)s->strstart - hash_head;
if (hash_head != NIL && s->prev_length < s->max_lazy_match && s->strstart - hash_head <= MAX_DIST(s)) {
if (dist <= MAX_DIST(s) && dist > 0 && s->prev_length < s->max_lazy_match) {
/* To simplify the code, we prevent matches with the string
* of window index 0 (in particular we have to avoid a match
* of the string with itself at the start of the input file).
*/
s->match_length = longest_match(s, hash_head);
match_len = functable.longest_match(s, hash_head);
/* longest_match() sets match_start */
if (s->match_length <= 5 && (s->strategy == Z_FILTERED
#if TOO_FAR <= 32767
|| (s->match_length == MIN_MATCH && s->strstart - s->match_start > TOO_FAR)
#endif
)) {
if (match_len <= 5 && (s->strategy == Z_FILTERED)) {
/* If prev_match is also MIN_MATCH, match_start is garbage
* but we will ignore the current match anyway.
*/
s->match_length = MIN_MATCH-1;
match_len = MIN_MATCH-1;
}
}
/* If there was a match at the previous step and the current
* match is not better, output the previous match:
*/
if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
if (s->prev_length >= MIN_MATCH && match_len <= s->prev_length) {
unsigned int max_insert = s->strstart + s->lookahead - MIN_MATCH;
/* Do not insert strings in hash table beyond this. */
check_match(s, s->strstart-1, s->prev_match, s->prev_length);
_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH, bflush);
bflush = zng_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH);
/* Insert in hash table all strings up to the end of the match.
* strstart-1 and strstart are already inserted. If there is not
@ -95,70 +83,55 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
*/
s->lookahead -= s->prev_length-1;
#ifdef NOT_TWEAK_COMPILER
s->prev_length -= 2;
do {
if (++s->strstart <= max_insert) {
functable.insert_string(s, s->strstart, 1);
}
} while (--s->prev_length != 0);
s->match_available = 0;
s->match_length = MIN_MATCH-1;
s->strstart++;
#else
{
unsigned int mov_fwd = s->prev_length - 2;
if (max_insert > s->strstart) {
unsigned int insert_cnt = mov_fwd;
if (unlikely(insert_cnt > max_insert - s->strstart))
if (UNLIKELY(insert_cnt > max_insert - s->strstart))
insert_cnt = max_insert - s->strstart;
functable.insert_string(s, s->strstart + 1, insert_cnt);
}
s->prev_length = 0;
s->match_available = 0;
s->match_length = MIN_MATCH-1;
s->strstart += mov_fwd + 1;
}
#endif /*NOT_TWEAK_COMPILER*/
if (bflush) FLUSH_BLOCK(s, 0);
if (UNLIKELY(bflush))
FLUSH_BLOCK(s, 0);
} else if (s->match_available) {
/* If there was no match at the previous position, output a
* single literal. If there was a match but the current match
* is longer, truncate the previous match to a single literal.
*/
Tracevv((stderr, "%c", s->window[s->strstart-1]));
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
if (bflush) {
bflush = zng_tr_tally_lit(s, s->window[s->strstart-1]);
if (UNLIKELY(bflush))
FLUSH_BLOCK_ONLY(s, 0);
}
s->prev_length = match_len;
s->strstart++;
s->lookahead--;
if (s->strm->avail_out == 0)
if (UNLIKELY(s->strm->avail_out == 0))
return need_more;
} else {
/* There is no previous match to compare with, wait for
* the next step to decide.
*/
s->prev_length = match_len;
s->match_available = 1;
s->strstart++;
s->lookahead--;
}
}
Assert(flush != Z_NO_FLUSH, "no flush?");
if (s->match_available) {
Tracevv((stderr, "%c", s->window[s->strstart-1]));
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
if (UNLIKELY(s->match_available)) {
(void) zng_tr_tally_lit(s, s->window[s->strstart-1]);
s->match_available = 0;
}
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
if (flush == Z_FINISH) {
if (UNLIKELY(flush == Z_FINISH)) {
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->sym_next)
if (UNLIKELY(s->sym_next))
FLUSH_BLOCK(s, 0);
return block_done;
}

View File

@ -0,0 +1,44 @@
#ifndef X86_BUILTIN_CTZ_H
#define X86_BUILTIN_CTZ_H
#if defined(_MSC_VER) && !defined(__clang__)
#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64)
#include <intrin.h>
#ifdef X86_FEATURES
# include "arch/x86/x86.h"
#endif
/* MSVC stand-in for __builtin_ctz. This is not a general purpose replacement:
 * the caller must guarantee value != 0. Because of that assumption the result
 * variable is left uninitialized and the return value of _BitScanForward is
 * not checked.
 */
static __forceinline unsigned long __builtin_ctz(uint32_t value) {
    unsigned long lowest_set_bit;
#ifdef X86_FEATURES
    /* Use the TZCNT instruction when the CPU feature flag says it is available. */
    if (x86_cpu_has_tzcnt)
        return _tzcnt_u32(value);
#endif
    _BitScanForward(&lowest_set_bit, value);
    return lowest_set_bit;
}
#define HAVE_BUILTIN_CTZ
#ifdef _M_AMD64
/* MSVC stand-in for __builtin_ctzll (only compiled when _M_AMD64 is defined).
 * This is not a general purpose replacement: the caller must guarantee
 * value != 0. Because of that assumption the result variable is left
 * uninitialized and the return value of _BitScanForward64 is not checked.
 */
static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
    unsigned long lowest_set_bit;
#ifdef X86_FEATURES
    /* Use the TZCNT instruction when the CPU feature flag says it is available. */
    if (x86_cpu_has_tzcnt)
        return _tzcnt_u64(value);
#endif
    _BitScanForward64(&lowest_set_bit, value);
    return lowest_set_bit;
}
#define HAVE_BUILTIN_CTZLL
#endif
#endif
#endif
#endif

View File

@ -4,40 +4,95 @@
*/
#include "zbuild.h"
#include "functable.h"
#include "zendian.h"
#include "deflate.h"
#include "deflate_p.h"
#include "gzendian.h"
#include "functable.h"
/* insert_string */
#ifdef X86_SSE4_2_CRC_HASH
extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count);
#elif defined(ARM_ACLE_CRC_HASH)
extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count);
#ifdef X86_FEATURES
# include "fallback_builtins.h"
#endif
/* fill_window */
/* insert_string */
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
#ifdef X86_SSE42_CRC_HASH
extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
#elif defined(ARM_ACLE_CRC_HASH)
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
#endif
/* quick_insert_string */
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
#ifdef X86_SSE42_CRC_HASH
extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
#elif defined(ARM_ACLE_CRC_HASH)
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
#endif
/* slide_hash */
#ifdef X86_SSE2
extern void fill_window_sse(deflate_state *s);
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
extern void fill_window_arm(deflate_state *s);
void slide_hash_sse2(deflate_state *s);
#elif defined(ARM_NEON_SLIDEHASH)
void slide_hash_neon(deflate_state *s);
#elif defined(POWER8_VSX_SLIDEHASH)
void slide_hash_power8(deflate_state *s);
#endif
#ifdef X86_AVX2
void slide_hash_avx2(deflate_state *s);
#endif
/* adler32 */
extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
#ifdef ARM_NEON_ADLER32
extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
#ifdef DYNAMIC_CRC_TABLE
extern volatile int crc_table_empty;
extern void make_crc_table(void);
#ifdef X86_SSSE3_ADLER32
extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#ifdef X86_AVX2_ADLER32
extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#ifdef POWER8_VSX_ADLER32
extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len);
#endif
#ifdef __ARM_FEATURE_CRC32
/* memory chunking */
extern uint32_t chunksize_c(void);
extern uint8_t* chunkcopy_c(uint8_t *out, uint8_t const *from, unsigned len);
extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#ifdef X86_SSE2_CHUNKSET
extern uint32_t chunksize_sse2(void);
extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
#ifdef X86_AVX_CHUNKSET
extern uint32_t chunksize_avx(void);
extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len);
extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
#ifdef ARM_NEON_CHUNKSET
extern uint32_t chunksize_neon(void);
extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
extern uint8_t* chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len);
extern uint8_t* chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len);
extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
#endif
/* CRC32 */
Z_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
#ifdef ARM_ACLE_CRC_HASH
extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t);
#endif
@ -47,76 +102,287 @@ extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t);
extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t);
#endif
/* stub definitions */
ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count);
ZLIB_INTERNAL void fill_window_stub(deflate_state *s);
ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len);
ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len);
/* compare258 */
extern uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1);
#ifdef UNALIGNED_OK
extern uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1);
extern uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1);
#ifdef UNALIGNED64_OK
extern uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1);
#endif
#ifdef X86_SSE42_CMP_STR
extern uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
extern uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1);
#endif
#endif
/* functable init */
ZLIB_INTERNAL __thread struct functable_s functable = {fill_window_stub,insert_string_stub,adler32_stub,crc32_stub};
/* longest_match */
extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
#ifdef UNALIGNED_OK
extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
#ifdef UNALIGNED64_OK
extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
#endif
#ifdef X86_SSE42_CMP_STR
extern uint32_t longest_match_unaligned_sse4(deflate_state *const s, Pos cur_match);
#endif
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_match);
#endif
#endif
Z_INTERNAL Z_TLS struct functable_s functable;
/* Run the architecture-specific CPU feature detection once; subsequent calls
 * return immediately. Which detector is called (x86, ARM or POWER) is chosen
 * at compile time; with none of the *_FEATURES macros defined this only sets
 * the "already checked" flag.
 */
Z_INTERNAL void cpu_check_features(void)
{
    /* Function-local static: remembers across calls that detection has run. */
    static int features_checked = 0;

    if (!features_checked) {
#if defined(X86_FEATURES)
        x86_check_features();
#elif defined(ARM_FEATURES)
        arm_check_features();
#elif defined(POWER_FEATURES)
        power_check_features();
#endif
        features_checked = 1;
    }
}
/* stub functions */
ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count) {
Z_INTERNAL void insert_string_stub(deflate_state *const s, const uint32_t str, uint32_t count) {
// Initialize default
functable.insert_string=&insert_string_c;
#ifdef X86_SSE4_2_CRC_HASH
functable.insert_string = &insert_string_c;
cpu_check_features();
#ifdef X86_SSE42_CRC_HASH
if (x86_cpu_has_sse42)
functable.insert_string=&insert_string_sse;
#elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
functable.insert_string = &insert_string_sse4;
#elif defined(ARM_ACLE_CRC_HASH)
if (arm_cpu_has_crc32)
functable.insert_string = &insert_string_acle;
#endif
return functable.insert_string(s, str, count);
functable.insert_string(s, str, count);
}
ZLIB_INTERNAL void fill_window_stub(deflate_state *s) {
// Initialize default
functable.fill_window=&fill_window_c;
/* First-call resolver for functable.quick_insert_string: selects an
 * implementation, stores it in functable so later calls bypass this stub,
 * then forwards the current call to it.
 *   s:   deflate state
 *   str: position passed through to the selected implementation
 * Returns whatever the selected implementation returns.
 */
Z_INTERNAL Pos quick_insert_string_stub(deflate_state *const s, const uint32_t str) {
    // Initialize default
    functable.quick_insert_string = &quick_insert_string_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE42_CRC_HASH
    if (x86_cpu_has_sse42)
        functable.quick_insert_string = &quick_insert_string_sse4;
#elif defined(ARM_ACLE_CRC_HASH)
    if (arm_cpu_has_crc32)
        functable.quick_insert_string = &quick_insert_string_acle;
#endif

    return functable.quick_insert_string(s, str);
}
Z_INTERNAL void slide_hash_stub(deflate_state *s) {
functable.slide_hash = &slide_hash_c;
cpu_check_features();
#ifdef X86_SSE2
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
if (x86_cpu_has_sse2)
# endif
functable.fill_window=&fill_window_sse;
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
functable.fill_window=&fill_window_arm;
functable.slide_hash = &slide_hash_sse2;
#elif defined(ARM_NEON_SLIDEHASH)
# ifndef ARM_NOCHECK_NEON
if (arm_cpu_has_neon)
# endif
functable.slide_hash = &slide_hash_neon;
#endif
#ifdef X86_AVX2
if (x86_cpu_has_avx2)
functable.slide_hash = &slide_hash_avx2;
#endif
#ifdef POWER8_VSX_SLIDEHASH
if (power_cpu_has_arch_2_07)
functable.slide_hash = &slide_hash_power8;
#endif
functable.fill_window(s);
functable.slide_hash(s);
}
ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
// Initialize default
functable.adler32 = &adler32_c;
cpu_check_features();
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
#ifdef ARM_NEON_ADLER32
# ifndef ARM_NOCHECK_NEON
if (arm_cpu_has_neon)
# endif
functable.adler32 = &adler32_neon;
#endif
#ifdef X86_SSSE3_ADLER32
if (x86_cpu_has_ssse3)
functable.adler32 = &adler32_ssse3;
#endif
#ifdef X86_AVX2_ADLER32
if (x86_cpu_has_avx2)
functable.adler32 = &adler32_avx2;
#endif
#ifdef POWER8_VSX_ADLER32
if (power_cpu_has_arch_2_07)
functable.adler32 = &adler32_power8;
#endif
return functable.adler32(adler, buf, len);
}
ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
/* First-call resolver for functable.chunksize: selects an implementation,
 * stores it in functable so later calls bypass this stub, then forwards the
 * current call. Later #ifdef sections override earlier ones, so the
 * precedence is C < SSE2 < AVX, with NEON overriding the C default when
 * ARM_NEON_CHUNKSET is defined.
 */
Z_INTERNAL uint32_t chunksize_stub(void) {
    // Initialize default
    functable.chunksize = &chunksize_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
# endif
        functable.chunksize = &chunksize_sse2;
#endif
#ifdef X86_AVX_CHUNKSET
    if (x86_cpu_has_avx2)
        functable.chunksize = &chunksize_avx;
#endif
#ifdef ARM_NEON_CHUNKSET
    if (arm_cpu_has_neon)
        functable.chunksize = &chunksize_neon;
#endif

    return functable.chunksize();
}
/* First-call resolver for functable.chunkcopy: selects an implementation,
 * stores it in functable so later calls bypass this stub, then forwards the
 * current call with the same arguments. Later #ifdef sections override
 * earlier ones (C < SSE2 < AVX; NEON overrides the C default on ARM builds).
 */
Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned len) {
    // Initialize default
    functable.chunkcopy = &chunkcopy_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
# endif
        functable.chunkcopy = &chunkcopy_sse2;
#endif
#ifdef X86_AVX_CHUNKSET
    if (x86_cpu_has_avx2)
        functable.chunkcopy = &chunkcopy_avx;
#endif
#ifdef ARM_NEON_CHUNKSET
    if (arm_cpu_has_neon)
        functable.chunkcopy = &chunkcopy_neon;
#endif

    return functable.chunkcopy(out, from, len);
}
/* First-call resolver for functable.chunkcopy_safe: selects an
 * implementation, stores it in functable so later calls bypass this stub,
 * then forwards the current call with the same arguments. Later #ifdef
 * sections override earlier ones (C < SSE2 < AVX; NEON overrides the C
 * default on ARM builds).
 */
Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
    // Initialize default
    functable.chunkcopy_safe = &chunkcopy_safe_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
# endif
        functable.chunkcopy_safe = &chunkcopy_safe_sse2;
#endif
#ifdef X86_AVX_CHUNKSET
    if (x86_cpu_has_avx2)
        functable.chunkcopy_safe = &chunkcopy_safe_avx;
#endif
#ifdef ARM_NEON_CHUNKSET
    if (arm_cpu_has_neon)
        functable.chunkcopy_safe = &chunkcopy_safe_neon;
#endif

    return functable.chunkcopy_safe(out, from, len, safe);
}
/* First-call resolver for functable.chunkunroll: selects an implementation,
 * stores it in functable so later calls bypass this stub, then forwards the
 * current call with the same arguments. Later #ifdef sections override
 * earlier ones (C < SSE2 < AVX; NEON overrides the C default on ARM builds).
 */
Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len) {
    // Initialize default
    functable.chunkunroll = &chunkunroll_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
# endif
        functable.chunkunroll = &chunkunroll_sse2;
#endif
#ifdef X86_AVX_CHUNKSET
    if (x86_cpu_has_avx2)
        functable.chunkunroll = &chunkunroll_avx;
#endif
#ifdef ARM_NEON_CHUNKSET
    if (arm_cpu_has_neon)
        functable.chunkunroll = &chunkunroll_neon;
#endif

    return functable.chunkunroll(out, dist, len);
}
/* First-call resolver for functable.chunkmemset: selects an implementation,
 * stores it in functable so later calls bypass this stub, then forwards the
 * current call with the same arguments. Later #ifdef sections override
 * earlier ones (C < SSE2 < AVX; NEON overrides the C default on ARM builds).
 */
Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len) {
    // Initialize default
    functable.chunkmemset = &chunkmemset_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
# endif
        functable.chunkmemset = &chunkmemset_sse2;
#endif
#ifdef X86_AVX_CHUNKSET
    if (x86_cpu_has_avx2)
        functable.chunkmemset = &chunkmemset_avx;
#endif
#ifdef ARM_NEON_CHUNKSET
    if (arm_cpu_has_neon)
        functable.chunkmemset = &chunkmemset_neon;
#endif

    return functable.chunkmemset(out, dist, len);
}
/* First-call resolver for functable.chunkmemset_safe: selects an
 * implementation, stores it in functable so later calls bypass this stub,
 * then forwards the current call with the same arguments. Later #ifdef
 * sections override earlier ones (C < SSE2 < AVX; NEON overrides the C
 * default on ARM builds).
 */
Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
    // Initialize default
    functable.chunkmemset_safe = &chunkmemset_safe_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise the C fallback could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef X86_SSE2_CHUNKSET
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
    if (x86_cpu_has_sse2)
# endif
        functable.chunkmemset_safe = &chunkmemset_safe_sse2;
#endif
#ifdef X86_AVX_CHUNKSET
    if (x86_cpu_has_avx2)
        functable.chunkmemset_safe = &chunkmemset_safe_avx;
#endif
#ifdef ARM_NEON_CHUNKSET
    if (arm_cpu_has_neon)
        functable.chunkmemset_safe = &chunkmemset_safe_neon;
#endif

    return functable.chunkmemset_safe(out, dist, len, left);
}
Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
int32_t use_byfour = sizeof(void *) == sizeof(ptrdiff_t);
Assert(sizeof(uint64_t) >= sizeof(size_t),
"crc32_z takes size_t but internally we have a uint64_t len");
/* return a function pointer for optimized arches here after a capability test */
#ifdef DYNAMIC_CRC_TABLE
if (crc_table_empty)
make_crc_table();
#endif /* DYNAMIC_CRC_TABLE */
cpu_check_features();
if (sizeof(void *) == sizeof(ptrdiff_t)) {
if (use_byfour) {
#if BYTE_ORDER == LITTLE_ENDIAN
functable.crc32 = crc32_little;
# if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
# if defined(ARM_ACLE_CRC_HASH)
if (arm_cpu_has_crc32)
functable.crc32 = crc32_acle;
# endif
@ -131,3 +397,70 @@ ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64
return functable.crc32(crc, buf, len);
}
/* First-call resolver for functable.compare258: selects an implementation,
 * stores it in functable so later calls bypass this stub, then forwards the
 * current call.
 * Without UNALIGNED_OK the portable C version is always used. With it, a
 * compile-time choice between the 64/32/16-bit unaligned variants is made
 * first and may then be overridden at run time by the SSE4.2 and, after
 * that, the AVX2 variant.
 */
Z_INTERNAL uint32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) {
    // Initialize default
    functable.compare258 = &compare258_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise a slower variant could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef UNALIGNED_OK
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
    functable.compare258 = &compare258_unaligned_64;
# elif defined(HAVE_BUILTIN_CTZ)
    functable.compare258 = &compare258_unaligned_32;
# else
    functable.compare258 = &compare258_unaligned_16;
# endif
# ifdef X86_SSE42_CMP_STR
    if (x86_cpu_has_sse42)
        functable.compare258 = &compare258_unaligned_sse4;
# endif
# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
    if (x86_cpu_has_avx2)
        functable.compare258 = &compare258_unaligned_avx2;
# endif
#endif

    return functable.compare258(src0, src1);
}
/* First-call resolver for functable.longest_match: selects an implementation,
 * stores it in functable so later calls bypass this stub, then forwards the
 * current call.
 * Without UNALIGNED_OK the portable C version is always used. With it, a
 * compile-time choice between the 64/32/16-bit unaligned variants is made
 * first and may then be overridden at run time by the SSE4.2 and, after
 * that, the AVX2 variant.
 */
Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
    // Initialize default
    functable.longest_match = &longest_match_c;
    /* Make sure CPU feature detection has run before the flags below are read;
     * otherwise a slower variant could be latched permanently. The call is a
     * no-op when detection has already been done. */
    cpu_check_features();

#ifdef UNALIGNED_OK
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
    functable.longest_match = &longest_match_unaligned_64;
# elif defined(HAVE_BUILTIN_CTZ)
    functable.longest_match = &longest_match_unaligned_32;
# else
    functable.longest_match = &longest_match_unaligned_16;
# endif
# ifdef X86_SSE42_CMP_STR
    if (x86_cpu_has_sse42)
        functable.longest_match = &longest_match_unaligned_sse4;
# endif
# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
    if (x86_cpu_has_avx2)
        functable.longest_match = &longest_match_unaligned_avx2;
# endif
#endif

    return functable.longest_match(s, cur_match);
}
/* functable init */
/* Every slot initially points at its *_stub resolver, so the first call
 * through a slot goes to the stub, which overwrites that slot with a concrete
 * implementation. The initializer is positional: the entries must stay in
 * the same order as the members of struct functable_s.
 */
Z_INTERNAL Z_TLS struct functable_s functable = {
insert_string_stub,
quick_insert_string_stub,
adler32_stub,
crc32_stub,
slide_hash_stub,
compare258_stub,
longest_match_stub,
chunksize_stub,
chunkcopy_stub,
chunkcopy_safe_stub,
chunkunroll_stub,
chunkmemset_stub,
chunkmemset_safe_stub
};

View File

@ -9,13 +9,21 @@
#include "deflate.h"
struct functable_s {
void (* fill_window) (deflate_state *s);
Pos (* insert_string) (deflate_state *const s, const Pos str, unsigned int count);
void (* insert_string) (deflate_state *const s, const uint32_t str, uint32_t count);
Pos (* quick_insert_string)(deflate_state *const s, const uint32_t str);
uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, size_t len);
uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
void (* slide_hash) (deflate_state *s);
uint32_t (* compare258) (const unsigned char *src0, const unsigned char *src1);
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
uint32_t (* chunksize) (void);
uint8_t* (* chunkcopy) (uint8_t *out, uint8_t const *from, unsigned len);
uint8_t* (* chunkcopy_safe) (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
uint8_t* (* chunkunroll) (uint8_t *out, unsigned *dist, unsigned *len);
uint8_t* (* chunkmemset) (uint8_t *out, unsigned dist, unsigned len);
uint8_t* (* chunkmemset_safe) (uint8_t *out, unsigned dist, unsigned len, unsigned left);
};
ZLIB_INTERNAL extern __thread struct functable_s functable;
Z_INTERNAL extern Z_TLS struct functable_s functable;
#endif

View File

@ -1,24 +0,0 @@
/* gzclose.c -- zlib gzclose() function
* Copyright (C) 2004, 2010 Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zbuild.h"
#include "gzguts.h"
/* gzclose() is in a separate file so that it is linked in only if it is used.
That way the other gzclose functions can be used instead to avoid linking in
unneeded compression or decompression routines. */
/* Close a gzFile by dispatching to the reader- or writer-specific close
 * routine according to the mode stored in the file's state.
 * Returns Z_STREAM_ERROR for a NULL file, otherwise the result of
 * gzclose_r / gzclose_w. When NO_GZCOMPRESS is defined only gzclose_r is
 * ever called and no NULL check is done here.
 */
int ZEXPORT PREFIX(gzclose)(gzFile file) {
#ifndef NO_GZCOMPRESS
    /* The cast itself does not dereference; the NULL check below comes first. */
    gz_state *state = (gz_state *)file;

    if (file == NULL)
        return Z_STREAM_ERROR;

    if (state->mode == GZ_READ)
        return PREFIX(gzclose_r)(file);
    return PREFIX(gzclose_w)(file);
#else
    return PREFIX(gzclose_r)(file);
#endif
}

Some files were not shown because too many files have changed in this diff Show More