mirror of
https://github.com/EQEmu/Server.git
synced 2025-12-11 16:51:29 +00:00
[Library] Update zlibng (#1255)
* Update zlibng * Set cmake path more directly in zlibng to hopefully fix an issue with the build on drone * I'm dumb, missing / in path * Mackal helps with a dumb gitignore issue * Adding all the files, not sure what's ignoring them and I'm tired of looking * Some tweaks to zlibng build to hopefully get it to build properly. Works on MSVC now
This commit is contained in:
parent
e6dee96266
commit
2957f5084d
@ -252,6 +252,7 @@ IF(ZLIB_FOUND)
|
||||
SET(ZLIB_LIBRARY_TYPE "zlib-ng")
|
||||
SET(ZLIB_LIBRARY_LIBS "zlibstatic")
|
||||
SET(ZLIB_LIBRARY_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/libs/zlibng")
|
||||
INCLUDE_DIRECTORIES(SYSTEM "${CMAKE_CURRENT_BINARY_DIR}/libs/zlibng")
|
||||
ELSE()
|
||||
SET(ZLIB_LIBRARY_TYPE " zlib")
|
||||
SET(ZLIB_LIBRARY_LIBS ${ZLIB_LIBRARY})
|
||||
|
||||
39
libs/zlibng/.github/workflows/analyze.yml
vendored
Normal file
39
libs/zlibng/.github/workflows/analyze.yml
vendored
Normal file
@ -0,0 +1,39 @@
|
||||
name: CI Static Analysis
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
GCC-10:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Install packages (Ubuntu)
|
||||
run: |
|
||||
sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc-10
|
||||
- name: Generate project files
|
||||
run: |
|
||||
cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=OFF -DWITH_CODE_COVERAGE=OFF -DWITH_MAINTAINER_WARNINGS=OFF
|
||||
env:
|
||||
CC: gcc-10
|
||||
CFLAGS: "-fanalyzer -Werror -Wanalyzer-double-fclose -Wanalyzer-double-free -Wanalyzer-exposure-through-output-file -Wanalyzer-file-leak -Wanalyzer-free-of-non-heap -Wanalyzer-malloc-leak -Wanalyzer-null-argument -Wanalyzer-null-dereference -Wanalyzer-possible-null-argument -Wanalyzer-possible-null-dereference -Wanalyzer-stale-setjmp-buffer -Wanalyzer-tainted-array-index -Wanalyzer-unsafe-call-within-signal-handler -Wanalyzer-use-after-free -Wanalyzer-use-of-pointer-in-stale-stack-frame"
|
||||
CI: true
|
||||
- name: Compile source code
|
||||
run: |
|
||||
cmake --build . --config Release > /dev/null
|
||||
Clang-12:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
- name: Install packages (Ubuntu)
|
||||
run: |
|
||||
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
|
||||
sudo apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic main" -y
|
||||
sudo apt install clang-tools-12 -y
|
||||
- name: Generate project files
|
||||
run: |
|
||||
scan-build-12 --status-bugs cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=OFF -DWITH_CODE_COVERAGE=OFF -DWITH_MAINTAINER_WARNINGS=OFF
|
||||
env:
|
||||
CI: true
|
||||
- name: Compile source code
|
||||
run: |
|
||||
scan-build-12 --status-bugs cmake --build . --config Release > /dev/null
|
||||
381
libs/zlibng/.github/workflows/cmake.yml
vendored
Normal file
381
libs/zlibng/.github/workflows/cmake.yml
vendored
Normal file
@ -0,0 +1,381 @@
|
||||
name: CI CMake
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
ci-cmake:
|
||||
name: ${{ matrix.name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: Ubuntu GCC
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_SANITIZER=Address
|
||||
codecov: ubuntu_gcc
|
||||
|
||||
- name: Ubuntu GCC OSB -O1 No Unaligned64
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_UNALIGNED=ON -DUNALIGNED64_OK=OFF -DWITH_SANITIZER=Undefined
|
||||
build-dir: ../build
|
||||
build-src-dir: ../zlib-ng
|
||||
codecov: ubuntu_gcc_osb
|
||||
cflags: -O1 -g3
|
||||
|
||||
- name: Ubuntu GCC -O3 No Unaligned
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_UNALIGNED=OFF
|
||||
codecov: ubuntu_gcc_o3
|
||||
cflags: -O3
|
||||
|
||||
- name: Ubuntu GCC Link Zlib
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DZLIB_DUAL_LINK=ON
|
||||
|
||||
- name: Ubuntu GCC No AVX2
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_AVX2=OFF -DWITH_SANITIZER=Undefined
|
||||
codecov: ubuntu_gcc_no_avx2
|
||||
|
||||
- name: Ubuntu GCC No SSE2
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_SSE2=OFF -DWITH_SANITIZER=Undefined
|
||||
codecov: ubuntu_gcc_no_sse2
|
||||
|
||||
- name: Ubuntu GCC No SSE4
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_SSE4=OFF -DWITH_SANITIZER=Undefined
|
||||
codecov: ubuntu_gcc_no_sse4
|
||||
|
||||
- name: Ubuntu GCC No PCLMULQDQ
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DWITH_PCLMULQDQ=OFF -DWITH_SANITIZER=Undefined
|
||||
codecov: ubuntu_gcc_no_pclmulqdq
|
||||
|
||||
- name: Ubuntu GCC Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
cmake-args: -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Address
|
||||
codecov: ubuntu_gcc_compat_no_opt
|
||||
cflags: -DNOT_TWEAK_COMPILER
|
||||
|
||||
- name: Ubuntu GCC ARM SF
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi -DWITH_SANITIZER=Address
|
||||
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
|
||||
codecov: ubuntu_gcc_armsf
|
||||
|
||||
- name: Ubuntu GCC ARM SF Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
|
||||
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
|
||||
codecov: ubuntu_gcc_armsf_compat_no_opt
|
||||
|
||||
- name: Ubuntu GCC ARM HF
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_SANITIZER=Address
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
codecov: ubuntu_gcc_armhf
|
||||
|
||||
- name: Ubuntu GCC ARM HF No ACLE
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_ACLE=OFF -DWITH_SANITIZER=Address
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
codecov: ubuntu_gcc_armhf_no_acle
|
||||
|
||||
- name: Ubuntu GCC ARM HF No NEON
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DWITH_NEON=OFF -DWITH_SANITIZER=Address
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
codecov: ubuntu_gcc_armhf_no_neon
|
||||
|
||||
- name: Ubuntu GCC ARM HF Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
codecov: ubuntu_gcc_armhf_compat_no_opt
|
||||
|
||||
- name: Ubuntu GCC AARCH64
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_SANITIZER=Address
|
||||
asan-options: detect_leaks=0
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
codecov: ubuntu_gcc_aarch64
|
||||
|
||||
- name: Ubuntu GCC AARCH64 No ACLE
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_ACLE=OFF -DWITH_SANITIZER=Undefined
|
||||
asan-options: detect_leaks=0
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
codecov: ubuntu_gcc_aarch64_no_acle
|
||||
|
||||
- name: Ubuntu GCC AARCH64 No NEON
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DWITH_NEON=OFF -DWITH_SANITIZER=Undefined
|
||||
asan-options: detect_leaks=0
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
codecov: ubuntu_gcc_aarch64_no_neon
|
||||
|
||||
- name: Ubuntu GCC AARCH64 Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF -DWITH_SANITIZER=Undefined
|
||||
asan-options: detect_leaks=0
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
codecov: ubuntu_gcc_aarch64_compat_no_opt
|
||||
|
||||
- name: Ubuntu GCC PPC
|
||||
os: ubuntu-latest
|
||||
compiler: powerpc-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc.cmake
|
||||
packages: qemu gcc-powerpc-linux-gnu libc-dev-powerpc-cross
|
||||
ldflags: -static
|
||||
codecov: ubuntu_gcc_ppc
|
||||
|
||||
- name: Ubuntu GCC PPC64
|
||||
os: ubuntu-latest
|
||||
compiler: powerpc64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64.cmake
|
||||
packages: qemu gcc-powerpc64-linux-gnu libc-dev-ppc64-cross
|
||||
ldflags: -static
|
||||
codecov: ubuntu_gcc_ppc64
|
||||
|
||||
- name: Ubuntu GCC PPC64LE
|
||||
os: ubuntu-latest
|
||||
compiler: powerpc64le-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64le.cmake
|
||||
packages: qemu gcc-powerpc64le-linux-gnu libc-dev-ppc64el-cross
|
||||
codecov: ubuntu_gcc_ppc64le
|
||||
|
||||
- name: Ubuntu GCC SPARC64
|
||||
os: ubuntu-latest
|
||||
compiler: sparc64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-sparc64.cmake
|
||||
packages: qemu gcc-sparc64-linux-gnu libc-dev-sparc64-cross
|
||||
ldflags: -static
|
||||
codecov: ubuntu_gcc_sparc64
|
||||
|
||||
- name: Ubuntu GCC S390X
|
||||
os: ubuntu-latest
|
||||
compiler: s390x-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DWITH_SANITIZER=Address
|
||||
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
|
||||
ldflags: -static
|
||||
codecov: ubuntu_gcc_s390x
|
||||
|
||||
- name: Ubuntu GCC S390X DFLTCC
|
||||
os: ubuntu-latest
|
||||
compiler: s390x-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DWITH_DFLTCC_DEFLATE=ON -DWITH_DFLTCC_INFLATE=ON -DWITH_SANITIZER=Address
|
||||
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
|
||||
ldflags: -static
|
||||
codecov: ubuntu_gcc_s390x
|
||||
|
||||
- name: Ubuntu GCC S390X DFLTCC Compat
|
||||
os: ubuntu-latest
|
||||
compiler: s390x-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-s390x.cmake -DZLIB_COMPAT=ON -DWITH_DFLTCC_DEFLATE=ON -DWITH_DFLTCC_INFLATE=ON -DWITH_SANITIZER=Undefined
|
||||
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
|
||||
ldflags: -static
|
||||
codecov: ubuntu_gcc_s390x
|
||||
|
||||
- name: Ubuntu MinGW i686
|
||||
os: ubuntu-latest
|
||||
compiler: i686-w64-mingw32-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-i686.cmake
|
||||
packages: wine32 gcc-mingw-w64
|
||||
# Codecov disabled due to gcov locking issue error
|
||||
|
||||
- name: Ubuntu MinGW x86_64
|
||||
os: ubuntu-latest
|
||||
compiler: x86_64-w64-mingw32-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-mingw-x86_64.cmake
|
||||
packages: wine-stable gcc-mingw-w64
|
||||
codecov: ubuntu_gcc_mingw_x86_64
|
||||
|
||||
- name: Ubuntu Clang
|
||||
os: ubuntu-latest
|
||||
compiler: clang
|
||||
packages: llvm-6.0
|
||||
gcov-exec: llvm-cov-6.0 gcov
|
||||
codecov: ubuntu_clang
|
||||
|
||||
- name: Ubuntu Clang Inflate Strict
|
||||
os: ubuntu-latest
|
||||
compiler: clang
|
||||
cmake-args: -DWITH_INFLATE_STRICT=ON
|
||||
packages: llvm-6.0
|
||||
gcov-exec: llvm-cov-6.0 gcov
|
||||
codecov: ubuntu_clang_inflate_strict
|
||||
|
||||
- name: Ubuntu Clang Inflate Allow Invalid Dist
|
||||
os: ubuntu-latest
|
||||
compiler: clang
|
||||
cmake-args: -DWITH_INFLATE_ALLOW_INVALID_DIST=ON
|
||||
packages: llvm-6.0
|
||||
gcov-exec: llvm-cov-6.0 gcov
|
||||
codecov: ubuntu_clang_inflate_allow_invalid_dist
|
||||
|
||||
- name: Ubuntu Clang Memory Map
|
||||
os: ubuntu-latest
|
||||
compiler: clang
|
||||
cflags: -DUSE_MMAP
|
||||
packages: llvm-6.0
|
||||
gcov-exec: llvm-cov-6.0 gcov
|
||||
codecov: ubuntu_clang_mmap
|
||||
|
||||
- name: Ubuntu Clang Debug
|
||||
os: ubuntu-latest
|
||||
compiler: clang
|
||||
packages: llvm-6.0
|
||||
gcov-exec: llvm-cov-6.0 gcov
|
||||
codecov: ubuntu_clang_debug
|
||||
build-config: Debug
|
||||
|
||||
- name: Ubuntu Clang MSAN
|
||||
os: ubuntu-latest
|
||||
compiler: clang
|
||||
cmake-args: -GNinja -DWITH_SANITIZER=Memory
|
||||
packages: ninja-build llvm-6.0
|
||||
gcov-exec: llvm-cov-6.0 gcov
|
||||
cflags: -g3 -fno-omit-frame-pointer -fno-optimize-sibling-calls -fsanitize-memory-track-origins
|
||||
codecov: ubuntu_clang_msan
|
||||
|
||||
- name: Windows MSVC Win32
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A Win32
|
||||
|
||||
- name: Windows MSVC Win64
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A x64
|
||||
|
||||
- name: Windows MSVC ARM No Test
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A ARM
|
||||
|
||||
- name: Windows MSVC ARM64 No Test
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A ARM64
|
||||
|
||||
- name: Windows GCC
|
||||
os: windows-latest
|
||||
compiler: gcc
|
||||
cmake-args: -G Ninja
|
||||
codecov: win64_gcc
|
||||
|
||||
- name: Windows GCC Compat No Opt
|
||||
os: windows-latest
|
||||
compiler: gcc
|
||||
cmake-args: -G Ninja -DZLIB_COMPAT=ON -DWITH_NEW_STRATEGIES=OFF -DWITH_OPTIM=OFF
|
||||
codecov: win64_gcc_compat_no_opt
|
||||
|
||||
- name: macOS Clang
|
||||
os: macos-latest
|
||||
compiler: clang
|
||||
cmake-args: -DWITH_SANITIZER=Address
|
||||
codecov: macos_clang
|
||||
|
||||
- name: macOS GCC
|
||||
os: macos-latest
|
||||
compiler: gcc-10
|
||||
cmake-args: -DWITH_SANITIZER=Undefined
|
||||
packages: gcc@10
|
||||
gcov-exec: gcov-10
|
||||
codecov: macos_gcc
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Checkout test corpora
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
repository: nmoinvaz/corpora
|
||||
path: test/data/corpora
|
||||
|
||||
- name: Install packages (Ubuntu)
|
||||
if: runner.os == 'Linux' && matrix.packages
|
||||
run: |
|
||||
sudo dpkg --add-architecture i386 # Required for wine32
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ${{ matrix.packages }}
|
||||
|
||||
- name: Install packages (Windows)
|
||||
if: runner.os == 'Windows'
|
||||
run: |
|
||||
choco install ninja ${{ matrix.packages }} --no-progress
|
||||
|
||||
- name: Install packages (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: |
|
||||
brew install ninja ${{ matrix.packages }}
|
||||
env:
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: 1
|
||||
|
||||
- name: Install codecov.io tools
|
||||
if: matrix.codecov
|
||||
run: |
|
||||
python -u -m pip install codecov
|
||||
|
||||
- name: Generate project files
|
||||
# Shared libraries turned off for qemu ppc* and sparc & reduce code coverage sources
|
||||
run: |
|
||||
mkdir ${{ matrix.build-dir || '.not-used' }}
|
||||
cd ${{ matrix.build-dir || '.' }}
|
||||
cmake ${{ matrix.build-src-dir || '.' }} ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=${{ matrix.build-config || 'Release' }} -DBUILD_SHARED_LIBS=OFF -DWITH_FUZZERS=ON -DWITH_CODE_COVERAGE=ON -DWITH_MAINTAINER_WARNINGS=ON
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CFLAGS: ${{ matrix.cflags }}
|
||||
LDFLAGS: ${{ matrix.ldflags }}
|
||||
CI: true
|
||||
|
||||
- name: Compile source code
|
||||
run: |
|
||||
cd ${{ matrix.build-dir || '.' }}
|
||||
cmake --build . --config ${{ matrix.build-config || 'Release' }}
|
||||
|
||||
- name: Run test cases
|
||||
# Don't run tests on Windows ARM
|
||||
if: runner.os != 'Windows' || contains(matrix.name, 'ARM') == false
|
||||
run: |
|
||||
cd ${{ matrix.build-dir || '.' }}
|
||||
ctest --verbose -C Release --output-on-failure --max-width 120 -j 6
|
||||
env:
|
||||
ASAN_OPTIONS: ${{ matrix.asan-options || 'verbosity=0' }}:abort_on_error=1
|
||||
MSAN_OPTIONS: ${{ matrix.msan-options || 'verbosity=0' }}:abort_on_error=1
|
||||
TSAN_OPTIONS: ${{ matrix.tsan-options || 'verbosity=0' }}:abort_on_error=1
|
||||
LSAN_OPTIONS: ${{ matrix.lsan-options || 'verbosity=0' }}:abort_on_error=1
|
||||
|
||||
- name: Upload coverage report
|
||||
if: matrix.codecov && ( env.CODECOV_TOKEN_SECRET != '' || github.repository == 'zlib-ng/zlib-ng' )
|
||||
shell: bash
|
||||
run: |
|
||||
bash tools/codecov-upload.sh
|
||||
env:
|
||||
# Codecov does not yet support GitHub Actions
|
||||
CODECOV_TOKEN_SECRET: "${{secrets.CODECOV_TOKEN}}"
|
||||
CODECOV_TOKEN: "${{ secrets.CODECOV_TOKEN || 'e4fdf847-f541-4ab1-9d50-3d27e5913906' }}"
|
||||
CODECOV_FLAGS: "${{ matrix.codecov }}"
|
||||
CODECOV_NAME: "${{ matrix.name }}"
|
||||
CODECOV_EXEC: "${{ matrix.gcov-exec || 'gcov' }}"
|
||||
CODECOV_DIR: "${{ matrix.build-dir || '.' }}"
|
||||
185
libs/zlibng/.github/workflows/configure.yml
vendored
Normal file
185
libs/zlibng/.github/workflows/configure.yml
vendored
Normal file
@ -0,0 +1,185 @@
|
||||
name: CI Configure
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
ci-configure:
|
||||
name: ${{ matrix.name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: Ubuntu GCC
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
configure-args: --warn
|
||||
|
||||
- name: Ubuntu GCC OSB
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
configure-args: --warn
|
||||
build-dir: ../build
|
||||
build-src-dir: ../zlib-ng
|
||||
|
||||
- name: Ubuntu GCC Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
|
||||
|
||||
- name: Ubuntu GCC ARM SF
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
configure-args: --warn
|
||||
chost: arm-linux-gnueabi
|
||||
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
|
||||
|
||||
- name: Ubuntu GCC ARM SF Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
|
||||
chost: arm-linux-gnueabi
|
||||
packages: qemu gcc-arm-linux-gnueabi libc-dev-armel-cross
|
||||
|
||||
- name: Ubuntu GCC ARM HF
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
configure-args: --warn
|
||||
chost: arm-linux-gnueabihf
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
|
||||
- name: Ubuntu GCC ARM HF No ACLE
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
configure-args: --warn --without-acle
|
||||
chost: arm-linux-gnueabihf
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
|
||||
- name: Ubuntu GCC ARM HF No NEON
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
configure-args: --warn --without-neon
|
||||
chost: arm-linux-gnueabihf
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
|
||||
- name: Ubuntu GCC ARM HF Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
|
||||
chost: arm-linux-gnueabihf
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc-dev-armel-cross
|
||||
|
||||
- name: Ubuntu GCC AARCH64
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
configure-args: --warn
|
||||
chost: aarch64-linux-gnu
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
|
||||
- name: Ubuntu GCC AARCH64 No ACLE
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
configure-args: --warn --without-acle
|
||||
chost: aarch64-linux-gnu
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
|
||||
- name: Ubuntu GCC AARCH64 No NEON
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
configure-args: --warn --without-neon
|
||||
chost: aarch64-linux-gnu
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
|
||||
- name: Ubuntu GCC AARCH64 Compat No Opt
|
||||
os: ubuntu-latest
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
configure-args: --warn --zlib-compat --without-optimizations --without-new-strategies
|
||||
chost: aarch64-linux-gnu
|
||||
packages: qemu gcc-aarch64-linux-gnu libc-dev-arm64-cross
|
||||
|
||||
- name: Ubuntu GCC PPC
|
||||
os: ubuntu-latest
|
||||
compiler: powerpc-linux-gnu-gcc
|
||||
configure-args: --warn --static
|
||||
chost: powerpc-linux-gnu
|
||||
packages: qemu gcc-powerpc-linux-gnu libc-dev-powerpc-cross
|
||||
cflags: -static
|
||||
ldflags: -static
|
||||
|
||||
- name: Ubuntu GCC PPC64
|
||||
os: ubuntu-latest
|
||||
compiler: powerpc64-linux-gnu-gcc
|
||||
configure-args: --warn --static
|
||||
chost: powerpc-linux-gnu
|
||||
packages: qemu gcc-powerpc64-linux-gnu libc-dev-ppc64-cross
|
||||
cflags: -static
|
||||
ldflags: -static
|
||||
|
||||
- name: Ubuntu GCC PPC64LE
|
||||
os: ubuntu-latest
|
||||
compiler: powerpc64le-linux-gnu-gcc
|
||||
configure-args: --warn
|
||||
chost: powerpc64le-linux-gnu
|
||||
packages: qemu gcc-powerpc64le-linux-gnu libc-dev-ppc64el-cross
|
||||
|
||||
- name: Ubuntu GCC S390X
|
||||
os: ubuntu-latest
|
||||
compiler: s390x-linux-gnu-gcc
|
||||
configure-args: --warn --static
|
||||
chost: s390x-linux-gnu
|
||||
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
|
||||
cflags: -static
|
||||
ldflags: -static
|
||||
|
||||
- name: Ubuntu GCC S390X DFLTCC
|
||||
os: ubuntu-latest
|
||||
compiler: s390x-linux-gnu-gcc
|
||||
configure-args: --warn --static --with-dfltcc-deflate --with-dfltcc-inflate
|
||||
chost: s390x-linux-gnu
|
||||
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
|
||||
cflags: -static
|
||||
ldflags: -static
|
||||
|
||||
- name: Ubuntu GCC S390X DFLTCC Compat
|
||||
os: ubuntu-latest
|
||||
compiler: s390x-linux-gnu-gcc
|
||||
configure-args: --warn --zlib-compat --static --with-dfltcc-deflate --with-dfltcc-inflate
|
||||
chost: s390x-linux-gnu
|
||||
packages: qemu gcc-s390x-linux-gnu libc-dev-s390x-cross
|
||||
cflags: -static
|
||||
ldflags: -static
|
||||
|
||||
- name: macOS GCC
|
||||
os: macOS-latest
|
||||
compiler: gcc
|
||||
configure-args: --warn
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Install packages (Ubuntu)
|
||||
if: runner.os == 'Linux' && matrix.packages
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ${{ matrix.packages }}
|
||||
|
||||
- name: Generate project files
|
||||
run: |
|
||||
mkdir ${{ matrix.build-dir || '.not-used' }}
|
||||
cd ${{ matrix.build-dir || '.' }}
|
||||
${{ matrix.build-src-dir || '.' }}/configure ${{ matrix.configure-args }}
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CFLAGS: ${{ matrix.cflags }}
|
||||
LDFLAGS: ${{ matrix.ldflags }}
|
||||
CHOST: ${{ matrix.chost }}
|
||||
CI: true
|
||||
|
||||
- name: Compile source code
|
||||
run: |
|
||||
cd ${{ matrix.build-dir || '.' }}
|
||||
make -j2
|
||||
|
||||
- name: Run test cases
|
||||
run: |
|
||||
cd ${{ matrix.build-dir || '.' }}
|
||||
make test
|
||||
23
libs/zlibng/.github/workflows/fuzz.yml
vendored
Normal file
23
libs/zlibng/.github/workflows/fuzz.yml
vendored
Normal file
@ -0,0 +1,23 @@
|
||||
name: CI Fuzz
|
||||
on: [pull_request]
|
||||
jobs:
|
||||
Fuzzing:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Build Fuzzers
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master
|
||||
with:
|
||||
oss-fuzz-project-name: 'zlib-ng'
|
||||
dry-run: false
|
||||
- name: Run Fuzzers
|
||||
uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master
|
||||
with:
|
||||
oss-fuzz-project-name: 'zlib-ng'
|
||||
fuzz-seconds: 600
|
||||
dry-run: false
|
||||
- name: Upload Crash
|
||||
uses: actions/upload-artifact@v1
|
||||
if: failure()
|
||||
with:
|
||||
name: artifacts
|
||||
path: ./out/artifacts
|
||||
46
libs/zlibng/.github/workflows/libpng.yml
vendored
Normal file
46
libs/zlibng/.github/workflows/libpng.yml
vendored
Normal file
@ -0,0 +1,46 @@
|
||||
name: CI Libpng
|
||||
on: [pull_request]
|
||||
jobs:
|
||||
pngtest:
|
||||
name: Ubuntu Clang
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- name: Checkout repository (zlib-ng)
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Generate project files (zlib-ng)
|
||||
run: |
|
||||
cmake . -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DZLIB_COMPAT=ON -DZLIB_ENABLE_TESTS=OFF
|
||||
env:
|
||||
CC: clang
|
||||
CFLAGS: -fPIC
|
||||
CI: true
|
||||
|
||||
- name: Compile source code (zlib-ng)
|
||||
run: |
|
||||
cmake --build . --config Release
|
||||
|
||||
- name: Checkout repository (libpng)
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
repository: glennrp/libpng
|
||||
path: libpng
|
||||
|
||||
- name: Generate project files (libpng)
|
||||
run: |
|
||||
cd libpng
|
||||
cmake . -DCMAKE_BUILD_TYPE=Release -DPNG_TESTS=ON -DPNG_STATIC=OFF -DZLIB_INCLUDE_DIR=.. -DZLIB_LIBRARY=$PWD/../libz.a
|
||||
env:
|
||||
CC: clang
|
||||
CI: true
|
||||
|
||||
- name: Compile source code (libpng)
|
||||
run: |
|
||||
cd libpng
|
||||
cmake --build . --config Release
|
||||
|
||||
- name: Run test cases (libpng)
|
||||
run: |
|
||||
cd libpng
|
||||
ctest -C Release --output-on-failure --max-width 120
|
||||
48
libs/zlibng/.github/workflows/nmake.yml
vendored
Normal file
48
libs/zlibng/.github/workflows/nmake.yml
vendored
Normal file
@ -0,0 +1,48 @@
|
||||
name: CI NMake
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
ci-cmake:
|
||||
name: ${{ matrix.name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: Windows NMake x86
|
||||
os: windows-latest
|
||||
makefile: win32/Makefile.msc
|
||||
vc-vars: x86
|
||||
|
||||
- name: Windows NMake x64
|
||||
os: windows-latest
|
||||
makefile: win32/Makefile.msc
|
||||
vc-vars: x86_amd64
|
||||
|
||||
- name: Windows NMake ARM No Test
|
||||
os: windows-latest
|
||||
makefile: win32/Makefile.arm
|
||||
vc-vars: x86_arm
|
||||
|
||||
- name: Windows NMake ARM64 No Test
|
||||
os: windows-latest
|
||||
makefile: win32/Makefile.a64
|
||||
vc-vars: x86_arm64
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Compile source code
|
||||
shell: cmd
|
||||
run: |
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.vc-vars }}
|
||||
nmake -f ${{ matrix.makefile }}
|
||||
|
||||
- name: Run test cases
|
||||
shell: cmd
|
||||
# Don't run tests on Windows ARM
|
||||
if: contains(matrix.vc-vars, 'arm') == false
|
||||
run: |
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.vc-vars }}
|
||||
nmake -f ${{ matrix.makefile }} test
|
||||
nmake -f ${{ matrix.makefile }} testdll
|
||||
121
libs/zlibng/.github/workflows/pkgcheck.yml
vendored
Normal file
121
libs/zlibng/.github/workflows/pkgcheck.yml
vendored
Normal file
@ -0,0 +1,121 @@
|
||||
name: CI Pkgcheck
|
||||
on: [push, pull_request]
|
||||
jobs:
|
||||
ci-pkgcheck:
|
||||
name: ${{ matrix.name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: Ubuntu GCC
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
|
||||
- name: Ubuntu GCC -m32
|
||||
os: ubuntu-latest
|
||||
compiler: gcc
|
||||
packages: gcc-multilib
|
||||
cmake-args: -DCMAKE_C_FLAGS=-m32
|
||||
cflags: -m32
|
||||
ldflags: -m32
|
||||
|
||||
- name: Ubuntu GCC ARM HF
|
||||
os: ubuntu-latest
|
||||
chost: arm-linux-gnueabihf
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf
|
||||
packages: qemu gcc-arm-linux-gnueabihf libc6-dev-armhf-cross
|
||||
|
||||
- name: Ubuntu GCC AARCH64
|
||||
os: ubuntu-latest
|
||||
chost: aarch64-linux-gnu
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake
|
||||
packages: qemu gcc-aarch64-linux-gnu libc6-dev-arm64-cross
|
||||
|
||||
- name: Ubuntu GCC PPC
|
||||
os: ubuntu-latest
|
||||
chost: powerpc-linux-gnu
|
||||
compiler: powerpc-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc.cmake
|
||||
packages: qemu gcc-powerpc-linux-gnu libc6-dev-powerpc-cross
|
||||
|
||||
- name: Ubuntu GCC PPC64LE
|
||||
os: ubuntu-latest
|
||||
chost: powerpc64le-linux-gnu
|
||||
compiler: powerpc64le-linux-gnu-gcc
|
||||
cmake-args: -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-powerpc64le.cmake
|
||||
packages: qemu gcc-powerpc64le-linux-gnu libc6-dev-ppc64el-cross
|
||||
|
||||
- name: macOS Clang
|
||||
os: macOS-latest
|
||||
compiler: clang
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Install packages (Ubuntu)
|
||||
if: runner.os == 'Linux'
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends abigail-tools ninja-build diffoscope ${{ matrix.packages }}
|
||||
|
||||
- name: Install packages (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
run: |
|
||||
brew install ninja diffoscope ${{ matrix.packages }}
|
||||
env:
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: 1
|
||||
|
||||
- name: Select Xcode version (macOS)
|
||||
# Use a version of Xcode that supports ZERO_AR_DATE until CMake supports
|
||||
# AppleClang linking with libtool using -D argument
|
||||
# https://gitlab.kitware.com/cmake/cmake/-/issues/19852
|
||||
if: runner.os == 'macOS'
|
||||
uses: maxim-lobanov/setup-xcode@v1
|
||||
with:
|
||||
xcode-version: '12.1.1'
|
||||
|
||||
- name: Compare builds
|
||||
run: |
|
||||
sh test/pkgcheck.sh
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CFLAGS: ${{ matrix.cflags }}
|
||||
CHOST: ${{ matrix.chost }}
|
||||
CMAKE_ARGS: ${{ matrix.cmake-args }}
|
||||
LDFLAGS: ${{ matrix.ldflags }}
|
||||
|
||||
- name: Compare builds (compat)
|
||||
run: |
|
||||
sh test/pkgcheck.sh --zlib-compat
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CFLAGS: ${{ matrix.cflags }}
|
||||
CHOST: ${{ matrix.chost }}
|
||||
CMAKE_ARGS: ${{ matrix.cmake-args }}
|
||||
LDFLAGS: ${{ matrix.ldflags }}
|
||||
|
||||
- name: Check ABI
|
||||
# macOS runner does not contain abigail
|
||||
if: runner.os != 'macOS'
|
||||
run: |
|
||||
sh test/abicheck.sh --refresh_if
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CFLAGS: ${{ matrix.cflags }}
|
||||
CHOST: ${{ matrix.chost }}
|
||||
LDFLAGS: ${{ matrix.ldflags }}
|
||||
|
||||
- name: Check ABI (compat)
|
||||
# macOS runner does not contain abigail
|
||||
if: runner.os != 'macOS'
|
||||
run: |
|
||||
sh test/abicheck.sh --zlib-compat --refresh_if
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CFLAGS: ${{ matrix.cflags }}
|
||||
CHOST: ${{ matrix.chost }}
|
||||
LDFLAGS: ${{ matrix.ldflags }}
|
||||
73
libs/zlibng/.github/workflows/release.yml
vendored
Normal file
73
libs/zlibng/.github/workflows/release.yml
vendored
Normal file
@ -0,0 +1,73 @@
|
||||
name: CI Release
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- '*'
|
||||
jobs:
|
||||
ci-cmake:
|
||||
name: ${{ matrix.name }}
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- name: Windows MSVC Win32
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A Win32
|
||||
deploy-name: win32
|
||||
|
||||
- name: Windows MSVC Win32 Compat
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A Win32 -DZLIB_COMPAT=ON
|
||||
deploy-name: win32-compat
|
||||
|
||||
- name: Windows MSVC Win64
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A x64
|
||||
deploy-name: win64
|
||||
|
||||
- name: Windows MSVC Win64 Compat
|
||||
os: windows-latest
|
||||
compiler: cl
|
||||
cmake-args: -A x64 -DZLIB_COMPAT=ON
|
||||
deploy-name: win64-compat
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v1
|
||||
|
||||
- name: Set environment variables
|
||||
shell: bash
|
||||
run: echo "tag=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV
|
||||
|
||||
- name: Generate project files
|
||||
run: |
|
||||
cmake . ${{ matrix.cmake-args }} -DCMAKE_BUILD_TYPE=Release -DZLIB_ENABLE_TESTS=ON -DCMAKE_INSTALL_PREFIX=out -DINSTALL_UTILS=ON
|
||||
env:
|
||||
CC: ${{ matrix.compiler }}
|
||||
CI: true
|
||||
|
||||
- name: Compile source code
|
||||
run: |
|
||||
cmake --build . --config Release --target install
|
||||
|
||||
- name: Package release (Windows)
|
||||
if: runner.os == 'Windows'
|
||||
run: |
|
||||
cd out
|
||||
7z a -tzip ../zlib-ng-${{ matrix.deploy-name }}.zip bin include lib ../LICENSE.md ../README.md
|
||||
|
||||
- name: Upload release (Windows)
|
||||
uses: svenstaro/upload-release-action@v1-release
|
||||
if: runner.os == 'Windows'
|
||||
with:
|
||||
asset_name: zlib-ng-${{ matrix.deploy-name }}.zip
|
||||
file: zlib-ng-${{ matrix.deploy-name }}.zip
|
||||
tag: ${{env.tag}}
|
||||
repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
overwrite: true
|
||||
env:
|
||||
GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
12
libs/zlibng/.gitignore
vendored
12
libs/zlibng/.gitignore
vendored
@ -13,14 +13,18 @@
|
||||
*.gcno
|
||||
*.gcov
|
||||
|
||||
/adler32_test
|
||||
/adler32_testsh
|
||||
/example
|
||||
/example64
|
||||
/examplesh
|
||||
/libz.so*
|
||||
/libz-ng.so*
|
||||
/makefixed
|
||||
/minigzip
|
||||
/minigzip64
|
||||
/minigzipsh
|
||||
/switchlevels
|
||||
/zlib.pc
|
||||
/zlib-ng.pc
|
||||
/CVE-2003-0107
|
||||
@ -46,8 +50,9 @@ foo.gz
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
Testing
|
||||
*.cmake
|
||||
/*.cmake
|
||||
*.stackdump
|
||||
*._h
|
||||
zconf.h
|
||||
zconf.h.cmakein
|
||||
zconf.h.included
|
||||
@ -61,6 +66,7 @@ a.out
|
||||
/Makefile
|
||||
/arch/arm/Makefile
|
||||
/arch/generic/Makefile
|
||||
/arch/power/Makefile
|
||||
/arch/x86/Makefile
|
||||
.kdev4
|
||||
*.kdev4
|
||||
@ -71,6 +77,10 @@ a.out
|
||||
/zlib.dir
|
||||
/zlibstatic.dir
|
||||
/win32/Debug
|
||||
/build/
|
||||
/build[.-]*/
|
||||
/btmp[12]/
|
||||
/pkgtmp[12]/
|
||||
|
||||
/.idea
|
||||
/cmake-build-debug
|
||||
|
||||
1
libs/zlibng/.shellcheckrc
Normal file
1
libs/zlibng/.shellcheckrc
Normal file
@ -0,0 +1 @@
|
||||
disable=SC2140,SC2086,SC2046,SC2015,SC1097,SC1035,SC1036,SC1007,SC2154,SC2155,SC2000,SC2034,SC2016,SC1091,SC1090,SC2212,SC2143,SC2129,SC2102,SC2069,SC1041,SC1042,SC1044,SC1046,SC1119,SC1110,SC1111,SC1112,SC1102,SC1105,SC1101,SC1004,SC1003,SC1012,SC2068,SC2065,SC2064,SC2063,SC2059,SC2053,SC2048,SC2044,SC2032,SC2031,SC2030,SC2029,SC2025,SC2024,SC2022,SC2018,SC2019,SC2017,SC2014,SC2013,SC2012,SC2009,SC2001,SC2098,SC2096,SC2094,SC2091,SC2092,SC2088,SC2087,SC2076,SC2072,SC2071,SC2223,SC2221,SC2222,SC2217,SC2207,SC2206,SC2205,SC2190,SC2188,SC2187,SC2185,SC2179,SC2178,SC2174,SC2168,SC2167,SC2163,SC2161,SC2160,SC2153,SC2150,SC2148,SC2147,SC2146,SC2142,SC2139,SC2126,SC2123,SC2120,SC2119,SC2117,SC2114,SC1117,SC2164,SC1083,SC2004,SC2125,SC2128,SC2011,SC1008,SC1019,SC2093,SC1132,SC1129,SC2236,SC2237,SC2231,SC2230,SC2229,SC2106,SC2102,SC2243,SC2244,SC2245,SC2247,SC2248,SC2249,SC2250,SC2251,SC2252,SC2181
|
||||
@ -1,283 +0,0 @@
|
||||
language: c
|
||||
cache: ccache
|
||||
dist: xenial
|
||||
|
||||
env:
|
||||
global:
|
||||
- BUILDDIR=.
|
||||
- MAKER="make -j2"
|
||||
- TESTER="make test"
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- os: windows
|
||||
compiler: clang
|
||||
env:
|
||||
- GENERATOR="cmake . "
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
- os: windows
|
||||
compiler: clang
|
||||
env:
|
||||
- GENERATOR="cmake ..\\zlib-ng -DZLIB_COMPAT=ON"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
- BUILDDIR=..\\build
|
||||
- os: windows
|
||||
compiler: gcc
|
||||
env:
|
||||
- GENERATOR="cmake ."
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env: GENERATOR="./configure --warn"
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env: GENERATOR="cmake . -DZLIB_COMPAT=OFF -DWITH_GZFILEOP=ON -DWITH_NEW_STRATEGIES=YES -DWITH_OPTIM=ON"
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env:
|
||||
- GENERATOR="../zlib-ng/configure --warn --zlib-compat"
|
||||
- BUILDDIR=../build
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env: GENERATOR="./configure --warn --zlib-compat --without-optimizations --without-new-strategies"
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env: GENERATOR="cmake ."
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
env:
|
||||
- GENERATOR="cmake ../zlib-ng"
|
||||
- BUILDDIR=../build
|
||||
|
||||
- os: linux
|
||||
compiler: clang
|
||||
env: GENERATOR="./configure --warn --zlib-compat"
|
||||
- os: linux
|
||||
compiler: clang
|
||||
env:
|
||||
- GENERATOR="cmake ../zlib-ng"
|
||||
- BUILDDIR=../build
|
||||
- os: linux
|
||||
compiler: clang
|
||||
env:
|
||||
- GENERATOR="scan-build -v --status-bugs cmake ../zlib-ng"
|
||||
- MAKER="scan-build -v --status-bugs make"
|
||||
- BUILDDIR=../build
|
||||
|
||||
- os: osx
|
||||
compiler: gcc
|
||||
env: GENERATOR="./configure --warn --zlib-compat"
|
||||
- os: osx
|
||||
compiler: gcc
|
||||
env:
|
||||
- GENERATOR="../zlib-ng/configure --warn --zlib-compat"
|
||||
- BUILDDIR=../build
|
||||
- os: osx
|
||||
compiler: gcc
|
||||
env: GENERATOR="cmake ."
|
||||
|
||||
- os: osx
|
||||
compiler: clang
|
||||
env: GENERATOR="./configure --warn --zlib-compat"
|
||||
- os: osx
|
||||
compiler: clang
|
||||
env:
|
||||
- GENERATOR="cmake ../zlib-ng"
|
||||
- BUILDDIR=../build
|
||||
|
||||
# compiling for linux-ppc64le variants
|
||||
- os: linux-ppc64le
|
||||
compiler: gcc
|
||||
env: GENERATOR="cmake ."
|
||||
- os: linux-ppc64le
|
||||
compiler: gcc
|
||||
env:
|
||||
- GENERATOR="cmake ../zlib-ng"
|
||||
- BUILDDIR=../build
|
||||
|
||||
- os: linux-ppc64le
|
||||
compiler: clang
|
||||
env: GENERATOR="./configure --warn --zlib-compat"
|
||||
- os: linux-ppc64le
|
||||
compiler: clang
|
||||
env:
|
||||
- GENERATOR="cmake ../zlib-ng"
|
||||
- BUILDDIR=../build
|
||||
|
||||
# Cross compiling for arm variants
|
||||
- os: linux
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-aarch64-linux-gnu
|
||||
- libc-dev-arm64-cross
|
||||
# For all aarch64 implementations NEON is mandatory, while crypto/crc are not.
|
||||
env:
|
||||
- GENERATOR="./configure --warn --zlib-compat"
|
||||
- CHOST=aarch64-linux-gnu
|
||||
- os: linux
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-aarch64-linux-gnu
|
||||
- libc-dev-arm64-cross
|
||||
# For all aarch64 implementations NEON is mandatory, while crypto/crc are not.
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake . -DZLIB_COMPAT=ON"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
- os: linux
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-aarch64-linux-gnu
|
||||
- libc-dev-arm64-cross
|
||||
env:
|
||||
- GENERATOR="./configure --warn --zlib-compat"
|
||||
- CHOST=aarch64-linux-gnu
|
||||
- os: linux
|
||||
compiler: aarch64-linux-gnu-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-aarch64-linux-gnu
|
||||
- libc-dev-arm64-cross
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-aarch64.cmake ."
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
# Hard-float subsets
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabihf
|
||||
- libc-dev-armhf-cross
|
||||
env:
|
||||
- GENERATOR="./configure --warn"
|
||||
- CHOST=arm-linux-gnueabihf
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabihf
|
||||
- libc-dev-armhf-cross
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabihf
|
||||
- libc-dev-armhf-cross
|
||||
env:
|
||||
- GENERATOR="./configure --warn --zlib-compat --without-neon"
|
||||
- CHOST=arm-linux-gnueabihf
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabihf
|
||||
- libc-dev-armhf-cross
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DWITH_NEON=OFF -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabihf
|
||||
- libc-dev-armhf-cross
|
||||
env:
|
||||
- GENERATOR="./configure --warn --zlib-compat"
|
||||
- CHOST=arm-linux-gnueabihf
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabihf-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabihf
|
||||
- libc-dev-armhf-cross
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabihf"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
# Soft-float subset
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabi
|
||||
- libc-dev-armel-cross
|
||||
env:
|
||||
- GENERATOR="./configure"
|
||||
- CHOST=arm-linux-gnueabi
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabi
|
||||
- libc-dev-armel-cross
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabi
|
||||
- libc-dev-armel-cross
|
||||
env:
|
||||
- GENERATOR="./configure --zlib-compat"
|
||||
- CHOST=arm-linux-gnueabi
|
||||
- os: linux
|
||||
compiler: arm-linux-gnueabi-gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- qemu
|
||||
- gcc-arm-linux-gnueabi
|
||||
- libc-dev-armel-cross
|
||||
env:
|
||||
- GENERATOR="cmake -DCMAKE_TOOLCHAIN_FILE=cmake/toolchain-arm.cmake . -DZLIB_COMPAT=ON -DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi"
|
||||
- MAKER="cmake --build . --config Release"
|
||||
- TESTER="ctest --verbose -C Release"
|
||||
|
||||
script:
|
||||
- mkdir -p $BUILDDIR
|
||||
- cd $BUILDDIR
|
||||
- $GENERATOR
|
||||
- $MAKER
|
||||
- $TESTER
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,55 +0,0 @@
|
||||
CMakeLists.txt cmake build file
|
||||
ChangeLog.zlib history of changes up to the fork from zlib 1.2.11
|
||||
FAQ.zlib Frequently Asked Questions about zlib, as distributed in zlib 1.2.11
|
||||
INDEX this file
|
||||
Makefile dummy Makefile that tells you to ./configure
|
||||
Makefile.in template for Unix Makefile
|
||||
README guess what
|
||||
README.zlib Copy of the original README file distributed in zlib 1.2.11
|
||||
configure configure script for Unix
|
||||
test/example.c zlib usages examples for build testing
|
||||
test/minigzip.c minimal gzip-like functionality for build testing
|
||||
test/infcover.c inf*.c code coverage for build coverage testing
|
||||
treebuild.xml XML description of source file dependencies
|
||||
zconf.h.cmakein zconf.h template for cmake
|
||||
zconf.h.in zconf.h template for configure
|
||||
zlib.3 Man page for zlib
|
||||
zlib.3.pdf Man page in PDF format
|
||||
zlib.map Linux symbol information
|
||||
zlib.pc.in Template for pkg-config descriptor
|
||||
zlib.pc.cmakein zlib.pc template for cmake
|
||||
zlib2ansi perl script to convert source files for C++ compilation
|
||||
|
||||
arch/ architecture-specific code
|
||||
doc/ documentation for formats and algorithms
|
||||
win32/ makefiles for Windows
|
||||
|
||||
zlib public header files (required for library use):
|
||||
zconf.h
|
||||
zlib.h
|
||||
|
||||
private source files used to build the zlib library:
|
||||
adler32.c
|
||||
compress.c
|
||||
crc32.c
|
||||
crc32.h
|
||||
deflate.c
|
||||
deflate.h
|
||||
gzclose.c
|
||||
gzguts.h
|
||||
gzlib.c
|
||||
gzread.c
|
||||
gzwrite.c
|
||||
infback.c
|
||||
inffast.c
|
||||
inffast.h
|
||||
inffixed.h
|
||||
inflate.c
|
||||
inflate.h
|
||||
inftrees.c
|
||||
inftrees.h
|
||||
trees.c
|
||||
trees.h
|
||||
uncompr.c
|
||||
zutil.c
|
||||
zutil.h
|
||||
37
libs/zlibng/INDEX.md
Normal file
37
libs/zlibng/INDEX.md
Normal file
@ -0,0 +1,37 @@
|
||||
Contents
|
||||
--------
|
||||
|
||||
| Name | Description |
|
||||
|:-----------------|:---------------------------------------------------------------|
|
||||
| arch/ | Architecture-specific code |
|
||||
| doc/ | Documentation for formats and algorithms |
|
||||
| test/example.c | Zlib usage examples for build testing |
|
||||
| test/minigzip.c | Minimal gzip-like functionality for build testing |
|
||||
| test/infcover.c | Inflate code coverage for build testing |
|
||||
| win32/ | Shared library version resources for Windows |
|
||||
| CMakeLists.txt | Cmake build script |
|
||||
| configure | Bash configure/build script |
|
||||
| adler32.c | Compute the Adler-32 checksum of a data stream |
|
||||
| chunkset.* | Inline functions to copy small data chunks |
|
||||
| compress.c | Compress a memory buffer |
|
||||
| deflate.* | Compress data using the deflate algorithm |
|
||||
| deflate_fast.c | Compress data using the deflate algorithm with fast strategy |
|
||||
| deflate_medium.c | Compress data using the deflate algorithm with medium strategy |
|
||||
| deflate_slow.c | Compress data using the deflate algorithm with slow strategy |
|
||||
| functable.* | Struct containing function pointers to optimized functions |
|
||||
| gzguts.h | Internal definitions for gzip operations |
|
||||
| gzlib.c | Functions common to reading and writing gzip files |
|
||||
| gzread.c | Read gzip files |
|
||||
| gzwrite.c | Write gzip files |
|
||||
| infback.* | Inflate using a callback interface |
|
||||
| inflate.* | Decompress data |
|
||||
| inffast.* | Decompress data with speed optimizations |
|
||||
| inffixed_tbl.h | Table for decoding fixed codes |
|
||||
| inftrees.h | Generate Huffman trees for efficient decoding |
|
||||
| trees.* | Output deflated data using Huffman coding |
|
||||
| uncompr.c | Decompress a memory buffer |
|
||||
| zconf.h.cmakein | zconf.h template for cmake |
|
||||
| zendian.h | BYTE_ORDER for endian tests |
|
||||
| zlib.3 | Man page for zlib |
|
||||
| zlib.map | Linux symbol information |
|
||||
| zlib.pc.in | Pkg-config template |
|
||||
@ -1,64 +0,0 @@
|
||||
Overview
|
||||
========
|
||||
|
||||
There are several methods for compiling and installing zlib-ng, depending
|
||||
on your favorite operating system and development toolkits.
|
||||
This document will attempt to give a general overview of some of them.
|
||||
|
||||
PS: We do not recommend running 'make install' unless you know what you
|
||||
are doing, as this can override the system default zlib library, and
|
||||
any wrong configuration or incompatibility of zlib-ng can make the
|
||||
whole system unusable.
|
||||
|
||||
On linux distros, an alternative way to use zlib-ng instead of zlib
|
||||
for specific programs exists: use LD_PRELOAD.
|
||||
If the program is dynamically linked with zlib, then zlib-ng can take
|
||||
its place without risking system-wide instability. Ex:
|
||||
LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program
|
||||
|
||||
|
||||
Configure
|
||||
=========
|
||||
|
||||
Using the configure script is currently the main method of setting up the
|
||||
makefiles and preparing for compilation. Configure will attempt to detect
|
||||
the specifics of your system, and enable some of the relevant options for you.
|
||||
|
||||
Configure accepts several command-line options, some of the most important
|
||||
ones are detailed below.
|
||||
|
||||
--zlib-compat
|
||||
This enables options that will ensure that zlib-ng is compiled with all the
|
||||
functions that a standard zlib library contains, you will need to use this
|
||||
if you are going to be using zlib-ng as a drop-in replacement for zlib.
|
||||
|
||||
--without-optimizations
|
||||
This will disable zlib-ng specific optimizations (does not disable strategies).
|
||||
|
||||
--without-new-strategies
|
||||
This will disable specially optimized strategies, such as deflate_quick and
|
||||
deflate_medium.
|
||||
|
||||
Run configure like this:
|
||||
./configure --zlib-compat
|
||||
|
||||
Then you can compile using make:
|
||||
make
|
||||
make test
|
||||
|
||||
|
||||
Cmake
|
||||
=====
|
||||
|
||||
Cmake is an alternative to configure, basically letting you do the same thing,
|
||||
but with different tools and user interfaces.
|
||||
|
||||
Start by initializing cmake:
|
||||
cmake .
|
||||
|
||||
Then you can start the configuration tui to set the wanted options
|
||||
ccmake .
|
||||
|
||||
You can now compile using make:
|
||||
make
|
||||
make test
|
||||
@ -29,8 +29,8 @@ TEST_LIBS=$(LIBNAME1).a
|
||||
LDSHARED=$(CC)
|
||||
LDSHAREDFLAGS=-shared
|
||||
|
||||
VER=1.9.9
|
||||
VER1=1
|
||||
VER=2.0.0-RC2
|
||||
VER1=2
|
||||
|
||||
STATICLIB=$(LIBNAME1).a
|
||||
SHAREDLIB=$(LIBNAME1).so
|
||||
@ -51,7 +51,7 @@ RCOBJS=
|
||||
STRIP=
|
||||
RANLIB=ranlib
|
||||
LDCONFIG=ldconfig
|
||||
LDSHAREDLIBC=-lc
|
||||
LDSHAREDLIBC=
|
||||
EXE=
|
||||
|
||||
SRCDIR=.
|
||||
@ -71,12 +71,64 @@ mandir = ${prefix}/share/man
|
||||
man3dir = ${mandir}/man3
|
||||
pkgconfigdir = ${libdir}/pkgconfig
|
||||
|
||||
OBJZ = adler32.o compress.o crc32.o deflate.o deflate_fast.o deflate_medium.o deflate_slow.o functable.o infback.o inffast.o inflate.o inftrees.o trees.o uncompr.o zutil.o $(ARCH_STATIC_OBJS)
|
||||
OBJG = gzclose.o gzlib.o gzread.o gzwrite.o
|
||||
OBJZ = \
|
||||
adler32.o \
|
||||
chunkset.o \
|
||||
compare258.o \
|
||||
compress.o \
|
||||
crc32.o \
|
||||
crc32_comb.o \
|
||||
deflate.o \
|
||||
deflate_fast.o \
|
||||
deflate_medium.o \
|
||||
deflate_quick.o \
|
||||
deflate_slow.o \
|
||||
functable.o \
|
||||
infback.o \
|
||||
inffast.o \
|
||||
inflate.o \
|
||||
inftrees.o \
|
||||
insert_string.o \
|
||||
trees.o \
|
||||
uncompr.o \
|
||||
zutil.o \
|
||||
$(ARCH_STATIC_OBJS)
|
||||
|
||||
OBJG = \
|
||||
gzlib.o \
|
||||
gzread.o \
|
||||
gzwrite.o
|
||||
|
||||
OBJC = $(OBJZ) $(OBJG)
|
||||
|
||||
PIC_OBJZ = adler32.lo compress.lo crc32.lo deflate.lo deflate_fast.lo deflate_medium.lo deflate_slow.lo functable.lo infback.lo inffast.lo inflate.lo inftrees.lo trees.lo uncompr.lo zutil.lo $(ARCH_SHARED_OBJS)
|
||||
PIC_OBJG = gzclose.lo gzlib.lo gzread.lo gzwrite.lo
|
||||
PIC_OBJZ = \
|
||||
adler32.lo \
|
||||
chunkset.lo \
|
||||
compare258.lo \
|
||||
compress.lo \
|
||||
crc32.lo \
|
||||
crc32_comb.lo \
|
||||
deflate.lo \
|
||||
deflate_fast.lo \
|
||||
deflate_medium.lo \
|
||||
deflate_quick.lo \
|
||||
deflate_slow.lo \
|
||||
functable.lo \
|
||||
infback.lo \
|
||||
inffast.lo \
|
||||
inflate.lo \
|
||||
inftrees.lo \
|
||||
insert_string.lo \
|
||||
trees.lo \
|
||||
uncompr.lo \
|
||||
zutil.lo \
|
||||
$(ARCH_SHARED_OBJS)
|
||||
|
||||
PIC_OBJG = \
|
||||
gzlib.lo \
|
||||
gzread.lo \
|
||||
gzwrite.lo
|
||||
|
||||
PIC_OBJC = $(PIC_OBJZ) $(PIC_OBJG)
|
||||
|
||||
OBJS = $(OBJC)
|
||||
@ -85,11 +137,9 @@ PIC_OBJS = $(PIC_OBJC)
|
||||
|
||||
all: static shared
|
||||
|
||||
static: example$(EXE) minigzip$(EXE) fuzzers
|
||||
static: adler32_test$(EXE) example$(EXE) minigzip$(EXE) fuzzers makefixed$(EXE) maketrees$(EXE) makecrct$(EXE)
|
||||
|
||||
shared: examplesh$(EXE) minigzipsh$(EXE)
|
||||
|
||||
all64: example64$(EXE) minigzip64$(EXE)
|
||||
shared: adler32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE)
|
||||
|
||||
check: test
|
||||
|
||||
@ -181,17 +231,23 @@ $(STATICLIB): $(OBJS)
|
||||
$(AR) $(ARFLAGS) $@ $(OBJS)
|
||||
-@ ($(RANLIB) $@ || true) >/dev/null 2>&1
|
||||
|
||||
adler32_test.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/test/adler32_test.c
|
||||
|
||||
example.o:
|
||||
$(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c
|
||||
|
||||
minigzip.o:
|
||||
$(CC) $(CFLAGS) -DWITH_GZFILEOP $(INCLUDES) -c -o $@ $(SRCDIR)/test/minigzip.c
|
||||
|
||||
example64.o:
|
||||
$(CC) $(CFLAGS) -DWITH_GZFILEOP -D_FILE_OFFSET_BITS=64 $(INCLUDES) -c -o $@ $(SRCDIR)/test/example.c
|
||||
makefixed.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makefixed.c
|
||||
|
||||
minigzip64.o:
|
||||
$(CC) $(CFLAGS) -DWITH_GZFILEOP -D_FILE_OFFSET_BITS=64 $(INCLUDES) -c -o $@ $(SRCDIR)/test/minigzip.c
|
||||
maketrees.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/maketrees.c
|
||||
|
||||
makecrct.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/tools/makecrct.c
|
||||
|
||||
zlibrc.o: win32/zlib$(SUFFIX)1.rc
|
||||
$(RC) $(RCFLAGS) -o $@ win32/zlib$(SUFFIX)1.rc
|
||||
@ -209,7 +265,7 @@ $(OBJG): %.o: $(SRCDIR)/%.c
|
||||
|
||||
$(SHAREDTARGET): $(PIC_OBJS) $(DEFFILE) $(RCOBJS)
|
||||
ifneq ($(SHAREDTARGET),)
|
||||
$(LDSHARED) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC)
|
||||
$(LDSHARED) $(CFLAGS) $(LDSHAREDFLAGS) $(LDFLAGS) -o $@ $(DEFFILE) $(PIC_OBJS) $(RCOBJS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
@ -220,38 +276,56 @@ ifneq ($(SHAREDLIB),$(SHAREDTARGET))
|
||||
endif
|
||||
endif
|
||||
|
||||
adler32_test$(EXE): adler32_test.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
example$(EXE): example.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
minigzip$(EXE): minigzip.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
adler32_testsh$(EXE): adler32_test.o $(OBJG) $(SHAREDTARGET)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ adler32_test.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
examplesh$(EXE): example.o $(OBJG) $(SHAREDTARGET)
|
||||
$(CC) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ example.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
minigzipsh$(EXE): minigzip.o $(OBJG) $(SHAREDTARGET)
|
||||
$(CC) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ minigzip.o $(OBJG) $(SHAREDTARGET) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
example64$(EXE): example64.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(LDFLAGS) -o $@ example64.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
makefixed$(EXE): makefixed.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ makefixed.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
minigzip64$(EXE): minigzip64.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(LDFLAGS) -o $@ minigzip64.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
maketrees$(EXE): maketrees.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ maketrees.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
|
||||
makecrct$(EXE): makecrct.o $(OBJG) $(STATICLIB)
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ makecrct.o $(OBJG) $(TEST_LIBS) $(LDSHAREDLIBC)
|
||||
ifneq ($(STRIP),)
|
||||
$(STRIP) $@
|
||||
endif
|
||||
@ -326,11 +400,11 @@ clean:
|
||||
@if [ -f $(ARCHDIR)/Makefile ]; then $(MAKE) -C $(ARCHDIR) clean; fi
|
||||
@if [ -f test/Makefile ]; then $(MAKE) -C test clean; fi
|
||||
rm -f *.o *.lo *~ \
|
||||
example$(EXE) minigzip$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
|
||||
example64$(EXE) minigzip64$(EXE) \
|
||||
adler32_test$(EXE) example$(EXE) minigzip$(EXE) \
|
||||
adler32_testsh$(EXE) examplesh$(EXE) minigzipsh$(EXE) \
|
||||
checksum_fuzzer$(EXE) compress_fuzzer$(EXE) example_small_fuzzer$(EXE) example_large_fuzzer$(EXE) \
|
||||
example_flush_fuzzer$(EXE) example_dict_fuzzer$(EXE) minigzip_fuzzer$(EXE) \
|
||||
infcover \
|
||||
infcover makefixed$(EXE) maketrees$(EXE) makecrct$(EXE) \
|
||||
$(STATICLIB) $(IMPORTLIB) $(SHAREDLIB) $(SHAREDLIBV) $(SHAREDLIBM) \
|
||||
foo.gz so_locations \
|
||||
_match.s maketree
|
||||
@ -338,6 +412,8 @@ clean:
|
||||
rm -f *.gcda *.gcno *.gcov
|
||||
rm -f a.out a.exe
|
||||
rm -f *.pc
|
||||
rm -f *._h
|
||||
rm -rf btmp1 btmp2 pkgtmp1 pkgtmp2
|
||||
|
||||
maintainer-clean: distclean
|
||||
distclean: clean
|
||||
|
||||
@ -1,10 +1,41 @@
|
||||
zlib-ng - zlib for the next generation systems
|
||||
## zlib-ng
|
||||
*zlib data compression library for the next generation systems*
|
||||
|
||||
Maintained by Hans Kristian Rosbach
|
||||
aka Dead2 (zlib-ng àt circlestorm dót org)
|
||||
|
||||
|CI|Status|
|
||||
|:-|-|
|
||||
|GitHub Actions|[](https://github.com/zlib-ng/zlib-ng/actions) [](https://github.com/zlib-ng/zlib-ng/actions) [](https://github.com/zlib-ng/zlib-ng/actions)|
|
||||
|Buildkite|[](https://buildkite.com/circlestorm-productions/zlib-ng)|
|
||||
|CodeFactor|[](https://www.codefactor.io/repository/github/zlib-ng/zlib-ng)|
|
||||
|OSS-Fuzz|[](https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zlib-ng)|
|
||||
|Codecov|[](https://codecov.io/github/zlib-ng/zlib-ng/)|
|
||||
|
||||
Fork Motivation and History
|
||||
Features
|
||||
--------
|
||||
|
||||
* Zlib compatible API with support for dual-linking
|
||||
* Modernized native API based on zlib API for ease of porting
|
||||
* Modern C99 syntax and a clean code layout
|
||||
* Deflate medium and quick algorithms based on Intel's zlib fork
|
||||
* Support for CPU intrinsics when available
|
||||
* Adler32 implementation using SSSE3, AVX2, Neon & VSX
|
||||
* CRC32-B implementation using PCLMULQDQ & ACLE
|
||||
* Hash table implementation using CRC32-C intrinsics on x86 and ARM
|
||||
* Slide hash implementations using SSE2, AVX2, Neon & VSX
|
||||
* Compare256/258 implementations using SSE4.2 & AVX2
|
||||
* Inflate chunk copying using SSE2, AVX2 & Neon
|
||||
* Support for hardware-accelerated deflate using IBM Z DFLTCC
|
||||
* Unaligned memory read/writes and large bit buffer improvements
|
||||
* Includes improvements from Cloudflare and Intel forks
|
||||
* Configure, CMake, and NMake build system support
|
||||
* Comprehensive set of CMake unit tests
|
||||
* Code sanitizers, fuzzing, and coverage
|
||||
* GitHub Actions continuous integration on Windows, macOS, and Linux
|
||||
* Emulated CI for ARM, AARCH64, PPC, PPC64, SPARC64, S390x using qemu
|
||||
|
||||
Fork Motivation
|
||||
---------------------------
|
||||
|
||||
The motivation for this fork was due to seeing several 3rd party
|
||||
@ -38,17 +69,97 @@ various dead code, all contrib and example code as there is little
|
||||
point in having those in this fork for various reasons.
|
||||
|
||||
A lot of improvements have gone into zlib-ng since its start, and
|
||||
numerous people have contributed both small and big improvements,
|
||||
or valuable testing.
|
||||
numerous people and companies have contributed both small and big
|
||||
improvements, or valuable testing.
|
||||
|
||||
Please read LICENSE.md, it is very simple and very liberal.
|
||||
|
||||
Build
|
||||
-----
|
||||
|
||||
There are two ways to build zlib-ng:
|
||||
|
||||
### Cmake
|
||||
|
||||
To build zlib-ng using the cross-platform makefile generator cmake:
|
||||
|
||||
```
|
||||
cmake .
|
||||
cmake --build . --config Release
|
||||
ctest --verbose -C Release
|
||||
```
|
||||
|
||||
Alternatively, you can use the cmake configuration GUI tool ccmake:
|
||||
|
||||
```
|
||||
ccmake .
|
||||
```
|
||||
|
||||
### Configure
|
||||
|
||||
To build zlib-ng using the bash configure script:
|
||||
|
||||
```
|
||||
./configure
|
||||
make
|
||||
make test
|
||||
```
|
||||
|
||||
Build Options
|
||||
-------------
|
||||
| CMake | configure | Description | Default |
|
||||
|:-------------------------|:-------------------------|:--------------------------------------------------------------------------------------|---------|
|
||||
| ZLIB_COMPAT | --zlib-compat | Compile with zlib compatible API | OFF |
|
||||
| ZLIB_ENABLE_TESTS | | Build test binaries | ON |
|
||||
| WITH_GZFILEOP | --without-gzfileops | Compile with support for gzFile related functions | ON |
|
||||
| WITH_OPTIM | --without-optimizations | Build with optimisations | ON |
|
||||
| WITH_NEW_STRATEGIES | --without-new-strategies | Use new strategies | ON |
|
||||
| WITH_NATIVE_INSTRUCTIONS | --native | Compiles with full instruction set supported on this host (gcc/clang -march=native) | OFF |
|
||||
| WITH_SANITIZER | --with-sanitizer | Build with sanitizer (memory, address, undefined) | OFF |
|
||||
| WITH_FUZZERS | --with-fuzzers | Build test/fuzz | OFF |
|
||||
| WITH_MAINTAINER_WARNINGS | | Build with project maintainer warnings | OFF |
|
||||
| WITH_CODE_COVERAGE | | Enable code coverage reporting | OFF |
|
||||
|
||||
Install
|
||||
-------
|
||||
|
||||
WARNING: We do not recommend manually installing unless you really
|
||||
know what you are doing, because this can potentially override the system
|
||||
default zlib library, and any incompatibility or wrong configuration of
|
||||
zlib-ng can make the whole system unusable, requiring recovery or reinstall.
|
||||
If you still want a manual install, we recommend using the /opt/ path prefix.
|
||||
|
||||
For Linux distros, an alternative way to use zlib-ng (if compiled in
|
||||
zlib-compat mode) instead of zlib, is through the use of the
|
||||
_LD_PRELOAD_ environment variable. If the program is dynamically linked
|
||||
with zlib, then zlib-ng will temporarily be used instead by the program,
|
||||
without risking system-wide instability.
|
||||
|
||||
```
|
||||
LD_PRELOAD=/opt/zlib-ng/libz.so.1.2.11.zlib-ng /usr/bin/program
|
||||
```
|
||||
|
||||
### Cmake
|
||||
|
||||
To install zlib-ng system-wide using cmake:
|
||||
|
||||
```
|
||||
cmake --build . --target install
|
||||
```
|
||||
|
||||
### Configure
|
||||
|
||||
To install zlib-ng system-wide using the configure script:
|
||||
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
Contributing
|
||||
------------
|
||||
|
||||
Zlib-ng is a young project, and we aim to be open to contributions,
|
||||
and we would be delighted to receive pull requests on github.
|
||||
Zlib-ng aims to be open to contributions, and we would be
|
||||
delighted to receive pull requests on github.
|
||||
Just remember that any code you submit must be your own and it must
|
||||
be zlib licensed.
|
||||
Help with testing and reviewing of pull requests etc is also very
|
||||
@ -73,9 +184,23 @@ The deflate and zlib specifications were written by L. Peter Deutsch.
|
||||
zlib was originally created by Jean-loup Gailly (compression)
|
||||
and Mark Adler (decompression).
|
||||
|
||||
Advanced Build Options
|
||||
----------------------
|
||||
|
||||
Build Status
|
||||
------------
|
||||
|
||||
Travis CI: [build status](https://travis-ci.org/zlib-ng/zlib-ng/)
|
||||
Buildkite: [build status](https://buildkite.com/circlestorm-productions/zlib-ng)
|
||||
| CMake | configure | Description | Default |
|
||||
|:--------------------------------|:----------------------|:--------------------------------------------------------------------|------------------------|
|
||||
| ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF |
|
||||
| | --force-sse2 | Assume SSE2 instructions are always available | ON (x86), OFF (x86_64) |
|
||||
| WITH_AVX2 | | Build with AVX2 intrinsics | ON |
|
||||
| WITH_SSE2 | | Build with SSE2 intrinsics | ON |
|
||||
| WITH_SSE4 | | Build with SSE4 intrinsics | ON |
|
||||
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
|
||||
| WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON |
|
||||
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
|
||||
| WITH_POWER8 | | Build with POWER8 optimisations | ON |
|
||||
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Use DEFLATE COMPRESSION CALL instruction for compression on IBM Z | OFF |
|
||||
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Use DEFLATE COMPRESSION CALL instruction for decompression on IBM Z | OFF |
|
||||
| WITH_UNALIGNED | | Allow optimizations that use unaligned reads if safe on current arch| ON |
|
||||
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
|
||||
| WITH_INFLATE_ALLOW_INVALID_DIST | | Build with zero fill for inflate invalid distances | OFF |
|
||||
| INSTALL_UTILS | | Copy minigzip and minideflate during install | OFF |
|
||||
|
||||
@ -1,118 +0,0 @@
|
||||
ZLIB DATA COMPRESSION LIBRARY
|
||||
|
||||
zlib 1.2.11 is a general purpose data compression library. All the code is
|
||||
thread safe. The data format used by the zlib library is described by RFCs
|
||||
(Request for Comments) 1950 to 1952 in the files
|
||||
http://tools.ietf.org/html/rfc1950 (zlib format), rfc1951 (deflate format) and
|
||||
rfc1952 (gzip format).
|
||||
|
||||
All functions of the compression library are documented in the file zlib.h
|
||||
(volunteer to write man pages welcome, contact zlib@gzip.org). A usage example
|
||||
of the library is given in the file test/example.c which also tests that
|
||||
the library is working correctly. Another example is given in the file
|
||||
test/minigzip.c. The compression library itself is composed of all source
|
||||
files in the root directory.
|
||||
|
||||
To compile all files and run the test program, follow the instructions given at
|
||||
the top of Makefile.in. In short "./configure; make test", and if that goes
|
||||
well, "make install" should work for most flavors of Unix. For Windows, use
|
||||
one of the special makefiles in win32/ or contrib/vstudio/ . For VMS, use
|
||||
make_vms.com.
|
||||
|
||||
Questions about zlib should be sent to <zlib@gzip.org>, or to Gilles Vollant
|
||||
<info@winimage.com> for the Windows DLL version. The zlib home page is
|
||||
http://zlib.net/ . Before reporting a problem, please check this site to
|
||||
verify that you have the latest version of zlib; otherwise get the latest
|
||||
version and check whether the problem still exists or not.
|
||||
|
||||
PLEASE read the zlib FAQ http://zlib.net/zlib_faq.html before asking for help.
|
||||
|
||||
Mark Nelson <markn@ieee.org> wrote an article about zlib for the Jan. 1997
|
||||
issue of Dr. Dobb's Journal; a copy of the article is available at
|
||||
http://marknelson.us/1997/01/01/zlib-engine/ .
|
||||
|
||||
The changes made in version 1.2.11 are documented in the file ChangeLog.
|
||||
|
||||
Unsupported third party contributions are provided in directory contrib/ .
|
||||
|
||||
zlib is available in Java using the java.util.zip package, documented at
|
||||
http://java.sun.com/developer/technicalArticles/Programming/compression/ .
|
||||
|
||||
A Perl interface to zlib written by Paul Marquess <pmqs@cpan.org> is available
|
||||
at CPAN (Comprehensive Perl Archive Network) sites, including
|
||||
http://search.cpan.org/~pmqs/IO-Compress-Zlib/ .
|
||||
|
||||
A Python interface to zlib written by A.M. Kuchling <amk@amk.ca> is
|
||||
available in Python 1.5 and later versions, see
|
||||
http://docs.python.org/library/zlib.html .
|
||||
|
||||
zlib is built into tcl: http://wiki.tcl.tk/4610 .
|
||||
|
||||
An experimental package to read and write files in .zip format, written on top
|
||||
of zlib by Gilles Vollant <info@winimage.com>, is available in the
|
||||
contrib/minizip directory of zlib.
|
||||
|
||||
|
||||
Notes for some targets:
|
||||
|
||||
- For Windows DLL versions, please see win32/DLL_FAQ.txt
|
||||
|
||||
- For 64-bit Irix, deflate.c must be compiled without any optimization. With
|
||||
-O, one libpng test fails. The test works in 32 bit mode (with the -n32
|
||||
compiler flag). The compiler bug has been reported to SGI.
|
||||
|
||||
- zlib doesn't work with gcc 2.6.3 on a DEC 3000/300LX under OSF/1 2.1; it works
|
||||
when compiled with cc.
|
||||
|
||||
- On Digital Unix 4.0D (formerly OSF/1) on AlphaServer, the cc option -std1 is
|
||||
necessary to get gzprintf working correctly. This is done by configure.
|
||||
|
||||
- zlib doesn't work on HP-UX 9.05 with some versions of /bin/cc. It works with
|
||||
other compilers. Use "make test" to check your compiler.
|
||||
|
||||
- gzdopen is not supported on RISCOS or BEOS.
|
||||
|
||||
- For PalmOs, see http://palmzlib.sourceforge.net/
|
||||
|
||||
|
||||
Acknowledgments:
|
||||
|
||||
The deflate format used by zlib was defined by Phil Katz. The deflate and
|
||||
zlib specifications were written by L. Peter Deutsch. Thanks to all the
|
||||
people who reported problems and suggested various improvements in zlib; they
|
||||
are too numerous to cite here.
|
||||
|
||||
Copyright notice:
|
||||
|
||||
(C) 1995-2017 Jean-loup Gailly and Mark Adler
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Jean-loup Gailly Mark Adler
|
||||
jloup@gzip.org madler@alumni.caltech.edu
|
||||
|
||||
If you use the zlib library in a product, we would appreciate *not* receiving
|
||||
lengthy legal documents to sign. The sources are provided for free but without
|
||||
warranty of any kind. The library has been entirely written by Jean-loup
|
||||
Gailly and Mark Adler; it does not include third-party code. We make all
|
||||
contributions to and distributions of this project solely in our personal
|
||||
capacity, and are not conveying any rights to any intellectual property of
|
||||
any third parties.
|
||||
|
||||
If you redistribute modified sources, we would appreciate that you include in
|
||||
the file ChangeLog history information documenting your changes. Please read
|
||||
the FAQ for more information on the distribution of modified source versions.
|
||||
@ -3,24 +3,13 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
/* @(#) $Id$ */
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "functable.h"
|
||||
#include "adler32_p.h"
|
||||
|
||||
uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2);
|
||||
|
||||
#define DO1(buf, i) {adler += (buf)[i]; sum2 += adler;}
|
||||
#define DO2(buf, i) DO1(buf, i); DO1(buf, i+1);
|
||||
#define DO4(buf, i) DO2(buf, i); DO2(buf, i+2);
|
||||
#define DO8(buf, i) DO4(buf, i); DO4(buf, i+4);
|
||||
#define DO16(buf) DO8(buf, 0); DO8(buf, 8);
|
||||
|
||||
/* ========================================================================= */
|
||||
uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
Z_INTERNAL uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
uint32_t sum2;
|
||||
unsigned n;
|
||||
|
||||
@ -29,15 +18,15 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
adler &= 0xffff;
|
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */
|
||||
if (len == 1)
|
||||
if (UNLIKELY(len == 1))
|
||||
return adler32_len_1(adler, buf, sum2);
|
||||
|
||||
/* initial Adler-32 value (deferred check for len == 1 speed) */
|
||||
if (buf == NULL)
|
||||
if (UNLIKELY(buf == NULL))
|
||||
return 1L;
|
||||
|
||||
/* in case short lengths are provided, keep it somewhat fast */
|
||||
if (len < 16)
|
||||
if (UNLIKELY(len < 16))
|
||||
return adler32_len_16(adler, buf, len, sum2);
|
||||
|
||||
/* do length NMAX blocks -- requires just one modulo operation */
|
||||
@ -50,15 +39,15 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
#endif
|
||||
do {
|
||||
#ifdef UNROLL_MORE
|
||||
DO16(buf); /* 16 sums unrolled */
|
||||
DO16(adler, sum2, buf); /* 16 sums unrolled */
|
||||
buf += 16;
|
||||
#else
|
||||
DO8(buf, 0); /* 8 sums unrolled */
|
||||
DO8(adler, sum2, buf, 0); /* 8 sums unrolled */
|
||||
buf += 8;
|
||||
#endif
|
||||
} while (--n);
|
||||
MOD(adler);
|
||||
MOD(sum2);
|
||||
adler %= BASE;
|
||||
sum2 %= BASE;
|
||||
}
|
||||
|
||||
/* do remaining bytes (less than NMAX, still just one modulo) */
|
||||
@ -66,12 +55,12 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
#ifdef UNROLL_MORE
|
||||
while (len >= 16) {
|
||||
len -= 16;
|
||||
DO16(buf);
|
||||
DO16(adler, sum2, buf);
|
||||
buf += 16;
|
||||
#else
|
||||
while (len >= 8) {
|
||||
len -= 8;
|
||||
DO8(buf, 0);
|
||||
DO8(adler, sum2, buf, 0);
|
||||
buf += 8;
|
||||
#endif
|
||||
}
|
||||
@ -80,22 +69,34 @@ uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
adler += *buf++;
|
||||
sum2 += adler;
|
||||
}
|
||||
MOD(adler);
|
||||
MOD(sum2);
|
||||
adler %= BASE;
|
||||
sum2 %= BASE;
|
||||
}
|
||||
|
||||
/* return recombined sums */
|
||||
return adler | (sum2 << 16);
|
||||
}
|
||||
|
||||
uint32_t ZEXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(adler32_z)(unsigned long adler, const unsigned char *buf, size_t len) {
|
||||
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(adler32_z)(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
return functable.adler32(adler, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
uint32_t ZEXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(adler32)(unsigned long adler, const unsigned char *buf, unsigned int len) {
|
||||
return (unsigned long)functable.adler32((uint32_t)adler, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(adler32)(uint32_t adler, const unsigned char *buf, uint32_t len) {
|
||||
return functable.adler32(adler, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
|
||||
@ -108,11 +109,11 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len
|
||||
return 0xffffffff;
|
||||
|
||||
/* the derivation of this formula is left as an exercise for the reader */
|
||||
MOD63(len2); /* assumes len2 >= 0 */
|
||||
len2 %= BASE; /* assumes len2 >= 0 */
|
||||
rem = (unsigned)len2;
|
||||
sum1 = adler1 & 0xffff;
|
||||
sum2 = rem * sum1;
|
||||
MOD(sum2);
|
||||
sum2 %= BASE;
|
||||
sum1 += (adler2 & 0xffff) + BASE - 1;
|
||||
sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
|
||||
if (sum1 >= BASE) sum1 -= BASE;
|
||||
@ -123,10 +124,16 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
uint32_t ZEXPORT PREFIX(adler32_combine)(uint32_t adler1, uint32_t adler2, z_off_t len2) {
|
||||
return adler32_combine_(adler1, adler2, len2);
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(adler32_combine)(unsigned long adler1, unsigned long adler2, z_off_t len2) {
|
||||
return (unsigned long)adler32_combine_((uint32_t)adler1, (uint32_t)adler2, len2);
|
||||
}
|
||||
|
||||
uint32_t ZEXPORT PREFIX(adler32_combine64)(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
|
||||
unsigned long Z_EXPORT PREFIX4(adler32_combine)(unsigned long adler1, unsigned long adler2, z_off64_t len2) {
|
||||
return (unsigned long)adler32_combine_((uint32_t)adler1, (uint32_t)adler2, len2);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX4(adler32_combine)(uint32_t adler1, uint32_t adler2, z_off64_t len2) {
|
||||
return adler32_combine_(adler1, adler2, len2);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -12,45 +12,11 @@
|
||||
#define NMAX 5552
|
||||
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
|
||||
|
||||
/* use NO_DIVIDE if your processor does not do division in hardware --
|
||||
try it both ways to see which is faster */
|
||||
#ifdef NO_DIVIDE
|
||||
/* note that this assumes BASE is 65521, where 65536 % 65521 == 15
|
||||
(thank you to John Reiser for pointing this out) */
|
||||
# define CHOP(a) \
|
||||
do { \
|
||||
uint32_t tmp = a >> 16; \
|
||||
a &= 0xffff; \
|
||||
a += (tmp << 4) - tmp; \
|
||||
} while (0)
|
||||
# define MOD28(a) \
|
||||
do { \
|
||||
CHOP(a); \
|
||||
if (a >= BASE) a -= BASE; \
|
||||
} while (0)
|
||||
# define MOD(a) \
|
||||
do { \
|
||||
CHOP(a); \
|
||||
MOD28(a); \
|
||||
} while (0)
|
||||
# define MOD63(a) \
|
||||
do { /* this assumes a is not negative */ \
|
||||
z_off64_t tmp = a >> 32; \
|
||||
a &= 0xffffffffL; \
|
||||
a += (tmp << 8) - (tmp << 5) + tmp; \
|
||||
tmp = a >> 16; \
|
||||
a &= 0xffffL; \
|
||||
a += (tmp << 4) - tmp; \
|
||||
tmp = a >> 16; \
|
||||
a &= 0xffffL; \
|
||||
a += (tmp << 4) - tmp; \
|
||||
if (a >= BASE) a -= BASE; \
|
||||
} while (0)
|
||||
#else
|
||||
# define MOD(a) a %= BASE
|
||||
# define MOD28(a) a %= BASE
|
||||
# define MOD63(a) a %= BASE
|
||||
#endif
|
||||
#define DO1(sum1, sum2, buf, i) {(sum1) += buf[(i)]; (sum2) += (sum1);}
|
||||
#define DO2(sum1, sum2, buf, i) {DO1(sum1, sum2, buf, i); DO1(sum1, sum2, buf, i+1);}
|
||||
#define DO4(sum1, sum2, buf, i) {DO2(sum1, sum2, buf, i); DO2(sum1, sum2, buf, i+2);}
|
||||
#define DO8(sum1, sum2, buf, i) {DO4(sum1, sum2, buf, i); DO4(sum1, sum2, buf, i+4);}
|
||||
#define DO16(sum1, sum2, buf) {DO8(sum1, sum2, buf, 0); DO8(sum1, sum2, buf, 8);}
|
||||
|
||||
static inline uint32_t adler32_len_1(uint32_t adler, const unsigned char *buf, uint32_t sum2) {
|
||||
adler += buf[0];
|
||||
@ -70,8 +36,18 @@ static inline uint32_t adler32_len_16(uint32_t adler, const unsigned char *buf,
|
||||
}
|
||||
if (adler >= BASE)
|
||||
adler -= BASE;
|
||||
MOD28(sum2); /* only added so many BASE's */
|
||||
sum2 %= BASE; /* only added so many BASE's */
|
||||
return adler | (sum2 << 16);
|
||||
}
|
||||
|
||||
static inline uint32_t adler32_len_64(uint32_t adler, const unsigned char *buf, size_t len, uint32_t sum2) {
|
||||
while (len >= 16) {
|
||||
len -= 16;
|
||||
DO16(adler, sum2, buf);
|
||||
buf += 16;
|
||||
}
|
||||
/* Process tail (len < 16). */
|
||||
return adler32_len_16(adler, buf, len, sum2);
|
||||
}
|
||||
|
||||
#endif /* ADLER32_P_H */
|
||||
|
||||
@ -6,19 +6,27 @@ CC=
|
||||
CFLAGS=
|
||||
SFLAGS=
|
||||
INCLUDES=
|
||||
ACLEFLAG=
|
||||
NEONFLAG=
|
||||
SUFFIX=
|
||||
|
||||
SRCDIR=.
|
||||
SRCTOP=../..
|
||||
TOPDIR=$(SRCTOP)
|
||||
|
||||
all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
|
||||
all: \
|
||||
adler32_neon.o adler32_neon.lo \
|
||||
armfeature.o armfeature.lo \
|
||||
chunkset_neon.o chunkset_neon.lo \
|
||||
crc32_acle.o crc32_acle.lo \
|
||||
slide_neon.o slide_neon.lo \
|
||||
insert_string_acle.o insert_string_acle.lo
|
||||
|
||||
adler32_neon.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
|
||||
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
|
||||
|
||||
adler32_neon.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
|
||||
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
|
||||
|
||||
armfeature.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
|
||||
@ -26,23 +34,29 @@ armfeature.o:
|
||||
armfeature.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
|
||||
|
||||
chunkset_neon.o:
|
||||
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
|
||||
|
||||
chunkset_neon.lo:
|
||||
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
|
||||
|
||||
crc32_acle.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
|
||||
$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
|
||||
|
||||
crc32_acle.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
|
||||
$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
|
||||
|
||||
fill_window_arm.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
|
||||
slide_neon.o:
|
||||
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
|
||||
|
||||
fill_window_arm.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
|
||||
slide_neon.lo:
|
||||
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
|
||||
|
||||
insert_string_acle.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
|
||||
insert_string_acle.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
|
||||
@ -2,24 +2,16 @@
|
||||
* Copyright (C) 2017 ARM Holdings Inc.
|
||||
* Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "adler32_neon.h"
|
||||
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
#include "adler32_p.h"
|
||||
#ifdef ARM_NEON_ADLER32
|
||||
#ifdef _M_ARM64
|
||||
# include <arm64_neon.h>
|
||||
#else
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
#include "../../zutil.h"
|
||||
#include "../../adler32_p.h"
|
||||
|
||||
static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
|
||||
static const uint8_t taps[32] = {
|
||||
@ -109,7 +101,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
|
||||
for (i = 0; i < len; i += n) {
|
||||
if ((i + n) > len)
|
||||
n = len - i;
|
||||
n = (int)(len - i);
|
||||
|
||||
if (n < 16)
|
||||
break;
|
||||
|
||||
@ -1,29 +0,0 @@
|
||||
/* Copyright (C) 1995-2011, 2016 Mark Adler
|
||||
* Copyright (C) 2017 ARM Holdings Inc.
|
||||
* Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
#ifndef __ADLER32_NEON__
|
||||
#define __ADLER32_NEON__
|
||||
|
||||
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
// Depending on the compiler flavor, size_t may be defined in one or the other header. See:
|
||||
// http://stackoverflow.com/questions/26410466/gcc-linaro-compiler-throws-error-unknown-type-name-size-t
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
#endif
|
||||
#endif
|
||||
@ -8,6 +8,6 @@
|
||||
extern int arm_cpu_has_neon;
|
||||
extern int arm_cpu_has_crc32;
|
||||
|
||||
void ZLIB_INTERNAL arm_check_features(void);
|
||||
void Z_INTERNAL arm_check_features(void);
|
||||
|
||||
#endif /* ARM_H_ */
|
||||
|
||||
@ -1,50 +1,69 @@
|
||||
#include "zutil.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
#if defined(__linux__)
|
||||
# include <sys/auxv.h>
|
||||
# include <asm/hwcap.h>
|
||||
# include <sys/auxv.h>
|
||||
# include <asm/hwcap.h>
|
||||
#elif defined(__FreeBSD__) && defined(__aarch64__)
|
||||
# include <machine/armreg.h>
|
||||
# ifndef ID_AA64ISAR0_CRC32_VAL
|
||||
# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
|
||||
# endif
|
||||
#elif defined(__APPLE__)
|
||||
# include <sys/sysctl.h>
|
||||
#elif defined(_WIN32)
|
||||
# include <winapifamily.h>
|
||||
# include <winapifamily.h>
|
||||
#endif
|
||||
|
||||
static int arm_has_crc32() {
|
||||
#if defined(__linux__) && defined(HWCAP2_CRC32)
|
||||
return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
|
||||
return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
|
||||
#elif defined(__FreeBSD__) && defined(__aarch64__)
|
||||
return getenv("QEMU_EMULATING") == NULL
|
||||
&& ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE;
|
||||
#elif defined(__APPLE__)
|
||||
int hascrc32;
|
||||
size_t size = sizeof(hascrc32);
|
||||
return sysctlbyname("hw.optional.armv8_crc32", &hascrc32, &size, NULL, 0) == 0
|
||||
&& hascrc32 == 1;
|
||||
#elif defined(ARM_NOCHECK_ACLE)
|
||||
return 1;
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* AArch64 has neon. */
|
||||
#if !defined(__aarch64__)
|
||||
static inline int arm_has_neon()
|
||||
{
|
||||
#if defined(__linux__) && defined(HWCAP_NEON)
|
||||
#if !defined(__aarch64__) && !defined(_M_ARM64)
|
||||
static inline int arm_has_neon() {
|
||||
#if defined(__linux__) && defined(HWCAP_NEON)
|
||||
return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
|
||||
#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
|
||||
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
|
||||
#elif defined(__APPLE__)
|
||||
int hasneon;
|
||||
size_t size = sizeof(hasneon);
|
||||
return sysctlbyname("hw.optional.neon", &hasneon, &size, NULL, 0) == 0
|
||||
&& hasneon == 1;
|
||||
#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
|
||||
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
|
||||
return 1; /* Always supported */
|
||||
#endif
|
||||
#endif
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(ARM_NOCHECK_NEON)
|
||||
#if defined(ARM_NOCHECK_NEON)
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
ZLIB_INTERNAL int arm_cpu_has_neon;
|
||||
ZLIB_INTERNAL int arm_cpu_has_crc32;
|
||||
|
||||
void ZLIB_INTERNAL arm_check_features(void) {
|
||||
#if defined(__aarch64__)
|
||||
arm_cpu_has_neon = 1; /* always available */
|
||||
#else
|
||||
arm_cpu_has_neon = arm_has_neon();
|
||||
return 0;
|
||||
#endif
|
||||
arm_cpu_has_crc32 = arm_has_crc32();
|
||||
}
|
||||
#endif
|
||||
|
||||
Z_INTERNAL int arm_cpu_has_neon;
|
||||
Z_INTERNAL int arm_cpu_has_crc32;
|
||||
|
||||
void Z_INTERNAL arm_check_features(void) {
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
arm_cpu_has_neon = 1; /* always available */
|
||||
#else
|
||||
arm_cpu_has_neon = arm_has_neon();
|
||||
#endif
|
||||
arm_cpu_has_crc32 = arm_has_crc32();
|
||||
}
|
||||
|
||||
54
libs/zlibng/arch/arm/chunkset_neon.c
Normal file
54
libs/zlibng/arch/arm/chunkset_neon.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* chunkset_neon.c -- NEON inline functions to copy small data chunks.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
#ifdef _M_ARM64
|
||||
# include <arm64_neon.h>
|
||||
#else
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
typedef uint8x16_t chunk_t;
|
||||
|
||||
#define HAVE_CHUNKMEMSET_1
|
||||
#define HAVE_CHUNKMEMSET_2
|
||||
#define HAVE_CHUNKMEMSET_4
|
||||
#define HAVE_CHUNKMEMSET_8
|
||||
|
||||
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
|
||||
*chunk = vld1q_dup_u8(from);
|
||||
}
|
||||
|
||||
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
|
||||
*chunk = vreinterpretq_u8_s16(vdupq_n_s16(*(int16_t *)from));
|
||||
}
|
||||
|
||||
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
|
||||
*chunk = vreinterpretq_u8_s32(vdupq_n_s32(*(int32_t *)from));
|
||||
}
|
||||
|
||||
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
|
||||
*chunk = vcombine_u8(vld1_u8(from), vld1_u8(from));
|
||||
}
|
||||
|
||||
#define CHUNKSIZE chunksize_neon
|
||||
#define CHUNKCOPY chunkcopy_neon
|
||||
#define CHUNKCOPY_SAFE chunkcopy_safe_neon
|
||||
#define CHUNKUNROLL chunkunroll_neon
|
||||
#define CHUNKMEMSET chunkmemset_neon
|
||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
|
||||
|
||||
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
|
||||
*chunk = vld1q_u8(s);
|
||||
}
|
||||
|
||||
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
|
||||
vst1q_u8(out, *chunk);
|
||||
}
|
||||
|
||||
#include "chunkset_tpl.h"
|
||||
|
||||
#endif
|
||||
@ -5,21 +5,16 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef __ARM_FEATURE_CRC32
|
||||
# include <arm_acle.h>
|
||||
# ifdef ZLIB_COMPAT
|
||||
# include <zconf.h>
|
||||
# else
|
||||
# include <zconf-ng.h>
|
||||
# endif
|
||||
# ifdef __linux__
|
||||
# include <stddef.h>
|
||||
# endif
|
||||
#ifdef ARM_ACLE_CRC_HASH
|
||||
#ifndef _MSC_VER
|
||||
# include <arm_acle.h>
|
||||
#endif
|
||||
#include "../../zutil.h"
|
||||
|
||||
uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
register uint32_t c;
|
||||
register const uint16_t *buf2;
|
||||
register const uint32_t *buf4;
|
||||
Z_REGISTER uint32_t c;
|
||||
Z_REGISTER const uint16_t *buf2;
|
||||
Z_REGISTER const uint32_t *buf4;
|
||||
|
||||
c = ~crc;
|
||||
if (len && ((ptrdiff_t)buf & 1)) {
|
||||
@ -36,7 +31,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
buf4 = (const uint32_t *) buf;
|
||||
}
|
||||
|
||||
# if defined(__aarch64__)
|
||||
#if defined(__aarch64__)
|
||||
if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
|
||||
c = __crc32w(c, *buf4++);
|
||||
len -= sizeof(uint32_t);
|
||||
@ -44,7 +39,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
|
||||
const uint64_t *buf8 = (const uint64_t *) buf4;
|
||||
|
||||
# ifdef UNROLL_MORE
|
||||
#ifdef UNROLL_MORE
|
||||
while (len >= 4 * sizeof(uint64_t)) {
|
||||
c = __crc32d(c, *buf8++);
|
||||
c = __crc32d(c, *buf8++);
|
||||
@ -52,7 +47,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
c = __crc32d(c, *buf8++);
|
||||
len -= 4 * sizeof(uint64_t);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
while (len >= sizeof(uint64_t)) {
|
||||
c = __crc32d(c, *buf8++);
|
||||
@ -74,7 +69,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
}
|
||||
|
||||
buf = (const unsigned char *) buf2;
|
||||
# else /* __aarch64__ */
|
||||
#else /* __aarch64__ */
|
||||
|
||||
# ifdef UNROLL_MORE
|
||||
while (len >= 8 * sizeof(uint32_t)) {
|
||||
@ -103,7 +98,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
} else {
|
||||
buf = (const unsigned char *) buf4;
|
||||
}
|
||||
# endif /* __aarch64__ */
|
||||
#endif /* __aarch64__ */
|
||||
|
||||
if (len) {
|
||||
c = __crc32b(c, *buf);
|
||||
@ -112,4 +107,4 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
c = ~c;
|
||||
return c;
|
||||
}
|
||||
#endif /* __ARM_FEATURE_CRC32 */
|
||||
#endif
|
||||
|
||||
@ -5,7 +5,7 @@
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
static __forceinline unsigned long __builtin_ctzl(unsigned long value) {
|
||||
return _arm_clz(_arm_rbit(value));
|
||||
return _arm_clz(_arm_rbit(value));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -1,169 +0,0 @@
|
||||
/* fill_window_arm.c -- Optimized hash table shifting for ARM with support for NEON instructions
|
||||
* Copyright (C) 2017 Mika T. Lindqvist
|
||||
*
|
||||
* Authors:
|
||||
* Mika T. Lindqvist <postmaster@raasu.org>
|
||||
* Jun He <jun.he@arm.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
/* @(#) $Id$ */
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
#include "functable.h"
|
||||
|
||||
extern ZLIB_INTERNAL int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
|
||||
|
||||
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
|
||||
#include <arm_neon.h>
|
||||
|
||||
/* SIMD version of hash_chain rebase */
|
||||
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
|
||||
register uint16x8_t v, *p;
|
||||
register size_t n;
|
||||
|
||||
size_t size = entries*sizeof(table[0]);
|
||||
Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
|
||||
|
||||
Assert(sizeof(Pos) == 2, "Wrong Pos size");
|
||||
v = vdupq_n_u16(window_size);
|
||||
|
||||
p = (uint16x8_t *)table;
|
||||
n = size / (sizeof(uint16x8_t) * 8);
|
||||
do {
|
||||
p[0] = vqsubq_u16(p[0], v);
|
||||
p[1] = vqsubq_u16(p[1], v);
|
||||
p[2] = vqsubq_u16(p[2], v);
|
||||
p[3] = vqsubq_u16(p[3], v);
|
||||
p[4] = vqsubq_u16(p[4], v);
|
||||
p[5] = vqsubq_u16(p[5], v);
|
||||
p[6] = vqsubq_u16(p[6], v);
|
||||
p[7] = vqsubq_u16(p[7], v);
|
||||
p += 8;
|
||||
} while (--n);
|
||||
}
|
||||
#else
|
||||
/* generic version for hash rebase */
|
||||
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
|
||||
unsigned int i;
|
||||
for (i = 0; i < entries; i++) {
|
||||
table[i] = (table[i] >= window_size) ? (table[i] - window_size) : NIL;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Refill the sliding window of `s` from the input stream.
 *
 * Repeats until the lookahead reaches MIN_LOOKAHEAD or the input is
 * exhausted. When strstart has advanced past w_size + MAX_DIST(s), the upper
 * half of the window is copied over the lower half and both hash tables are
 * rebased with slide_hash_chain(). After reading, the bytes just beyond the
 * valid data are zeroed up to the high-water mark.
 */
void fill_window_arm(deflate_state *s) {
    unsigned int half = s->w_size;
    unsigned long room;     /* Free space at the end of the window. */
    unsigned got;           /* Bytes delivered by the last read_buf() call. */

    Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");

    for (;;) {
        room = s->window_size - s->lookahead - s->strstart;

        /* Window nearly full with too little lookahead: move the upper half
         * down to make space at the top.
         */
        if (s->strstart >= half + MAX_DIST(s)) {
            memcpy(s->window, s->window + half, half);
            s->match_start -= half;
            s->strstart -= half;    /* now strstart >= MAX_DIST */
            s->block_start -= half;

            /* Both tables hold window positions, so they are shifted by the
             * same amount as the data.
             */
            slide_hash_chain(s->head, s->hash_size, half);
            slide_hash_chain(s->prev, half, half);
            room += half;
        }

        if (s->strm->avail_in == 0)
            break;

        /* See the derivation in the original sources: without sliding
         * room >= window_size - 2*WSIZE + 2, with sliding room >= WSIZE.
         */
        Assert(room >= 2, "more < 2");

        got = read_buf(s->strm, s->window + s->strstart + s->lookahead, room);
        s->lookahead += got;

        /* With enough bytes available, seed the hash and insert the strings
         * that were deferred in s->insert.
         */
        if (s->lookahead + s->insert >= MIN_MATCH) {
            unsigned int pos = s->strstart - s->insert;
            unsigned int pending = s->insert;
            unsigned int consumed;

            s->ins_h = s->window[pos];

            if (unlikely(s->lookahead < MIN_MATCH))
                pending += s->lookahead - MIN_MATCH;
            consumed = pending;

            if (pos >= (MIN_MATCH - 2)) {
                pos += 2 - MIN_MATCH;
                pending += MIN_MATCH - 2;
            }

            if (pending > 0) {
                functable.insert_string(s, pos, pending);
                s->insert -= consumed;
            }
        }
        /* If the whole input is shorter than MIN_MATCH, ins_h is garbage,
         * which is harmless because only literals will be emitted.
         */

        if (!(s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0))
            break;
    }

    /* Zero up to WIN_INIT bytes past the current data that have never been
     * written, so the match routines (which may scan to strstart + MAX_MATCH
     * regardless of lookahead) never read uninitialised memory. high_water
     * records how far the window has been initialised.
     */
    if (s->high_water < s->window_size) {
        unsigned long data_end = s->strstart + (unsigned long)s->lookahead;
        unsigned long zero_len;

        if (s->high_water < data_end) {
            /* Mark is below the data: clear WIN_INIT bytes, or up to the end
             * of the window if that is closer.
             */
            zero_len = s->window_size - data_end;
            if (zero_len > WIN_INIT)
                zero_len = WIN_INIT;
            memset(s->window + data_end, 0, zero_len);
            s->high_water = data_end + zero_len;
        } else if (s->high_water < data_end + WIN_INIT) {
            /* Mark is inside [data_end, data_end + WIN_INIT): extend it to
             * data_end + WIN_INIT, clamped to the end of the window.
             */
            zero_len = data_end + WIN_INIT;
            if (zero_len > s->window_size)
                zero_len = s->window_size;
            zero_len -= s->high_water;
            memset(s->window + s->high_water, 0, zero_len);
            s->high_water += zero_len;
        }
    }

    Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
}
|
||||
@ -5,49 +5,18 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
|
||||
#include <arm_acle.h>
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
/* ===========================================================================
|
||||
* Insert string str in the dictionary and set match_head to the previous head
|
||||
* of the hash chain (the most recent string with same hash key). Return
|
||||
* the previous length of the hash chain.
|
||||
* IN assertion: all calls to INSERT_STRING are made with consecutive
|
||||
* input characters and the first MIN_MATCH bytes of str are valid
|
||||
* (except for the last MIN_MATCH-1 bytes of the input file).
|
||||
*/
|
||||
Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
    Pos cur;
    Pos last;
    Pos result = 0;

    /* Nothing to insert: report the current predecessor of str. */
    if (unlikely(count == 0))
        return s->prev[str & s->w_mask];

    last = str + count - 1;     /* final position to insert */

    cur = str;
    while (cur <= last) {
        uint32_t word;
        uint32_t slot;
        Pos old_head;

        /* Unaligned-safe load of the four bytes starting at cur. */
        memcpy(&word, &s->window[cur], sizeof(word));

        /* From TRIGGER_LEVEL upwards only the low 24 bits feed the hash. */
        if (s->level >= TRIGGER_LEVEL)
            word &= 0xFFFFFF;

        slot = __crc32w(0, word) & s->hash_mask;
        old_head = s->head[slot];

        /* Link cur in front of the chain unless it is already the head. */
        if (old_head != cur) {
            s->prev[cur & s->w_mask] = old_head;
            s->head[slot] = cur;
        }

        /* The value returned is the head seen while inserting the last
         * position; when that head was cur itself the two are equal.
         */
        if (cur == last)
            result = old_head;

        cur++;
    }
    return result;
}
|
||||
#ifdef ARM_ACLE_CRC_HASH
|
||||
#ifndef _MSC_VER
|
||||
# include <arm_acle.h>
|
||||
#endif
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
|
||||
#define UPDATE_HASH(s, h, val) \
|
||||
h = __crc32w(0, val)
|
||||
|
||||
#define INSERT_STRING insert_string_acle
|
||||
#define QUICK_INSERT_STRING quick_insert_string_acle
|
||||
|
||||
#include "../../insert_string_tpl.h"
|
||||
#endif
|
||||
|
||||
52
libs/zlibng/arch/arm/slide_neon.c
Normal file
52
libs/zlibng/arch/arm/slide_neon.c
Normal file
@ -0,0 +1,52 @@
|
||||
/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
|
||||
* Copyright (C) 2017-2020 Mika T. Lindqvist
|
||||
*
|
||||
* Authors:
|
||||
* Mika T. Lindqvist <postmaster@raasu.org>
|
||||
* Jun He <jun.he@arm.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#if defined(ARM_NEON_SLIDEHASH)
|
||||
#ifdef _M_ARM64
|
||||
# include <arm64_neon.h>
|
||||
#else
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
|
||||
/* SIMD version of hash_chain rebase */
|
||||
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
|
||||
Z_REGISTER uint16x8_t v, *p;
|
||||
Z_REGISTER size_t n;
|
||||
|
||||
size_t size = entries*sizeof(table[0]);
|
||||
Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
|
||||
|
||||
Assert(sizeof(Pos) == 2, "Wrong Pos size");
|
||||
v = vdupq_n_u16(window_size);
|
||||
|
||||
p = (uint16x8_t *)table;
|
||||
n = size / (sizeof(uint16x8_t) * 8);
|
||||
do {
|
||||
p[0] = vqsubq_u16(p[0], v);
|
||||
p[1] = vqsubq_u16(p[1], v);
|
||||
p[2] = vqsubq_u16(p[2], v);
|
||||
p[3] = vqsubq_u16(p[3], v);
|
||||
p[4] = vqsubq_u16(p[4], v);
|
||||
p[5] = vqsubq_u16(p[5], v);
|
||||
p[6] = vqsubq_u16(p[6], v);
|
||||
p[7] = vqsubq_u16(p[7], v);
|
||||
p += 8;
|
||||
} while (--n);
|
||||
}
|
||||
|
||||
/* Rebase both hash tables of `s` by its window size: the head table
 * (HASH_SIZE entries) and the prev table (w_size entries).
 */
Z_INTERNAL void slide_hash_neon(deflate_state *s) {
    const unsigned int window = s->w_size;

    slide_hash_chain(s->head, HASH_SIZE, window);
    slide_hash_chain(s->prev, window, window);
}
|
||||
#endif
|
||||
49
libs/zlibng/arch/power/Makefile.in
Normal file
49
libs/zlibng/arch/power/Makefile.in
Normal file
@ -0,0 +1,49 @@
|
||||
# Makefile for POWER-specific files
# Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
# For conditions of distribution and use, see copyright notice in zlib.h

# The empty assignments below are left empty here on purpose.
# NOTE(review): presumably they are filled in when the real Makefile is
# generated from this template - confirm against the configure script.
CC=
CFLAGS=
SFLAGS=
INCLUDES=
SUFFIX=

SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)

# Extra flags for the sources that use POWER8 vector instructions.
P8FLAGS=-mcpu=power8

# These targets do not produce files of the same name.
.PHONY: all mostlyclean clean distclean

all: power.o \
     power.lo \
     adler32_power8.o \
     adler32_power8.lo \
     slide_hash_power8.o \
     slide_hash_power8.lo

# Each object lists its source as a prerequisite so that it is rebuilt when
# the source changes. Static objects (.o) use CFLAGS, the .lo objects use
# SFLAGS.
power.o: $(SRCDIR)/power.c
	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c

power.lo: $(SRCDIR)/power.c
	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c

adler32_power8.o: $(SRCDIR)/adler32_power8.c
	$(CC) $(CFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c

adler32_power8.lo: $(SRCDIR)/adler32_power8.c
	$(CC) $(SFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_power8.c

slide_hash_power8.o: $(SRCDIR)/slide_hash_power8.c
	$(CC) $(CFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c

slide_hash_power8.lo: $(SRCDIR)/slide_hash_power8.c
	$(CC) $(SFLAGS) $(P8FLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_hash_power8.c

mostlyclean: clean
clean:
	rm -f *.o *.lo *~
	rm -rf objs
	rm -f *.gcda *.gcno *.gcov

distclean:
	rm -f Makefile
|
||||
154
libs/zlibng/arch/power/adler32_power8.c
Normal file
154
libs/zlibng/arch/power/adler32_power8.c
Normal file
@ -0,0 +1,154 @@
|
||||
/* Adler32 for POWER8 using VSX instructions.
|
||||
* Copyright (C) 2020 IBM Corporation
|
||||
* Author: Rogerio Alves <rcardoso@linux.ibm.com>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
* Calculate adler32 checksum for 16 bytes at once using POWER8+ VSX (vector)
|
||||
* instructions.
|
||||
*
|
||||
* If adler32 do 1 byte at time on the first iteration s1 is s1_0 (_n means
|
||||
* iteration n) is the initial value of adler - at start _0 is 1 unless
|
||||
* adler initial value is different than 1. So s1_1 = s1_0 + c[0] after
|
||||
* the first calculation. For the iteration s1_2 = s1_1 + c[1] and so on.
|
||||
* Hence, for iteration N, s1_N = s1_(N-1) + c[N] is the value of s1 on
|
||||
* after iteration N.
|
||||
*
|
||||
* Therefore, for s2 and iteration N, s2_N = s2_0 + N*s1_0 + N*c[1] +
|
||||
* (N-1)*c[2] + ... + c[N]
|
||||
*
|
||||
* In a more general way:
|
||||
*
|
||||
* s1_N = s1_0 + sum(i=1 to N)c[i]
|
||||
* s2_N = s2_0 + N*s1 + sum (i=1 to N)(N-i+1)*c[i]
|
||||
*
|
||||
* Where s1_N, s2_N are the values for s1, s2 after N iterations. So if we
|
||||
* can process N bytes at a time, we can do this at once.
|
||||
*
|
||||
* Since VSX vector registers are 128 bits (16 bytes) wide, we can process
|
||||
* 16 bytes at a time. Using N = 16 we have:
|
||||
*
|
||||
* s1 = s1_16 = s1_(16-1) + c[16] = s1_0 + sum(i=1 to 16)c[i]
|
||||
* s2 = s2_16 = s2_0 + 16*s1 + sum(i=1 to 16)(16-i+1)*c[i]
|
||||
*
|
||||
* After the first iteration we calculate the adler32 checksum for 16 bytes.
|
||||
*
|
||||
* For more background about adler32 please check the RFC:
|
||||
* https://www.ietf.org/rfc/rfc1950.txt
|
||||
*/
|
||||
|
||||
#ifdef POWER8_VSX_ADLER32
|
||||
|
||||
#include <altivec.h>
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "adler32_p.h"
|
||||
|
||||
/* Vector across sum unsigned int (saturate). */
|
||||
inline vector unsigned int vec_sumsu(vector unsigned int __a, vector unsigned int __b) {
|
||||
__b = vec_sld(__a, __a, 8);
|
||||
__b = vec_add(__b, __a);
|
||||
__a = vec_sld(__b, __b, 4);
|
||||
__a = vec_add(__a, __b);
|
||||
|
||||
return __a;
|
||||
}
|
||||
|
||||
uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len) {
|
||||
uint32_t s1 = adler & 0xffff;
|
||||
uint32_t s2 = (adler >> 16) & 0xffff;
|
||||
|
||||
/* in case user likes doing a byte at a time, keep it fast */
|
||||
if (UNLIKELY(len == 1))
|
||||
return adler32_len_1(s1, buf, s2);
|
||||
|
||||
/* If buffer is empty or len=0 we need to return adler initial value. */
|
||||
if (UNLIKELY(buf == NULL))
|
||||
return 1;
|
||||
|
||||
/* This is faster than VSX code for len < 64. */
|
||||
if (len < 64)
|
||||
return adler32_len_64(s1, buf, len, s2);
|
||||
|
||||
/* Use POWER VSX instructions for len >= 64. */
|
||||
const vector unsigned int v_zeros = { 0 };
|
||||
const vector unsigned char v_mul = {16, 15, 14, 13, 12, 11, 10, 9, 8, 7,
|
||||
6, 5, 4, 3, 2, 1};
|
||||
const vector unsigned char vsh = vec_splat_u8(4);
|
||||
const vector unsigned int vmask = {0xffffffff, 0x0, 0x0, 0x0};
|
||||
vector unsigned int vs1 = { 0 };
|
||||
vector unsigned int vs2 = { 0 };
|
||||
vector unsigned int vs1_save = { 0 };
|
||||
vector unsigned int vsum1, vsum2;
|
||||
vector unsigned char vbuf;
|
||||
int n;
|
||||
|
||||
vs1[0] = s1;
|
||||
vs2[0] = s2;
|
||||
|
||||
/* Do length bigger than NMAX in blocks of NMAX size. */
|
||||
while (len >= NMAX) {
|
||||
len -= NMAX;
|
||||
n = NMAX / 16;
|
||||
do {
|
||||
vbuf = vec_xl(0, (unsigned char *) buf);
|
||||
vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */
|
||||
/* sum(i=1 to 16) buf[i]*(16-i+1). */
|
||||
vsum2 = vec_msum(vbuf, v_mul, v_zeros);
|
||||
/* Save vs1. */
|
||||
vs1_save = vec_add(vs1_save, vs1);
|
||||
/* Accumulate the sums. */
|
||||
vs1 = vec_add(vsum1, vs1);
|
||||
vs2 = vec_add(vsum2, vs2);
|
||||
|
||||
buf += 16;
|
||||
} while (--n);
|
||||
/* Once each block of NMAX size. */
|
||||
vs1 = vec_sumsu(vs1, vsum1);
|
||||
vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save. */
|
||||
vs2 = vec_add(vs1_save, vs2);
|
||||
vs2 = vec_sumsu(vs2, vsum2);
|
||||
|
||||
/* vs1[0] = (s1_i + sum(i=1 to 16)buf[i]) mod 65521. */
|
||||
vs1[0] = vs1[0] % BASE;
|
||||
/* vs2[0] = s2_i + 16*s1_save +
|
||||
sum(i=1 to 16)(16-i+1)*buf[i] mod 65521. */
|
||||
vs2[0] = vs2[0] % BASE;
|
||||
|
||||
vs1 = vec_and(vs1, vmask);
|
||||
vs2 = vec_and(vs2, vmask);
|
||||
vs1_save = v_zeros;
|
||||
}
|
||||
|
||||
/* len is less than NMAX one modulo is needed. */
|
||||
if (len >= 16) {
|
||||
while (len >= 16) {
|
||||
len -= 16;
|
||||
|
||||
vbuf = vec_xl(0, (unsigned char *) buf);
|
||||
|
||||
vsum1 = vec_sum4s(vbuf, v_zeros); /* sum(i=1 to 16) buf[i]. */
|
||||
/* sum(i=1 to 16) buf[i]*(16-i+1). */
|
||||
vsum2 = vec_msum(vbuf, v_mul, v_zeros);
|
||||
/* Save vs1. */
|
||||
vs1_save = vec_add(vs1_save, vs1);
|
||||
/* Accumulate the sums. */
|
||||
vs1 = vec_add(vsum1, vs1);
|
||||
vs2 = vec_add(vsum2, vs2);
|
||||
|
||||
buf += 16;
|
||||
}
|
||||
/* Since the size will be always less than NMAX we do this once. */
|
||||
vs1 = vec_sumsu(vs1, vsum1);
|
||||
vs1_save = vec_sll(vs1_save, vsh); /* 16*vs1_save. */
|
||||
vs2 = vec_add(vs1_save, vs2);
|
||||
vs2 = vec_sumsu(vs2, vsum2);
|
||||
}
|
||||
/* Copy result back to s1, s2 (mod 65521). */
|
||||
s1 = vs1[0] % BASE;
|
||||
s2 = vs2[0] % BASE;
|
||||
|
||||
/* Process tail (len < 16).and return */
|
||||
return adler32_len_16(s1, buf, len, s2);
|
||||
}
|
||||
|
||||
#endif /* POWER8_VSX_ADLER32 */
|
||||
19
libs/zlibng/arch/power/power.c
Normal file
19
libs/zlibng/arch/power/power.c
Normal file
@ -0,0 +1,19 @@
|
||||
/* POWER feature check
|
||||
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include <sys/auxv.h>
|
||||
#include "../../zutil.h"
|
||||
|
||||
Z_INTERNAL int power_cpu_has_arch_2_07;
|
||||
|
||||
/* Detect POWER CPU features at run time.
 *
 * When built with POWER8 defined, reads the AT_HWCAP2 auxiliary-vector entry
 * and sets power_cpu_has_arch_2_07 to 1 if PPC_FEATURE2_ARCH_2_07 is
 * reported. The flag is never cleared here. Without POWER8 the function does
 * nothing.
 */
void Z_INTERNAL power_check_features(void) {
#ifdef POWER8
    /* Declared inside the guard so builds without POWER8 do not carry a
     * variable that is set but never used.
     */
    unsigned long hwcap2 = getauxval(AT_HWCAP2);

    if (hwcap2 & PPC_FEATURE2_ARCH_2_07)
        power_cpu_has_arch_2_07 = 1;
#endif
}
|
||||
13
libs/zlibng/arch/power/power.h
Normal file
13
libs/zlibng/arch/power/power.h
Normal file
@ -0,0 +1,13 @@
|
||||
/* power.h -- check for POWER CPU features
|
||||
* Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef POWER_H_
|
||||
#define POWER_H_
|
||||
|
||||
extern int power_cpu_has_arch_2_07;
|
||||
|
||||
void Z_INTERNAL power_check_features(void);
|
||||
|
||||
#endif /* POWER_H_ */
|
||||
60
libs/zlibng/arch/power/slide_hash_power8.c
Normal file
60
libs/zlibng/arch/power/slide_hash_power8.c
Normal file
@ -0,0 +1,60 @@
|
||||
/* Optimized slide_hash for POWER processors
|
||||
* Copyright (C) 2019-2020 IBM Corporation
|
||||
* Author: Matheus Castanho <msc@linux.ibm.com>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifdef POWER8_VSX_SLIDEHASH
|
||||
|
||||
#include <altivec.h>
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
/* Rebase one hash table by s->w_size, eight entries at a time.
 *
 *   s         - deflate state; only s->w_size is read
 *   n_elems   - number of Pos entries in the table (must be a multiple of 8)
 *   table_end - pointer one past the last entry; the table is walked
 *               backwards from here
 */
static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) {
    vector unsigned short vw, vm, *vp;
    unsigned chunks;

    /* Each vector register (chunk) corresponds to 128 bits == 8 Pos,
     * so instead of processing each of the n_elems in the hash table
     * individually, we can do it in chunks of 8 with vector instructions.
     *
     * Within this file the function is only called from
     * slide_hash_power8(). According to the original author's note both
     * calls pass a power of two larger than 2^7, so n_elems is expected to
     * always be a multiple of 8. */
    chunks = n_elems >> 3;
    Assert(n_elems % 8 == 0, "Weird hash table size!");

    /* Broadcast w_size to all eight lanes in one step rather than writing a
     * single lane of an uninitialised vector and splatting it afterwards.
     * The original author notes that the narrowing cast is safe because
     * w_size always fits in a Pos (unsigned short). */
    vw = vec_splats((Pos) s->w_size);

    vp = (vector unsigned short *) table_end;

    /* Counted loop so that a table with fewer than 8 entries is a no-op
     * instead of wrapping the counter around. */
    for (; chunks > 0; --chunks) {
        /* Processing 8 elements at a time */
        vp--;
        vm = *vp;

        /* This is equivalent to: m >= w_size ? m - w_size : 0
         * Since we are using a saturated unsigned subtraction, any
         * values that are < w_size will be set to 0, while the others
         * will be reduced by w_size. */
        *vp = vec_subs(vm, vw);
    }
}
|
||||
|
||||
/* Rebase both hash tables of `s`: first the head table (HASH_SIZE entries),
 * then the prev table (w_size entries). Each helper call receives the
 * one-past-the-end pointer of its table.
 */
void Z_INTERNAL slide_hash_power8(deflate_state *s) {
    slide_hash_power8_loop(s, HASH_SIZE, s->head + HASH_SIZE);

    const unsigned int window_entries = s->w_size;
    slide_hash_power8_loop(s, window_entries, s->prev + window_entries);
}
|
||||
|
||||
#endif /* POWER8_VSX_SLIDEHASH */
|
||||
@ -1,6 +1,7 @@
|
||||
This directory contains IBM Z DEFLATE CONVERSION CALL support for
|
||||
zlib-ng. In order to enable it, the following build commands should be
|
||||
used:
|
||||
# Introduction
|
||||
|
||||
This directory contains SystemZ deflate hardware acceleration support.
|
||||
It can be enabled using the following build commands:
|
||||
|
||||
$ ./configure --with-dfltcc-deflate --with-dfltcc-inflate
|
||||
$ make
|
||||
@ -10,60 +11,206 @@ or
|
||||
$ cmake -DWITH_DFLTCC_DEFLATE=1 -DWITH_DFLTCC_INFLATE=1 .
|
||||
$ make
|
||||
|
||||
When built like this, zlib-ng would compress in hardware on level 1,
|
||||
and in software on all other levels. Decompression will always happen
|
||||
in hardware. In order to enable DFLTCC compression for levels 1-6 (i.e.
|
||||
to make it used by default) one could add -DDFLTCC_LEVEL_MASK=0x7e to
|
||||
CFLAGS when building zlib-ng.
|
||||
When built like this, zlib-ng would compress using hardware on level 1,
|
||||
and using software on all other levels. Decompression will always happen
|
||||
in hardware. In order to enable hardware compression for levels 1-6
|
||||
(i.e. to make it used by default) one could add
|
||||
`-DDFLTCC_LEVEL_MASK=0x7e` to CFLAGS when building zlib-ng.
|
||||
|
||||
Two DFLTCC compression calls produce the same results only when they
|
||||
both are made on machines of the same generation, and when the
|
||||
respective buffers have the same offset relative to the start of the
|
||||
page. Therefore care should be taken when using hardware compression
|
||||
when reproducible results are desired.
|
||||
SystemZ deflate hardware acceleration is available on [IBM z15](
|
||||
https://www.ibm.com/products/z15) and newer machines under the name [
|
||||
"Integrated Accelerator for zEnterprise Data Compression"](
|
||||
https://www.ibm.com/support/z-content-solutions/compression/). The
|
||||
programming interface to it is a machine instruction called DEFLATE
|
||||
CONVERSION CALL (DFLTCC). It is documented in Chapter 26 of [Principles
|
||||
of Operation](http://publibfp.dhe.ibm.com/epubs/pdf/a227832c.pdf). Both
|
||||
the code and the rest of this document refer to this feature simply as
|
||||
"DFLTCC".
|
||||
|
||||
# Performance
|
||||
|
||||
Performance figures are published [here](
|
||||
https://github.com/iii-i/zlib-ng/wiki/Performance-with-dfltcc-patch-applied-and-dfltcc-support-built-on-dfltcc-enabled-machine
|
||||
). The compression speed-up can be as high as 110x and the decompression
|
||||
speed-up can be as high as 15x.
|
||||
|
||||
# Limitations
|
||||
|
||||
Two DFLTCC compression calls with identical inputs are not guaranteed to
|
||||
produce identical outputs. Therefore care should be taken when using
|
||||
hardware compression when reproducible results are desired. In
|
||||
particular, zlib-ng-specific `zng_deflateSetParams` call allows setting
|
||||
`Z_DEFLATE_REPRODUCIBLE` parameter, which disables DFLTCC support for a
|
||||
particular stream.
|
||||
|
||||
DFLTCC does not support every single zlib-ng feature, in particular:
|
||||
|
||||
* inflate(Z_BLOCK) and inflate(Z_TREES)
|
||||
* inflateMark()
|
||||
* inflatePrime()
|
||||
* deflateParams() after the first deflate() call
|
||||
* `inflate(Z_BLOCK)` and `inflate(Z_TREES)`
|
||||
* `inflateMark()`
|
||||
* `inflatePrime()`
|
||||
* `inflateSyncPoint()`
|
||||
|
||||
When used, these functions will either switch to software, or, in case
|
||||
this is not possible, gracefully fail.
|
||||
|
||||
All SystemZ-specific code lives in a separate file and is integrated
|
||||
with the rest of zlib-ng using hook macros, which are explained below.
|
||||
# Code structure
|
||||
|
||||
All SystemZ-specific code lives in `arch/s390` directory and is
|
||||
integrated with the rest of zlib-ng using hook macros.
|
||||
|
||||
## Hook macros
|
||||
|
||||
DFLTCC takes as arguments a parameter block, an input buffer, an output
|
||||
buffer and a window. ZALLOC_STATE, ZFREE_STATE, ZCOPY_STATE,
|
||||
ZALLOC_WINDOW and TRY_FREE_WINDOW macros encapsulate allocation details
|
||||
for the parameter block (which is allocated alongside zlib-ng state)
|
||||
and the window (which must be page-aligned).
|
||||
buffer and a window. `ZALLOC_STATE()`, `ZFREE_STATE()`, `ZCOPY_STATE()`,
|
||||
`ZALLOC_WINDOW()` and `TRY_FREE_WINDOW()` macros encapsulate allocation
|
||||
details for the parameter block (which is allocated alongside zlib-ng
|
||||
state) and the window (which must be page-aligned).
|
||||
|
||||
While for inflate software and hardware window formats match, this is
|
||||
not the case for deflate. Therefore, deflateSetDictionary and
|
||||
deflateGetDictionary need special handling, which is triggered using
|
||||
the DEFLATE_SET_DICTIONARY_HOOK and DEFLATE_GET_DICTIONARY_HOOK macros.
|
||||
While inflate software and hardware window formats match, this is not
|
||||
the case for deflate. Therefore, `deflateSetDictionary()` and
|
||||
`deflateGetDictionary()` need special handling, which is triggered using
|
||||
`DEFLATE_SET_DICTIONARY_HOOK()` and `DEFLATE_GET_DICTIONARY_HOOK()`
|
||||
macros.
|
||||
|
||||
deflateResetKeep() and inflateResetKeep() update the DFLTCC parameter
|
||||
block using DEFLATE_RESET_KEEP_HOOK and INFLATE_RESET_KEEP_HOOK macros.
|
||||
`deflateResetKeep()` and `inflateResetKeep()` update the DFLTCC
|
||||
parameter block using `DEFLATE_RESET_KEEP_HOOK()` and
|
||||
`INFLATE_RESET_KEEP_HOOK()` macros.
|
||||
|
||||
DEFLATE_PARAMS_HOOK, INFLATE_PRIME_HOOK and INFLATE_MARK_HOOK macros
|
||||
make the unsupported deflateParams(), inflatePrime() and inflateMark()
|
||||
calls fail gracefully.
|
||||
`INFLATE_PRIME_HOOK()`, `INFLATE_MARK_HOOK()` and
|
||||
`INFLATE_SYNC_POINT_HOOK()` macros make the respective unsupported
|
||||
calls gracefully fail.
|
||||
|
||||
`DEFLATE_PARAMS_HOOK()` implements switching between hardware and
|
||||
software compression mid-stream using `deflateParams()`. Switching
|
||||
normally entails flushing the current block, which might not be possible
|
||||
in low memory situations. `deflateParams()` uses `DEFLATE_DONE()` hook
|
||||
in order to detect and gracefully handle such situations.
|
||||
|
||||
The algorithm implemented in hardware has a different compression ratio
|
||||
than the one implemented in software. DEFLATE_BOUND_ADJUST_COMPLEN and
|
||||
DEFLATE_NEED_CONSERVATIVE_BOUND macros make deflateBound() return the
|
||||
correct results for the hardware implementation.
|
||||
than the one implemented in software. `DEFLATE_BOUND_ADJUST_COMPLEN()`
|
||||
and `DEFLATE_NEED_CONSERVATIVE_BOUND()` macros make `deflateBound()`
|
||||
return the correct results for the hardware implementation.
|
||||
|
||||
Actual compression and decompression are handled by DEFLATE_HOOK and
|
||||
INFLATE_TYPEDO_HOOK macros. Since inflation with DFLTCC manages the
|
||||
window on its own, calling updatewindow() is suppressed using
|
||||
INFLATE_NEED_UPDATEWINDOW() macro.
|
||||
Actual compression and decompression are handled by `DEFLATE_HOOK()` and
|
||||
`INFLATE_TYPEDO_HOOK()` macros. Since inflation with DFLTCC manages the
|
||||
window on its own, calling `updatewindow()` is suppressed using
|
||||
`INFLATE_NEED_UPDATEWINDOW()` macro.
|
||||
|
||||
In addition to compression, DFLTCC computes CRC-32 and Adler-32
|
||||
checksums, therefore, whenever it's used, software checksumming is
|
||||
suppressed using DEFLATE_NEED_CHECKSUM and INFLATE_NEED_CHECKSUM
|
||||
suppressed using `DEFLATE_NEED_CHECKSUM()` and `INFLATE_NEED_CHECKSUM()`
|
||||
macros.
|
||||
|
||||
While software always produces reproducible compression results, this
|
||||
is not the case for DFLTCC. Therefore, zlib-ng users are given the
|
||||
ability to specify whether or not reproducible compression results
|
||||
are required. While it is always possible to specify this setting
|
||||
before the compression begins, it is not always possible to do so in
|
||||
the middle of a deflate stream - the exact conditions for that are
|
||||
determined by `DEFLATE_CAN_SET_REPRODUCIBLE()` macro.
|
||||
|
||||
## SystemZ-specific code
|
||||
|
||||
When zlib-ng is built with DFLTCC, the hooks described above are
|
||||
converted to calls to functions, which are implemented in
|
||||
`arch/s390/dfltcc_*` files. The functions can be grouped in three broad
|
||||
categories:
|
||||
|
||||
* Base DFLTCC support, e.g. wrapping the machine instruction -
|
||||
`dfltcc()` and allocating aligned memory - `dfltcc_alloc_state()`.
|
||||
* Translating between software and hardware data formats, e.g.
|
||||
`dfltcc_deflate_set_dictionary()`.
|
||||
* Translating between software and hardware state machines, e.g.
|
||||
`dfltcc_deflate()` and `dfltcc_inflate()`.
|
||||
|
||||
The functions from the first two categories are fairly simple, however,
|
||||
various quirks in both software and hardware state machines make the
|
||||
functions from the third category quite complicated.
|
||||
|
||||
### `dfltcc_deflate()` function
|
||||
|
||||
This function is called by `deflate()` and has the following
|
||||
responsibilities:
|
||||
|
||||
* Checking whether DFLTCC can be used with the current stream. If this
|
||||
is not the case, then it returns `0`, making `deflate()` use some
|
||||
other function in order to compress in software. Otherwise it returns
|
||||
`1`.
|
||||
* Block management and Huffman table generation. DFLTCC ends blocks only
|
||||
when explicitly instructed to do so by the software. Furthermore,
|
||||
whether to use fixed or dynamic Huffman tables must also be determined
|
||||
by the software. Since looking at data in order to gather statistics
|
||||
would negate performance benefits, the following approach is used: the
|
||||
first `DFLTCC_FIRST_FHT_BLOCK_SIZE` bytes are placed into a fixed
|
||||
block, and every next `DFLTCC_BLOCK_SIZE` bytes are placed into
|
||||
dynamic blocks.
|
||||
* Writing EOBS. Block Closing Control bit in the parameter block
|
||||
instructs DFLTCC to write EOBS, however, certain conditions need to be
|
||||
met: input data length must be non-zero or Continuation Flag must be
|
||||
set. To put this in simpler terms, DFLTCC will silently refuse to
|
||||
write EOBS if this is the only thing that it is asked to do. Since the
|
||||
code has to be able to emit EOBS in software anyway, in order to avoid
|
||||
tricky corner cases Block Closing Control is never used. Whether to
|
||||
write EOBS is instead controlled by `soft_bcc` variable.
|
||||
* Triggering block post-processing. Depending on flush mode, `deflate()`
|
||||
must perform various additional actions when a block or a stream ends.
|
||||
`dfltcc_deflate()` informs `deflate()` about this using
|
||||
`block_state *result` parameter.
|
||||
* Converting software state fields into hardware parameter block fields,
|
||||
and vice versa. For example, `wrap` and Check Value Type or `bi_valid`
|
||||
and Sub-Byte Boundary. Certain fields cannot be translated and must
|
||||
persist untouched in the parameter block between calls, for example,
|
||||
Continuation Flag or Continuation State Buffer.
|
||||
* Handling flush modes and low-memory situations. These aspects are
|
||||
quite intertwined and pervasive. The general idea here is that the
|
||||
code must not do anything in software - whether explicitly by e.g.
|
||||
calling `send_eobs()`, or implicitly - by returning to `deflate()`
|
||||
with certain return and `*result` values, when Continuation Flag is
|
||||
set.
|
||||
* Ending streams. When a new block is started and flush mode is
|
||||
`Z_FINISH`, Block Header Final parameter block bit is used to mark
|
||||
this block as final. However, sometimes an empty final block is
|
||||
needed, and, unfortunately, just like with EOBS, DFLTCC will silently
|
||||
refuse to do this. The general idea of DFLTCC implementation is to
|
||||
rely as much as possible on the existing code. Here in order to do
|
||||
this, the code pretends that it does not support DFLTCC, which makes
|
||||
`deflate()` call a software compression function, which writes an
|
||||
empty final block. Whether this is required is controlled by
|
||||
`need_empty_block` variable.
|
||||
* Error handling. This is simply converting
|
||||
Operation-Ending-Supplemental Code to string. Errors can only happen
|
||||
due to things like memory corruption, and therefore they don't affect
|
||||
the `deflate()` return code.
|
||||
|
||||
### `dfltcc_inflate()` function
|
||||
|
||||
This function is called by `inflate()` from the `TYPEDO` state (that is,
|
||||
when all the metadata is parsed and the stream is positioned at the type
|
||||
bits of deflate block header) and it's responsible for the following:
|
||||
|
||||
* Falling back to software when flush mode is `Z_BLOCK` or `Z_TREES`.
|
||||
Unfortunately, there is no way to ask DFLTCC to stop decompressing on
|
||||
block or tree boundary.
|
||||
* `inflate()` decompression loop management. This is controlled using
|
||||
the return value, which can be either `DFLTCC_INFLATE_BREAK` or
|
||||
`DFLTCC_INFLATE_CONTINUE`.
|
||||
* Converting software state fields into hardware parameter block fields,
|
||||
and vice versa. For example, `whave` and History Length or `wnext` and
|
||||
History Offset.
|
||||
* Ending streams. This instructs `inflate()` to return `Z_STREAM_END`
|
||||
and is controlled by `last` state field.
|
||||
* Error handling. Like deflate, error handling comprises
|
||||
Operation-Ending-Supplemental Code to string conversion. Unlike
|
||||
deflate, errors may happen due to bad inputs, therefore they are
|
||||
propagated to `inflate()` by setting `mode` field to `MEM` or `BAD`.
|
||||
|
||||
# Testing
|
||||
|
||||
Given the complexity of the DFLTCC machine instruction, it is not clear whether
|
||||
QEMU TCG will ever support it. At the time of writing, one has to have
|
||||
access to an IBM z15+ VM or LPAR in order to test DFLTCC support. Since
|
||||
DFLTCC is a non-privileged instruction, neither special VM/LPAR
|
||||
configuration nor root are required.
|
||||
|
||||
Still, zlib-ng CI has a few QEMU TCG-based configurations that check
|
||||
whether fallback to software is working.
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
/* dfltcc_deflate.c - IBM Z DEFLATE CONVERSION CALL general support. */
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "dfltcc_common.h"
|
||||
#include "dfltcc_detail.h"
|
||||
|
||||
@ -12,20 +12,31 @@
|
||||
`posix_memalign' is not an option. Thus, we overallocate and take the
|
||||
aligned portion of the buffer.
|
||||
*/
|
||||
static inline int is_dfltcc_enabled(void)
|
||||
{
|
||||
static inline int is_dfltcc_enabled(void) {
|
||||
uint64_t facilities[(DFLTCC_FACILITY / 64) + 1];
|
||||
register uint8_t r0 __asm__("r0");
|
||||
Z_REGISTER uint8_t r0 __asm__("r0");
|
||||
|
||||
memset(facilities, 0, sizeof(facilities));
|
||||
r0 = sizeof(facilities) / sizeof(facilities[0]) - 1;
|
||||
__asm__ volatile("stfle %[facilities]\n" : [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
|
||||
/* STFLE is supported since z9-109 and only in z/Architecture mode. When
|
||||
* compiling with -m31, gcc defaults to ESA mode, however, since the kernel
|
||||
* is 64-bit, it's always z/Architecture mode at runtime.
|
||||
*/
|
||||
__asm__ volatile(
|
||||
#ifndef __clang__
|
||||
".machinemode push\n"
|
||||
".machinemode zarch\n"
|
||||
#endif
|
||||
"stfle %[facilities]\n"
|
||||
#ifndef __clang__
|
||||
".machinemode pop\n"
|
||||
#endif
|
||||
: [facilities] "=Q" (facilities), [r0] "+r" (r0) :: "cc");
|
||||
return is_bit_set((const char *)facilities, DFLTCC_FACILITY);
|
||||
}
|
||||
|
||||
void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size)
|
||||
{
|
||||
struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + size);
|
||||
void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size) {
|
||||
struct dfltcc_state *dfltcc_state = (struct dfltcc_state *)((char *)strm->state + ALIGN_UP(size, 8));
|
||||
struct dfltcc_qaf_param *param = (struct dfltcc_qaf_param *)&dfltcc_state->param;
|
||||
|
||||
/* Initialize available functions */
|
||||
@ -47,24 +58,17 @@ void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size)
|
||||
dfltcc_state->param.ribm = DFLTCC_RIBM;
|
||||
}
|
||||
|
||||
void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size)
|
||||
{
|
||||
Assert((items * size) % 8 == 0,
|
||||
"The size of zlib-ng state must be a multiple of 8");
|
||||
return ZALLOC(strm, items * size + sizeof(struct dfltcc_state), sizeof(unsigned char));
|
||||
void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size) {
|
||||
return ZALLOC(strm, ALIGN_UP(items * size, 8) + sizeof(struct dfltcc_state), sizeof(unsigned char));
|
||||
}
|
||||
|
||||
void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size)
|
||||
{
|
||||
memcpy(dst, src, size + sizeof(struct dfltcc_state));
|
||||
void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size) {
|
||||
memcpy(dst, src, ALIGN_UP(size, 8) + sizeof(struct dfltcc_state));
|
||||
}
|
||||
|
||||
static const int PAGE_ALIGN = 0x1000;
|
||||
|
||||
#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
|
||||
|
||||
void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size)
|
||||
{
|
||||
void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size) {
|
||||
void *p;
|
||||
void *w;
|
||||
|
||||
@ -79,8 +83,7 @@ void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt
|
||||
return w;
|
||||
}
|
||||
|
||||
void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w)
|
||||
{
|
||||
void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w) {
|
||||
if (w)
|
||||
ZFREE(strm, *(void **)((unsigned char *)w - sizeof(void *)));
|
||||
}
|
||||
|
||||
@ -2,17 +2,17 @@
|
||||
#define DFLTCC_COMMON_H
|
||||
|
||||
#ifdef ZLIB_COMPAT
|
||||
#include "zlib.h"
|
||||
#include "../../zlib.h"
|
||||
#else
|
||||
#include "zlib-ng.h"
|
||||
#include "../../zlib-ng.h"
|
||||
#endif
|
||||
#include "zutil.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
void ZLIB_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size);
|
||||
void ZLIB_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size);
|
||||
void ZLIB_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size);
|
||||
void ZLIB_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size);
|
||||
void ZLIB_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w);
|
||||
void Z_INTERNAL *dfltcc_alloc_state(PREFIX3(streamp) strm, uInt items, uInt size);
|
||||
void Z_INTERNAL dfltcc_copy_state(void *dst, const void *src, uInt size);
|
||||
void Z_INTERNAL dfltcc_reset(PREFIX3(streamp) strm, uInt size);
|
||||
void Z_INTERNAL *dfltcc_alloc_window(PREFIX3(streamp) strm, uInt items, uInt size);
|
||||
void Z_INTERNAL dfltcc_free_window(PREFIX3(streamp) strm, void *w);
|
||||
|
||||
#define ZALLOC_STATE dfltcc_alloc_state
|
||||
|
||||
|
||||
@ -13,27 +13,26 @@
|
||||
$ make
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "deflate.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
#include "../../deflate.h"
|
||||
#include "../../trees_emit.h"
|
||||
#include "dfltcc_deflate.h"
|
||||
#include "dfltcc_detail.h"
|
||||
|
||||
static inline int dfltcc_are_params_ok(int level, uInt window_bits, int strategy, uint16_t level_mask)
|
||||
{
|
||||
return (level_mask & ((uint16_t)1 << level)) != 0 &&
|
||||
(window_bits == HB_BITS) &&
|
||||
(strategy == Z_FIXED || strategy == Z_DEFAULT_STRATEGY);
|
||||
}
|
||||
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm)
|
||||
{
|
||||
static inline int dfltcc_can_deflate_with_params(PREFIX3(streamp) strm, int level, uInt window_bits, int strategy,
|
||||
int reproducible) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
|
||||
/* Unsupported compression settings */
|
||||
if (!dfltcc_are_params_ok(state->level, state->w_bits, state->strategy, dfltcc_state->level_mask))
|
||||
if ((dfltcc_state->level_mask & (1 << level)) == 0)
|
||||
return 0;
|
||||
if (window_bits != HB_BITS)
|
||||
return 0;
|
||||
if (strategy != Z_FIXED && strategy != Z_DEFAULT_STRATEGY)
|
||||
return 0;
|
||||
if (reproducible)
|
||||
return 0;
|
||||
|
||||
/* Unsupported hardware */
|
||||
@ -45,8 +44,13 @@ int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline void dfltcc_gdht(PREFIX3(streamp) strm)
|
||||
{
|
||||
/* Report whether DFLTCC can be used for this deflate stream with its current
 * settings. Reads level, w_bits, strategy and reproducible from the stream's
 * deflate_state and forwards them to dfltcc_can_deflate_with_params(),
 * returning that function's result unchanged.
 */
int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
return dfltcc_can_deflate_with_params(strm, state->level, state->w_bits, state->strategy, state->reproducible);
|
||||
}
|
||||
|
||||
static inline void dfltcc_gdht(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
|
||||
size_t avail_in = strm->avail_in;
|
||||
@ -54,8 +58,7 @@ static inline void dfltcc_gdht(PREFIX3(streamp) strm)
|
||||
dfltcc(DFLTCC_GDHT, param, NULL, NULL, &strm->next_in, &avail_in, NULL);
|
||||
}
|
||||
|
||||
static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm)
|
||||
{
|
||||
static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
|
||||
size_t avail_in = strm->avail_in;
|
||||
@ -72,11 +75,10 @@ static inline dfltcc_cc dfltcc_cmpr(PREFIX3(streamp) strm)
|
||||
return cc;
|
||||
}
|
||||
|
||||
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param)
|
||||
{
|
||||
static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0 *param) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl);
|
||||
send_bits(state, bi_reverse(param->eobs >> (15 - param->eobl), param->eobl), param->eobl, state->bi_buf, state->bi_valid);
|
||||
flush_pending(strm);
|
||||
if (state->pending != 0) {
|
||||
/* The remaining data is located in pending_out[0:pending]. If someone
|
||||
@ -93,8 +95,7 @@ static inline void send_eobs(PREFIX3(streamp) strm, const struct dfltcc_param_v0
|
||||
#endif
|
||||
}
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result)
|
||||
{
|
||||
int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
@ -104,31 +105,38 @@ int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *
|
||||
int soft_bcc;
|
||||
int no_flush;
|
||||
|
||||
if (!dfltcc_can_deflate(strm))
|
||||
if (!dfltcc_can_deflate(strm)) {
|
||||
/* Clear history. */
|
||||
if (flush == Z_FULL_FLUSH)
|
||||
param->hl = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
again:
|
||||
masked_avail_in = 0;
|
||||
soft_bcc = 0;
|
||||
no_flush = flush == Z_NO_FLUSH;
|
||||
|
||||
/* Trailing empty block. Switch to software, except when Continuation Flag
|
||||
* is set, which means that DFLTCC has buffered some output in the
|
||||
* parameter block and needs to be called again in order to flush it.
|
||||
/* No input data. Return, except when Continuation Flag is set, which means
|
||||
* that DFLTCC has buffered some output in the parameter block and needs to
|
||||
* be called again in order to flush it.
|
||||
*/
|
||||
if (flush == Z_FINISH && strm->avail_in == 0 && !param->cf) {
|
||||
if (param->bcf) {
|
||||
/* A block is still open, and the hardware does not support closing
|
||||
* blocks without adding data. Thus, close it manually.
|
||||
*/
|
||||
if (strm->avail_in == 0 && !param->cf) {
|
||||
/* A block is still open, and the hardware does not support closing
|
||||
* blocks without adding data. Thus, close it manually.
|
||||
*/
|
||||
if (!no_flush && param->bcf) {
|
||||
send_eobs(strm, param);
|
||||
param->bcf = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (strm->avail_in == 0 && !param->cf) {
|
||||
*result = need_more;
|
||||
/* Let one of deflate_* functions write a trailing empty block. */
|
||||
if (flush == Z_FINISH)
|
||||
return 0;
|
||||
/* Clear history. */
|
||||
if (flush == Z_FULL_FLUSH)
|
||||
param->hl = 0;
|
||||
/* Trigger block post-processing if necessary. */
|
||||
*result = no_flush ? need_more : block_done;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -154,13 +162,18 @@ again:
|
||||
send_eobs(strm, param);
|
||||
param->bcf = 0;
|
||||
dfltcc_state->block_threshold = strm->total_in + dfltcc_state->block_size;
|
||||
if (strm->avail_out == 0) {
|
||||
*result = need_more;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* No space for compressed data. If we proceed, dfltcc_cmpr() will return
|
||||
* DFLTCC_CC_OP1_TOO_SHORT without buffering header bits, but we will still
|
||||
* set BCF=1, which is wrong. Avoid complications and return early.
|
||||
*/
|
||||
if (strm->avail_out == 0) {
|
||||
*result = need_more;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* The caller gave us too much data. Pass only one block worth of
|
||||
* uncompressed data to DFLTCC and mask the rest, so that on the next
|
||||
* iteration we start a new block.
|
||||
@ -180,7 +193,7 @@ again:
|
||||
param->cvt = state->wrap == 2 ? CVT_CRC32 : CVT_ADLER32;
|
||||
if (!no_flush)
|
||||
/* We need to close a block. Always do this in software - when there is
|
||||
* no input data, the hardware will not nohor BCC. */
|
||||
* no input data, the hardware will not honor BCC. */
|
||||
soft_bcc = 1;
|
||||
if (flush == Z_FINISH && !param->bcf)
|
||||
/* We are about to open a BFINAL block, set Block Header Final bit
|
||||
@ -195,8 +208,8 @@ again:
|
||||
param->sbb = (unsigned int)state->bi_valid;
|
||||
if (param->sbb > 0)
|
||||
*strm->next_out = (unsigned char)state->bi_buf;
|
||||
if (param->hl)
|
||||
param->nt = 0; /* Honor history */
|
||||
/* Honor history and check value */
|
||||
param->nt = 0;
|
||||
param->cv = state->wrap == 2 ? ZSWAP32(strm->adler) : strm->adler;
|
||||
|
||||
/* When opening a block, choose a Huffman-Table Type */
|
||||
@ -277,31 +290,60 @@ again:
|
||||
fly with deflateParams, we need to convert between hardware and software
|
||||
window formats.
|
||||
*/
|
||||
int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy)
|
||||
{
|
||||
/* Return nonzero if any of the following holds for the stream: some input
 * has already been consumed (total_in > 0), the parameter block's nt field
 * is 0, or the parameter block's hl field is greater than 0. Returns 0 only
 * when all three conditions are false.
 */
static int dfltcc_was_deflate_used(PREFIX3(streamp) strm) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
|
||||
|
||||
return strm->total_in > 0 || param->nt == 0 || param->hl > 0;
|
||||
}
|
||||
|
||||
int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
int could_deflate = dfltcc_can_deflate(strm);
|
||||
int can_deflate = dfltcc_are_params_ok(level, state->w_bits, strategy, dfltcc_state->level_mask);
|
||||
int can_deflate = dfltcc_can_deflate_with_params(strm, level, state->w_bits, strategy, state->reproducible);
|
||||
|
||||
if (can_deflate == could_deflate)
|
||||
/* We continue to work in the same mode - no changes needed */
|
||||
return Z_OK;
|
||||
|
||||
if (strm->total_in == 0 && param->nt == 1 && param->hl == 0)
|
||||
if (!dfltcc_was_deflate_used(strm))
|
||||
/* DFLTCC was not used yet - no changes needed */
|
||||
return Z_OK;
|
||||
|
||||
/* Switching between hardware and software is not implemented */
|
||||
return Z_STREAM_ERROR;
|
||||
/* For now, do not convert between window formats - simply get rid of the old data instead */
|
||||
*flush = Z_FULL_FLUSH;
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
/* Tell the caller whether deflation for the given flush request is complete.
 * Returns 0 (not done) when flush is Z_FULL_FLUSH and no output space is
 * left. Otherwise returns 1 (done) when dfltcc_can_deflate() is false for
 * this stream, or when both the cf and bcf fields of the parameter block
 * are clear; returns 0 in the remaining case.
 */
int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
|
||||
/* When deflate(Z_FULL_FLUSH) is called with small avail_out, it might
|
||||
* close the block without resetting the compression state. Detect this
|
||||
* situation and return that deflation is not done.
|
||||
*/
|
||||
if (flush == Z_FULL_FLUSH && strm->avail_out == 0)
|
||||
return 0;
|
||||
|
||||
/* Return that deflation is not done if DFLTCC is used and either it
|
||||
* buffered some data (Continuation Flag is set), or has not written EOBS
|
||||
* yet (Block-Continuation Flag is set).
|
||||
*/
|
||||
return !dfltcc_can_deflate(strm) || (!param->cf && !param->bcf);
|
||||
}
|
||||
|
||||
/* Return nonzero only when the requested reproducible value differs from the
 * one stored in the deflate state AND dfltcc_was_deflate_used() reports 0
 * for the stream.
 * NOTE(review): this also returns 0 when the requested value equals the
 * current one - confirm that callers treat a no-op request as intended.
 */
int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
|
||||
return reproducible != state->reproducible && !dfltcc_was_deflate_used(strm);
|
||||
}
|
||||
|
||||
/*
|
||||
Preloading history.
|
||||
*/
|
||||
static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count)
|
||||
{
|
||||
static void append_history(struct dfltcc_param_v0 *param, unsigned char *history, const unsigned char *buf, uInt count) {
|
||||
size_t offset;
|
||||
size_t n;
|
||||
|
||||
@ -331,20 +373,19 @@ static void append_history(struct dfltcc_param_v0 *param, unsigned char *history
|
||||
}
|
||||
}
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length)
|
||||
{
|
||||
int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
|
||||
append_history(param, state->window, dictionary, dict_length);
|
||||
state->strstart = 1; /* Add FDICT to zlib header */
|
||||
state->block_start = state->strstart; /* Make deflate_stored happy */
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length)
|
||||
{
|
||||
int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt *dict_length) {
|
||||
deflate_state *state = (deflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
|
||||
@ -3,12 +3,14 @@
|
||||
|
||||
#include "dfltcc_common.h"
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm);
|
||||
int ZLIB_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result);
|
||||
int ZLIB_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy);
|
||||
int ZLIB_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
|
||||
int Z_INTERNAL dfltcc_can_deflate(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL dfltcc_deflate(PREFIX3(streamp) strm, int flush, block_state *result);
|
||||
int Z_INTERNAL dfltcc_deflate_params(PREFIX3(streamp) strm, int level, int strategy, int *flush);
|
||||
int Z_INTERNAL dfltcc_deflate_done(PREFIX3(streamp) strm, int flush);
|
||||
int Z_INTERNAL dfltcc_can_set_reproducible(PREFIX3(streamp) strm, int reproducible);
|
||||
int Z_INTERNAL dfltcc_deflate_set_dictionary(PREFIX3(streamp) strm,
|
||||
const unsigned char *dictionary, uInt dict_length);
|
||||
int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);
|
||||
int Z_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned char *dictionary, uInt* dict_length);
|
||||
|
||||
#define DEFLATE_SET_DICTIONARY_HOOK(strm, dict, dict_len) \
|
||||
do { \
|
||||
@ -25,15 +27,17 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned
|
||||
#define DEFLATE_RESET_KEEP_HOOK(strm) \
|
||||
dfltcc_reset((strm), sizeof(deflate_state))
|
||||
|
||||
#define DEFLATE_PARAMS_HOOK(strm, level, strategy) \
|
||||
#define DEFLATE_PARAMS_HOOK(strm, level, strategy, hook_flush) \
|
||||
do { \
|
||||
int err; \
|
||||
\
|
||||
err = dfltcc_deflate_params((strm), (level), (strategy)); \
|
||||
err = dfltcc_deflate_params((strm), (level), (strategy), (hook_flush)); \
|
||||
if (err == Z_STREAM_ERROR) \
|
||||
return err; \
|
||||
} while (0)
|
||||
|
||||
#define DEFLATE_DONE dfltcc_deflate_done
|
||||
|
||||
#define DEFLATE_BOUND_ADJUST_COMPLEN(strm, complen, source_len) \
|
||||
do { \
|
||||
if (dfltcc_can_deflate((strm))) \
|
||||
@ -47,4 +51,6 @@ int ZLIB_INTERNAL dfltcc_deflate_get_dictionary(PREFIX3(streamp) strm, unsigned
|
||||
|
||||
#define DEFLATE_NEED_CHECKSUM(strm) (!dfltcc_can_deflate((strm)))
|
||||
|
||||
#define DEFLATE_CAN_SET_REPRODUCIBLE dfltcc_can_set_reproducible
|
||||
|
||||
#endif
|
||||
|
||||
@ -46,18 +46,17 @@ typedef enum {
|
||||
#define DFLTCC_FACILITY 151
|
||||
|
||||
static inline dfltcc_cc dfltcc(int fn, void *param,
|
||||
unsigned char **op1, size_t *len1, const unsigned char **op2, size_t *len2, void *hist)
|
||||
{
|
||||
unsigned char **op1, size_t *len1, z_const unsigned char **op2, size_t *len2, void *hist) {
|
||||
unsigned char *t2 = op1 ? *op1 : NULL;
|
||||
size_t t3 = len1 ? *len1 : 0;
|
||||
const unsigned char *t4 = op2 ? *op2 : NULL;
|
||||
z_const unsigned char *t4 = op2 ? *op2 : NULL;
|
||||
size_t t5 = len2 ? *len2 : 0;
|
||||
register int r0 __asm__("r0") = fn;
|
||||
register void *r1 __asm__("r1") = param;
|
||||
register unsigned char *r2 __asm__("r2") = t2;
|
||||
register size_t r3 __asm__("r3") = t3;
|
||||
register const unsigned char *r4 __asm__("r4") = t4;
|
||||
register size_t r5 __asm__("r5") = t5;
|
||||
Z_REGISTER int r0 __asm__("r0") = fn;
|
||||
Z_REGISTER void *r1 __asm__("r1") = param;
|
||||
Z_REGISTER unsigned char *r2 __asm__("r2") = t2;
|
||||
Z_REGISTER size_t r3 __asm__("r3") = t3;
|
||||
Z_REGISTER z_const unsigned char *r4 __asm__("r4") = t4;
|
||||
Z_REGISTER size_t r5 __asm__("r5") = t5;
|
||||
int cc;
|
||||
|
||||
__asm__ volatile(
|
||||
@ -108,13 +107,11 @@ struct dfltcc_qaf_param {
|
||||
|
||||
static_assert(sizeof(struct dfltcc_qaf_param) == 32, sizeof_struct_dfltcc_qaf_param_is_32);
|
||||
|
||||
static inline int is_bit_set(const char *bits, int n)
|
||||
{
|
||||
static inline int is_bit_set(const char *bits, int n) {
|
||||
return bits[n / 8] & (1 << (7 - (n % 8)));
|
||||
}
|
||||
|
||||
static inline void clear_bit(char *bits, int n)
|
||||
{
|
||||
static inline void clear_bit(char *bits, int n) {
|
||||
bits[n / 8] &= ~(1 << (7 - (n % 8)));
|
||||
}
|
||||
|
||||
@ -175,8 +172,7 @@ struct dfltcc_param_v0 {
|
||||
|
||||
static_assert(sizeof(struct dfltcc_param_v0) == 1536, sizeof_struct_dfltcc_param_v0_is_1536);
|
||||
|
||||
static inline const char *oesc_msg(char *buf, int oesc)
|
||||
{
|
||||
static inline z_const char *oesc_msg(char *buf, int oesc) {
|
||||
if (oesc == 0x00)
|
||||
return NULL; /* Successful completion */
|
||||
else {
|
||||
@ -198,4 +194,6 @@ struct dfltcc_state {
|
||||
char msg[64]; /* Buffer for strm->msg */
|
||||
};
|
||||
|
||||
#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((state) + 1))
|
||||
#define ALIGN_UP(p, size) (__typeof__(p))(((uintptr_t)(p) + ((size) - 1)) & ~((size) - 1))
|
||||
|
||||
#define GET_DFLTCC_STATE(state) ((struct dfltcc_state *)((char *)(state) + ALIGN_UP(sizeof(*state), 8)))
|
||||
|
||||
@ -13,15 +13,14 @@
|
||||
$ make
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
#include "inftrees.h"
|
||||
#include "inflate.h"
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
#include "../../inftrees.h"
|
||||
#include "../../inflate.h"
|
||||
#include "dfltcc_inflate.h"
|
||||
#include "dfltcc_detail.h"
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm)
|
||||
{
|
||||
int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
|
||||
@ -33,8 +32,7 @@ int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm)
|
||||
return is_bit_set(dfltcc_state->af.fns, DFLTCC_XPND) && is_bit_set(dfltcc_state->af.fmts, DFLTCC_FMT0);
|
||||
}
|
||||
|
||||
static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm)
|
||||
{
|
||||
static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
|
||||
size_t avail_in = strm->avail_in;
|
||||
@ -49,8 +47,7 @@ static inline dfltcc_cc dfltcc_xpnd(PREFIX3(streamp) strm)
|
||||
return cc;
|
||||
}
|
||||
|
||||
dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret)
|
||||
{
|
||||
dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
struct dfltcc_param_v0 *param = &dfltcc_state->param;
|
||||
@ -115,16 +112,14 @@ dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int fl
|
||||
DFLTCC_INFLATE_BREAK : DFLTCC_INFLATE_CONTINUE;
|
||||
}
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm)
|
||||
{
|
||||
int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_param_v0 *param = &GET_DFLTCC_STATE(state)->param;
|
||||
|
||||
return !param->nt;
|
||||
}
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm)
|
||||
{
|
||||
int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm) {
|
||||
struct inflate_state *state = (struct inflate_state *)strm->state;
|
||||
struct dfltcc_state *dfltcc_state = GET_DFLTCC_STATE(state);
|
||||
|
||||
|
||||
@ -3,15 +3,15 @@
|
||||
|
||||
#include "dfltcc_common.h"
|
||||
|
||||
int ZLIB_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL dfltcc_can_inflate(PREFIX3(streamp) strm);
|
||||
typedef enum {
|
||||
DFLTCC_INFLATE_CONTINUE,
|
||||
DFLTCC_INFLATE_BREAK,
|
||||
DFLTCC_INFLATE_SOFTWARE,
|
||||
} dfltcc_inflate_action;
|
||||
dfltcc_inflate_action ZLIB_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret);
|
||||
int ZLIB_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm);
|
||||
int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
|
||||
dfltcc_inflate_action Z_INTERNAL dfltcc_inflate(PREFIX3(streamp) strm, int flush, int *ret);
|
||||
int Z_INTERNAL dfltcc_was_inflate_used(PREFIX3(streamp) strm);
|
||||
int Z_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
|
||||
|
||||
#define INFLATE_RESET_KEEP_HOOK(strm) \
|
||||
dfltcc_reset((strm), sizeof(struct inflate_state))
|
||||
@ -41,4 +41,9 @@ int ZLIB_INTERNAL dfltcc_inflate_disable(PREFIX3(streamp) strm);
|
||||
if (dfltcc_was_inflate_used((strm))) return -(1L << 16); \
|
||||
} while (0)
|
||||
|
||||
/* Statement macro: makes the enclosing function return Z_STREAM_ERROR when
 * dfltcc_was_inflate_used() is nonzero for the stream; otherwise does nothing.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define INFLATE_SYNC_POINT_HOOK(strm) \
|
||||
do { \
|
||||
if (dfltcc_was_inflate_used((strm))) return Z_STREAM_ERROR; \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,3 +0,0 @@
|
||||
fill_window_sse.c SSE2 optimized fill_window
|
||||
deflate_quick.c SSE4 optimized deflate strategy for use as level 1
|
||||
crc_folding.c SSE4 + PCLMULQDQ optimized CRC folding implementation
|
||||
8
libs/zlibng/arch/x86/INDEX.md
Normal file
8
libs/zlibng/arch/x86/INDEX.md
Normal file
@ -0,0 +1,8 @@
|
||||
Contents
|
||||
--------
|
||||
|
||||
|Name|Description|
|
||||
|:-|:-|
|
||||
|deflate_quick.c|SSE4 optimized deflate strategy for use as level 1|
|
||||
|crc_folding.c|SSE4 + PCLMULQDQ optimized CRC folding implementation|
|
||||
|slide_sse2.c|SSE2 optimized slide_hash|
|
||||
@ -8,7 +8,9 @@ SFLAGS=
|
||||
INCLUDES=
|
||||
SUFFIX=
|
||||
|
||||
AVX2FLAG=-mavx2
|
||||
SSE2FLAG=-msse2
|
||||
SSSE3FLAG=-mssse3
|
||||
SSE4FLAG=-msse4
|
||||
PCLMULFLAG=-mpclmul
|
||||
|
||||
@ -16,7 +18,18 @@ SRCDIR=.
|
||||
SRCTOP=../..
|
||||
TOPDIR=$(SRCTOP)
|
||||
|
||||
all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
|
||||
# Default target: requests the .o and .lo object for every x86 source built by this Makefile.
# Every prerequisite must name a rule defined in this file (adler32_avx.lo, not adler32.lo).
all: \
|
||||
x86.o x86.lo \
|
||||
adler32_avx.o adler32_avx.lo \
|
||||
adler32_ssse3.o adler32_ssse3.lo \
|
||||
chunkset_avx.o chunkset_avx.lo \
|
||||
chunkset_sse.o chunkset_sse.lo \
|
||||
compare258_avx.o compare258_avx.lo \
|
||||
compare258_sse.o compare258_sse.lo \
|
||||
insert_string_sse.o insert_string_sse.lo \
|
||||
crc_folding.o crc_folding.lo \
|
||||
slide_avx.o slide_avx.lo \
|
||||
slide_sse.o slide_sse.lo
|
||||
|
||||
x86.o:
|
||||
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
|
||||
@ -24,17 +37,29 @@ x86.o:
|
||||
x86.lo:
|
||||
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
|
||||
|
||||
fill_window_sse.o:
|
||||
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
|
||||
chunkset_avx.o:
|
||||
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c
|
||||
|
||||
fill_window_sse.lo:
|
||||
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
|
||||
chunkset_avx.lo:
|
||||
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_avx.c
|
||||
|
||||
deflate_quick.o:
|
||||
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
|
||||
chunkset_sse.o:
|
||||
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
|
||||
|
||||
deflate_quick.lo:
|
||||
$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
|
||||
chunkset_sse.lo:
|
||||
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_sse.c
|
||||
|
||||
compare258_avx.o:
|
||||
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
|
||||
|
||||
compare258_avx.lo:
|
||||
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_avx.c
|
||||
|
||||
compare258_sse.o:
|
||||
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
|
||||
|
||||
compare258_sse.lo:
|
||||
$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/compare258_sse.c
|
||||
|
||||
insert_string_sse.o:
|
||||
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
|
||||
@ -48,6 +73,30 @@ crc_folding.o:
|
||||
crc_folding.lo:
|
||||
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
|
||||
|
||||
slide_avx.o:
|
||||
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
|
||||
|
||||
slide_avx.lo:
|
||||
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_avx.c
|
||||
|
||||
slide_sse.o:
|
||||
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
|
||||
|
||||
slide_sse.lo:
|
||||
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/slide_sse.c
|
||||
|
||||
adler32_avx.o: $(SRCDIR)/adler32_avx.c
|
||||
$(CC) $(CFLAGS) $(AVX2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
|
||||
|
||||
# .lo object for adler32_avx.c, compiled with $(SFLAGS) and the AVX2 flag.
# -DPIC is passed for consistency with the other .lo rules in this file.
adler32_avx.lo: $(SRCDIR)/adler32_avx.c
|
||||
$(CC) $(SFLAGS) $(AVX2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_avx.c
|
||||
|
||||
adler32_ssse3.o: $(SRCDIR)/adler32_ssse3.c
|
||||
$(CC) $(CFLAGS) $(SSSE3FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c
|
||||
|
||||
# .lo object for adler32_ssse3.c, compiled with $(SFLAGS) and the SSSE3 flag.
# -DPIC is passed for consistency with the other .lo rules in this file.
adler32_ssse3.lo: $(SRCDIR)/adler32_ssse3.c
|
||||
$(CC) $(SFLAGS) $(SSSE3FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_ssse3.c
|
||||
|
||||
mostlyclean: clean
|
||||
clean:
|
||||
rm -f *.o *.lo *~
|
||||
|
||||
117
libs/zlibng/arch/x86/adler32_avx.c
Normal file
117
libs/zlibng/arch/x86/adler32_avx.c
Normal file
@ -0,0 +1,117 @@
|
||||
/* adler32.c -- compute the Adler-32 checksum of a data stream
|
||||
* Copyright (C) 1995-2011 Mark Adler
|
||||
* Authors:
|
||||
* Brian Bockelman <bockelman@gmail.com>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
#include "../../adler32_p.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#ifdef X86_AVX2_ADLER32
|
||||
|
||||
/* Adler-32 update over len bytes at buf, starting from the checksum in adler.
 *
 * Inputs of 32 bytes or more are consumed in 32-byte blocks with AVX2 multiply-add
 * instructions; whatever is left (fewer than 32 bytes) is folded in one byte at a time.
 * Returns the combined checksum (sum2 in the high 16 bits, adler in the low 16 bits).
 */
Z_INTERNAL uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len) {
    /* Split the incoming checksum into its low (adler) and high (sum2) 16-bit sums. */
    uint32_t sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* Byte-at-a-time callers are handled first so that path stays cheap. */
    if (UNLIKELY(len == 1))
        return adler32_len_1(adler, buf, sum2);

    /* A NULL buffer yields the initial Adler-32 value; tested after the len == 1 case. */
    if (UNLIKELY(buf == NULL))
        return 1L;

    /* Fewer than 16 bytes: defer to the short-input helper. */
    if (UNLIKELY(len < 16))
        return adler32_len_16(adler, buf, len, sum2);

    /* Constant operands of the multiply-add steps: all-ones bytes, the per-byte position
     * weights 32..1 (in memory order), and all-ones 16-bit words. */
    const __m256i byte_ones = _mm256_set1_epi8(1);
    const __m256i byte_weights = _mm256_setr_epi8(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
                                                  16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
    const __m256i word_ones = _mm256_set1_epi16(1);

    while (len >= 32) {
        /* The running sums enter through 32-bit lane 7; the other seven lanes start at zero. */
        __m256i vs1 = _mm256_set_epi32((int)adler, 0, 0, 0, 0, 0, 0, 0);
        __m256i vs2 = _mm256_set_epi32((int)sum2, 0, 0, 0, 0, 0, 0, 0);
        __m256i vs1_prev = vs1;

        /* Take at most NMAX bytes in this pass, rounded down to whole 32-byte blocks. */
        int k = (len < NMAX ? (int)len : NMAX);
        k -= k % 32;
        len -= k;

        for (; k >= 32; k -= 32, buf += 32) {
            __m256i vbuf = _mm256_loadu_si256((__m256i*)buf);

            /* maddubs turns 32 bytes into 16 shorts, madd turns 16 shorts into 8 int32 lanes.
             * byte_sum holds the plain byte sums, weighted_sum the sums weighted 32 down to 1. */
            __m256i byte_sum = _mm256_madd_epi16(_mm256_maddubs_epi16(vbuf, byte_ones), word_ones);
            __m256i weighted_sum = _mm256_madd_epi16(_mm256_maddubs_epi16(vbuf, byte_weights), word_ones);

            /* vs2 += 32 * (vs1 as it was before this block) + weighted bytes;
             * the multiplication by 32 is a left shift by 5. */
            vs2 = _mm256_add_epi32(_mm256_add_epi32(weighted_sum, vs2), _mm256_slli_epi32(vs1_prev, 5));
            vs1 = _mm256_add_epi32(byte_sum, vs1);
            vs1_prev = vs1;
        }

        /* Spill the eight partial sums of each vector and reduce them modulo BASE. */
        uint32_t ALIGNED_(32) s1_lanes[8];
        uint32_t ALIGNED_(32) s2_lanes[8];

        _mm256_store_si256((__m256i*)s1_lanes, vs1);
        _mm256_store_si256((__m256i*)s2_lanes, vs2);

        adler = 0;
        sum2 = 0;
        for (int lane = 0; lane < 8; lane++) {
            adler += s1_lanes[lane] % BASE;
            sum2 += s2_lanes[lane] % BASE;
        }
        adler %= BASE;
        sum2 %= BASE;
    }

    /* Fewer than 32 bytes remain: fold them in one at a time. */
    for (; len != 0; len--) {
        adler += *buf++;
        sum2 += adler;
    }
    adler %= BASE;
    sum2 %= BASE;

    /* Recombine the two sums into a single 32-bit checksum. */
    return adler | (sum2 << 16);
}
|
||||
|
||||
#endif
|
||||
118
libs/zlibng/arch/x86/adler32_ssse3.c
Normal file
118
libs/zlibng/arch/x86/adler32_ssse3.c
Normal file
@ -0,0 +1,118 @@
|
||||
/* adler32_ssse3.c -- compute the Adler-32 checksum of a data stream using SSSE3 intrinsics
|
||||
* Copyright (C) 1995-2011 Mark Adler
|
||||
* Authors:
|
||||
* Brian Bockelman <bockelman@gmail.com>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
#include "../../adler32_p.h"
|
||||
|
||||
#ifdef X86_SSSE3_ADLER32
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
/* Adler-32 update over len bytes at buf, starting from the checksum in adler.
 *
 * Inputs of 16 bytes or more are consumed in 16-byte blocks with 128-bit multiply-add
 * instructions; whatever is left (fewer than 16 bytes) is folded in one byte at a time.
 * Returns the combined checksum (sum2 in the high 16 bits, adler in the low 16 bits).
 */
Z_INTERNAL uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len) {
    /* Split the incoming checksum into its low (adler) and high (sum2) 16-bit sums. */
    uint32_t sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* Byte-at-a-time callers are handled first so that path stays cheap. */
    if (UNLIKELY(len == 1))
        return adler32_len_1(adler, buf, sum2);

    /* A NULL buffer yields the initial Adler-32 value; tested after the len == 1 case. */
    if (UNLIKELY(buf == NULL))
        return 1L;

    /* Fewer than 16 bytes: defer to the short-input helper. */
    if (UNLIKELY(len < 16))
        return adler32_len_16(adler, buf, len, sum2);

    /* Constant operands of the multiply-add steps: all-ones bytes, the per-byte position
     * weights 16..1 (in memory order), and all-ones 16-bit words. */
    const __m128i byte_ones = _mm_set1_epi8(1);
    const __m128i byte_weights = _mm_setr_epi8(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
    const __m128i word_ones = _mm_set1_epi16(1);

    while (len >= 16) {
        /* The running sums enter through 32-bit lane 3; the other three lanes start at zero. */
        __m128i vs1 = _mm_set_epi32((int)adler, 0, 0, 0);
        __m128i vs2 = _mm_set_epi32((int)sum2, 0, 0, 0);
        __m128i vs1_prev = vs1;

        /* Take at most NMAX bytes in this pass, rounded down to whole 16-byte blocks. */
        int k = (len < NMAX ? (int)len : NMAX);
        k -= k % 16;
        len -= k;

        for (; k >= 16; k -= 16, buf += 16) {
            __m128i vbuf = _mm_loadu_si128((__m128i*)buf);

            /* maddubs turns 16 bytes into 8 shorts, madd turns 8 shorts into 4 int32 lanes.
             * byte_sum holds the plain byte sums, weighted_sum the sums weighted 16 down to 1. */
            __m128i byte_sum = _mm_madd_epi16(_mm_maddubs_epi16(vbuf, byte_ones), word_ones);
            __m128i weighted_sum = _mm_madd_epi16(_mm_maddubs_epi16(vbuf, byte_weights), word_ones);

            /* vs2 += 16 * (vs1 as it was before this block) + weighted bytes;
             * the multiplication by 16 is a left shift by 4. */
            vs2 = _mm_add_epi32(_mm_add_epi32(weighted_sum, vs2), _mm_slli_epi32(vs1_prev, 4));
            vs1 = _mm_add_epi32(byte_sum, vs1);
            vs1_prev = vs1;
        }

        /* Spill the four partial sums of each vector and reduce them modulo BASE. */
        uint32_t ALIGNED_(16) s1_lanes[4];
        uint32_t ALIGNED_(16) s2_lanes[4];

        _mm_store_si128((__m128i*)s1_lanes, vs1);
        _mm_store_si128((__m128i*)s2_lanes, vs2);

        adler = 0;
        sum2 = 0;
        for (int lane = 0; lane < 4; lane++) {
            adler += s1_lanes[lane] % BASE;
            sum2 += s2_lanes[lane] % BASE;
        }
        adler %= BASE;
        sum2 %= BASE;
    }

    /* Fewer than 16 bytes remain: fold them in one at a time. */
    for (; len != 0; len--) {
        adler += *buf++;
        sum2 += adler;
    }
    adler %= BASE;
    sum2 %= BASE;

    /* Recombine the two sums into a single 32-bit checksum. */
    return adler | (sum2 << 16);
}
|
||||
|
||||
#endif
|
||||
50
libs/zlibng/arch/x86/chunkset_avx.c
Normal file
50
libs/zlibng/arch/x86/chunkset_avx.c
Normal file
@ -0,0 +1,50 @@
|
||||
/* chunkset_avx.c -- AVX inline functions to copy small data chunks.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
#include <immintrin.h>
|
||||
|
||||
typedef __m256i chunk_t;
|
||||
|
||||
#define HAVE_CHUNKMEMSET_1
|
||||
#define HAVE_CHUNKMEMSET_2
|
||||
#define HAVE_CHUNKMEMSET_4
|
||||
#define HAVE_CHUNKMEMSET_8
|
||||
|
||||
/* Fill all 32 bytes of *chunk with the single byte stored at from. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const int8_t pattern = *(int8_t *)from;
    *chunk = _mm256_set1_epi8(pattern);
}
|
||||
|
||||
/* Fill *chunk with repetitions of the 2-byte pattern stored at from. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    int16_t pattern;
    /* from is a plain byte pointer, so it is copied out with memcpy instead of being
     * dereferenced through an int16_t pointer (a misaligned, type-punned read). */
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = _mm256_set1_epi16(pattern);
}
|
||||
|
||||
/* Fill *chunk with repetitions of the 4-byte pattern stored at from. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    int32_t pattern;
    /* from is a plain byte pointer, so it is copied out with memcpy instead of being
     * dereferenced through an int32_t pointer (a misaligned, type-punned read). */
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = _mm256_set1_epi32(pattern);
}
|
||||
|
||||
/* Fill *chunk with repetitions of the 8-byte pattern stored at from. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    int64_t pattern;
    /* from is a plain byte pointer, so it is copied out with memcpy instead of being
     * dereferenced through an int64_t pointer (a misaligned, type-punned read). */
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = _mm256_set1_epi64x(pattern);
}
|
||||
|
||||
/* Read one 32-byte chunk from s into *chunk using an unaligned load. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const __m256i *src = (const __m256i *)s;
    *chunk = _mm256_loadu_si256(src);
}
|
||||
|
||||
/* Write the 32-byte chunk *chunk to out using an unaligned store. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    __m256i *dst = (__m256i *)out;
    _mm256_storeu_si256(dst, *chunk);
}
|
||||
|
||||
/* AVX-suffixed names for the generic chunk routines. Presumably chunkset_tpl.h (not
 * visible here) expands these macros into the actual function definitions, built on the
 * chunk_t type and the load/store/memset helpers defined above -- TODO confirm. */
#define CHUNKSIZE chunksize_avx
|
||||
#define CHUNKCOPY chunkcopy_avx
|
||||
#define CHUNKCOPY_SAFE chunkcopy_safe_avx
|
||||
#define CHUNKUNROLL chunkunroll_avx
|
||||
#define CHUNKMEMSET chunkmemset_avx
|
||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_avx
|
||||
|
||||
#include "chunkset_tpl.h"
|
||||
|
||||
#endif
|
||||
51
libs/zlibng/arch/x86/chunkset_sse.c
Normal file
51
libs/zlibng/arch/x86/chunkset_sse.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* chunkset_sse.c -- SSE inline functions to copy small data chunks.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
|
||||
#ifdef X86_SSE2
|
||||
#include <immintrin.h>
|
||||
|
||||
typedef __m128i chunk_t;
|
||||
|
||||
#define HAVE_CHUNKMEMSET_1
|
||||
#define HAVE_CHUNKMEMSET_2
|
||||
#define HAVE_CHUNKMEMSET_4
|
||||
#define HAVE_CHUNKMEMSET_8
|
||||
|
||||
/* Fill all 16 bytes of *chunk with the single byte stored at from. */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
    const int8_t pattern = *(int8_t *)from;
    *chunk = _mm_set1_epi8(pattern);
}
|
||||
|
||||
/* Fill *chunk with repetitions of the 2-byte pattern stored at from. */
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
    int16_t pattern;
    /* from is a plain byte pointer, so it is copied out with memcpy instead of being
     * dereferenced through an int16_t pointer (a misaligned, type-punned read). */
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = _mm_set1_epi16(pattern);
}
|
||||
|
||||
/* Fill *chunk with repetitions of the 4-byte pattern stored at from. */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
    int32_t pattern;
    /* from is a plain byte pointer, so it is copied out with memcpy instead of being
     * dereferenced through an int32_t pointer (a misaligned, type-punned read). */
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = _mm_set1_epi32(pattern);
}
|
||||
|
||||
/* Fill *chunk with repetitions of the 8-byte pattern stored at from. */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
    int64_t pattern;
    /* from is a plain byte pointer, so it is copied out with memcpy instead of being
     * dereferenced through an int64_t pointer (a misaligned, type-punned read). */
    memcpy(&pattern, from, sizeof(pattern));
    *chunk = _mm_set1_epi64x(pattern);
}
|
||||
|
||||
/* Read one 16-byte chunk from s into *chunk using an unaligned load. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    const __m128i *src = (const __m128i *)s;
    *chunk = _mm_loadu_si128(src);
}
|
||||
|
||||
/* Write the 16-byte chunk *chunk to out using an unaligned store. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
    __m128i *dst = (__m128i *)out;
    _mm_storeu_si128(dst, *chunk);
}
|
||||
|
||||
/* SSE2-suffixed names for the generic chunk routines. Presumably chunkset_tpl.h (not
 * visible here) expands these macros into the actual function definitions, built on the
 * chunk_t type and the load/store/memset helpers defined above -- TODO confirm. */
#define CHUNKSIZE chunksize_sse2
|
||||
#define CHUNKCOPY chunkcopy_sse2
|
||||
#define CHUNKCOPY_SAFE chunkcopy_safe_sse2
|
||||
#define CHUNKUNROLL chunkunroll_sse2
|
||||
#define CHUNKMEMSET chunkmemset_sse2
|
||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_sse2
|
||||
|
||||
#include "chunkset_tpl.h"
|
||||
|
||||
#endif
|
||||
67
libs/zlibng/arch/x86/compare258_avx.c
Normal file
67
libs/zlibng/arch/x86/compare258_avx.c
Normal file
@ -0,0 +1,67 @@
|
||||
/* compare258_avx.c -- AVX2 version of compare258
|
||||
* Copyright Mika T. Lindqvist <postmaster@raasu.org>
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
|
||||
#include <immintrin.h>
|
||||
#ifdef _MSC_VER
|
||||
# include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
/* UNALIGNED_OK, AVX2 intrinsic comparison */
|
||||
/* Length of the common prefix of src0 and src1, capped at 256 bytes.
 *
 * Both inputs are read with unaligned 32-byte loads, two blocks per loop pass, so up to
 * 256 bytes are read from each pointer. Returns the index of the first differing byte,
 * or 256 when all 256 bytes match.
 */
static inline uint32_t compare256_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t len = 0;

    do {
        __m256i lhs, rhs;
        unsigned diff;

        /* First block of the pass. cmpeq yields 0xFF for equal bytes, so after inverting
         * the movemask every set bit in diff marks a byte position that differs. */
        lhs = _mm256_loadu_si256((__m256i*)src0);
        rhs = _mm256_loadu_si256((__m256i*)src1);
        diff = ~(unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs, rhs));
        if (diff != 0)
            return len + (uint32_t)__builtin_ctz(diff);

        src0 += 32, src1 += 32, len += 32;

        /* Second block of the pass, handled the same way. */
        lhs = _mm256_loadu_si256((__m256i*)src0);
        rhs = _mm256_loadu_si256((__m256i*)src1);
        diff = ~(unsigned)_mm256_movemask_epi8(_mm256_cmpeq_epi8(lhs, rhs));
        if (diff != 0)
            return len + (uint32_t)__builtin_ctz(diff);

        src0 += 32, src1 += 32, len += 32;
    } while (len < 256);

    return 256;
}
|
||||
|
||||
/* Length of the common prefix of src0 and src1, capped at 258 bytes: the first two bytes
 * are checked with one 16-bit load per input, the remaining 256 by the AVX2 comparison. */
static inline uint32_t compare258_unaligned_avx2_static(const unsigned char *src0, const unsigned char *src1) {
    const uint16_t head0 = *(uint16_t *)src0;
    const uint16_t head1 = *(uint16_t *)src1;

    if (head0 == head1)
        return compare256_unaligned_avx2_static(src0+2, src1+2) + 2;

    /* The leading pair differs: the result is 1 if the first bytes agree, otherwise 0. */
    return (*src0 == *src1);
}
|
||||
|
||||
/* Non-static entry point: forwards both arguments to compare258_unaligned_avx2_static
 * and returns its result unchanged. */
Z_INTERNAL uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1) {
|
||||
return compare258_unaligned_avx2_static(src0, src1);
|
||||
}
|
||||
|
||||
/* Parameters for match_tpl.h: presumably the template defines a function named by
 * LONGEST_MATCH that compares through COMPARE256 / COMPARE258 (the template is not
 * visible here -- TODO confirm). */
#define LONGEST_MATCH longest_match_unaligned_avx2
|
||||
#define COMPARE256 compare256_unaligned_avx2_static
|
||||
#define COMPARE258 compare258_unaligned_avx2_static
|
||||
|
||||
#include "match_tpl.h"
|
||||
|
||||
#endif
|
||||
74
libs/zlibng/arch/x86/compare258_sse.c
Normal file
74
libs/zlibng/arch/x86/compare258_sse.c
Normal file
@ -0,0 +1,74 @@
|
||||
/* compare258_sse.c -- SSE4.2 version of compare258
|
||||
*
|
||||
* Copyright (C) 2013 Intel Corporation. All rights reserved.
|
||||
* Authors:
|
||||
* Wajdi Feghali <wajdi.k.feghali@intel.com>
|
||||
* Jim Guilford <james.guilford@intel.com>
|
||||
* Vinodh Gopal <vinodh.gopal@intel.com>
|
||||
* Erdinc Ozturk <erdinc.ozturk@intel.com>
|
||||
* Jim Kukunas <james.t.kukunas@linux.intel.com>
|
||||
*
|
||||
* Portions are Copyright (C) 2016 12Sided Technology, LLC.
|
||||
* Author:
|
||||
* Phil Vachon <pvachon@12sidedtech.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
#ifdef X86_SSE42_CMP_STR
|
||||
|
||||
#include <immintrin.h>
|
||||
#ifdef _MSC_VER
|
||||
# include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
/* UNALIGNED_OK, SSE4.2 intrinsic comparison */
|
||||
/* Length of the common prefix of src0 and src1, capped at 256 bytes.
 *
 * Both inputs are read with unaligned 16-byte loads, two blocks per loop pass, so up to
 * 256 bytes are read from each pointer. Returns the index of the first differing byte,
 * or 256 when all 256 bytes match.
 */
static inline uint32_t compare256_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    /* Control byte for the string-compare instructions: unsigned bytes, position-wise
     * equality, negated result (so a hit means "bytes differ"). It is a function-local
     * enum constant rather than a macro so that it stays a compile-time constant without
     * leaking a name as common as `mode` into the rest of the translation unit. */
    enum { cmp_mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH | _SIDD_NEGATIVE_POLARITY };
    uint32_t len = 0;

    do {
        __m128i xmm_src0, xmm_src1;
        uint32_t ret;

        /* First block of the pass: ret is the index of the first differing byte, and the
         * carry result tells whether any byte differed at all. */
        xmm_src0 = _mm_loadu_si128((__m128i *)src0);
        xmm_src1 = _mm_loadu_si128((__m128i *)src1);
        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, cmp_mode);
        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, cmp_mode)) {
            return len + ret;
        }
        src0 += 16, src1 += 16, len += 16;

        /* Second block of the pass, handled the same way. */
        xmm_src0 = _mm_loadu_si128((__m128i *)src0);
        xmm_src1 = _mm_loadu_si128((__m128i *)src1);
        ret = (uint32_t)_mm_cmpestri(xmm_src0, 16, xmm_src1, 16, cmp_mode);
        if (_mm_cmpestrc(xmm_src0, 16, xmm_src1, 16, cmp_mode)) {
            return len + ret;
        }
        src0 += 16, src1 += 16, len += 16;
    } while (len < 256);

    return 256;
}
|
||||
|
||||
/* Length of the common prefix of src0 and src1, capped at 258 bytes: the first two bytes
 * are checked with one 16-bit load per input, the remaining 256 by the SSE4.2 comparison. */
static inline uint32_t compare258_unaligned_sse4_static(const unsigned char *src0, const unsigned char *src1) {
    const uint16_t head0 = *(uint16_t *)src0;
    const uint16_t head1 = *(uint16_t *)src1;

    if (head0 == head1)
        return compare256_unaligned_sse4_static(src0+2, src1+2) + 2;

    /* The leading pair differs: the result is 1 if the first bytes agree, otherwise 0. */
    return (*src0 == *src1);
}
|
||||
|
||||
/* Non-static entry point: forwards both arguments to compare258_unaligned_sse4_static
 * and returns its result unchanged. */
Z_INTERNAL uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1) {
|
||||
return compare258_unaligned_sse4_static(src0, src1);
|
||||
}
|
||||
|
||||
/* Parameters for match_tpl.h: presumably the template defines a function named by
 * LONGEST_MATCH that compares through COMPARE256 / COMPARE258 (the template is not
 * visible here -- TODO confirm). */
#define LONGEST_MATCH longest_match_unaligned_sse4
|
||||
#define COMPARE256 compare256_unaligned_sse4_static
|
||||
#define COMPARE258 compare258_unaligned_sse4_static
|
||||
|
||||
#include "match_tpl.h"
|
||||
|
||||
#endif
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ
|
||||
* Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ
|
||||
* instruction.
|
||||
*
|
||||
* A white paper describing this algorithm can be found at:
|
||||
@ -18,14 +18,14 @@
|
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "../../zbuild.h"
|
||||
#include <inttypes.h>
|
||||
#include <immintrin.h>
|
||||
#include <wmmintrin.h>
|
||||
|
||||
#include "crc_folding.h"
|
||||
|
||||
ZLIB_INTERNAL void crc_fold_init(deflate_state *const s) {
|
||||
Z_INTERNAL void crc_fold_init(deflate_state *const s) {
|
||||
/* CRC_SAVE */
|
||||
_mm_storeu_si128((__m128i *)s->crc0 + 0, _mm_cvtsi32_si128(0x9db42487));
|
||||
_mm_storeu_si128((__m128i *)s->crc0 + 1, _mm_setzero_si128());
|
||||
@ -227,9 +227,10 @@ static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1,
|
||||
*xmm_crc3 = _mm_castps_si128(ps_res);
|
||||
}
|
||||
|
||||
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
|
||||
Z_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
|
||||
unsigned long algn_diff;
|
||||
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
|
||||
char ALIGNED_(16) partial_buf[16] = { 0 };
|
||||
|
||||
/* CRC_LOAD */
|
||||
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
|
||||
@ -241,11 +242,14 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
|
||||
if (len < 16) {
|
||||
if (len == 0)
|
||||
return;
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
|
||||
|
||||
memcpy(partial_buf, src, len);
|
||||
xmm_crc_part = _mm_loadu_si128((const __m128i *)partial_buf);
|
||||
memcpy(dst, partial_buf, len);
|
||||
goto partial;
|
||||
}
|
||||
|
||||
algn_diff = (0 - (uintptr_t)src) & 0xF;
|
||||
algn_diff = ((uintptr_t)16 - ((uintptr_t)src & 0xF)) & 0xF;
|
||||
if (algn_diff) {
|
||||
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
|
||||
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
|
||||
@ -255,6 +259,8 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
|
||||
len -= algn_diff;
|
||||
|
||||
partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
|
||||
} else {
|
||||
xmm_crc_part = _mm_setzero_si128();
|
||||
}
|
||||
|
||||
while ((len -= 64) >= 0) {
|
||||
@ -305,7 +311,7 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
|
||||
goto done;
|
||||
|
||||
dst += 48;
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)src + 3);
|
||||
memcpy(&xmm_crc_part, (__m128i *)src + 3, len);
|
||||
} else if (len + 32 >= 0) {
|
||||
len += 32;
|
||||
|
||||
@ -324,7 +330,7 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
|
||||
goto done;
|
||||
|
||||
dst += 32;
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)src + 2);
|
||||
memcpy(&xmm_crc_part, (__m128i *)src + 2, len);
|
||||
} else if (len + 48 >= 0) {
|
||||
len += 48;
|
||||
|
||||
@ -340,16 +346,18 @@ ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, con
|
||||
goto done;
|
||||
|
||||
dst += 16;
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)src + 1);
|
||||
memcpy(&xmm_crc_part, (__m128i *)src + 1, len);
|
||||
} else {
|
||||
len += 64;
|
||||
if (len == 0)
|
||||
goto done;
|
||||
xmm_crc_part = _mm_load_si128((__m128i *)src);
|
||||
memcpy(&xmm_crc_part, src, len);
|
||||
}
|
||||
|
||||
_mm_storeu_si128((__m128i *)partial_buf, xmm_crc_part);
|
||||
memcpy(dst, partial_buf, len);
|
||||
|
||||
partial:
|
||||
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
|
||||
partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
|
||||
done:
|
||||
/* CRC_SAVE */
|
||||
@ -377,7 +385,7 @@ static const unsigned ALIGNED_(16) crc_mask2[4] = {
|
||||
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
|
||||
};
|
||||
|
||||
uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
|
||||
uint32_t Z_INTERNAL crc_fold_512to32(deflate_state *const s) {
|
||||
const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask);
|
||||
const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2);
|
||||
|
||||
@ -447,4 +455,3 @@ uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@ -10,10 +10,10 @@
|
||||
#ifndef CRC_FOLDING_H_
|
||||
#define CRC_FOLDING_H_
|
||||
|
||||
#include "deflate.h"
|
||||
#include "../../deflate.h"
|
||||
|
||||
ZLIB_INTERNAL void crc_fold_init(deflate_state *const);
|
||||
ZLIB_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
|
||||
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
|
||||
Z_INTERNAL void crc_fold_init(deflate_state *const);
|
||||
Z_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
|
||||
Z_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,25 +0,0 @@
|
||||
#ifndef X86_CTZL_H
|
||||
#define X86_CTZL_H
|
||||
|
||||
#include <intrin.h>
|
||||
#ifdef X86_CPUID
|
||||
# include "x86.h"
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0
|
||||
* Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
|
||||
*/
|
||||
static __forceinline unsigned long __builtin_ctzl(unsigned long value)
|
||||
{
|
||||
#ifdef X86_CPUID
|
||||
if (x86_cpu_has_tzcnt)
|
||||
return _tzcnt_u32(value);
|
||||
#endif
|
||||
unsigned long trailing_zero;
|
||||
_BitScanForward(&trailing_zero, value);
|
||||
return trailing_zero;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,175 +0,0 @@
|
||||
/*
|
||||
* Fill Window with SSE2-optimized hash shifting
|
||||
*
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Authors:
|
||||
* Arjan van de Ven <arjan@linux.intel.com>
|
||||
* Jim Kukunas <james.t.kukunas@linux.intel.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#ifdef X86_SSE2
|
||||
|
||||
#include "zbuild.h"
|
||||
#include <immintrin.h>
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
#include "functable.h"
|
||||
|
||||
extern int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
|
||||
|
||||
ZLIB_INTERNAL void fill_window_sse(deflate_state *s) {
|
||||
const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);
|
||||
|
||||
register unsigned n;
|
||||
register Pos *p;
|
||||
unsigned more; /* Amount of free space at the end of the window. */
|
||||
unsigned int wsize = s->w_size;
|
||||
|
||||
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
|
||||
|
||||
do {
|
||||
more = (unsigned)(s->window_size -(unsigned long)s->lookahead -(unsigned long)s->strstart);
|
||||
|
||||
/* Deal with !@#$% 64K limit: */
|
||||
if (sizeof(int) <= 2) {
|
||||
if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
|
||||
more = wsize;
|
||||
|
||||
} else if (more == (unsigned)(-1)) {
|
||||
/* Very unlikely, but possible on 16 bit machine if
|
||||
* strstart == 0 && lookahead == 1 (input done a byte at time)
|
||||
*/
|
||||
more--;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the window is almost full and there is insufficient lookahead,
|
||||
* move the upper half to the lower one to make room in the upper half.
|
||||
*/
|
||||
if (s->strstart >= wsize+MAX_DIST(s)) {
|
||||
memcpy(s->window, s->window+wsize, (unsigned)wsize);
|
||||
s->match_start = (s->match_start >= wsize) ? s->match_start - wsize : 0;
|
||||
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
|
||||
s->block_start -= (long) wsize;
|
||||
|
||||
/* Slide the hash table (could be avoided with 32 bit values
|
||||
at the expense of memory usage). We slide even when level == 0
|
||||
to keep the hash table consistent if we switch back to level > 0
|
||||
later. (Using level 0 permanently is not an optimal usage of
|
||||
zlib, so we don't care about this pathological case.)
|
||||
*/
|
||||
n = s->hash_size;
|
||||
p = &s->head[n];
|
||||
p -= 8;
|
||||
do {
|
||||
__m128i value, result;
|
||||
|
||||
value = _mm_loadu_si128((__m128i *)p);
|
||||
result = _mm_subs_epu16(value, xmm_wsize);
|
||||
_mm_storeu_si128((__m128i *)p, result);
|
||||
|
||||
p -= 8;
|
||||
n -= 8;
|
||||
} while (n > 0);
|
||||
|
||||
n = wsize;
|
||||
p = &s->prev[n];
|
||||
p -= 8;
|
||||
do {
|
||||
__m128i value, result;
|
||||
|
||||
value = _mm_loadu_si128((__m128i *)p);
|
||||
result = _mm_subs_epu16(value, xmm_wsize);
|
||||
_mm_storeu_si128((__m128i *)p, result);
|
||||
|
||||
p -= 8;
|
||||
n -= 8;
|
||||
} while (n > 0);
|
||||
more += wsize;
|
||||
}
|
||||
if (s->strm->avail_in == 0) break;
|
||||
|
||||
/* If there was no sliding:
|
||||
* strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
|
||||
* more == window_size - lookahead - strstart
|
||||
* => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
|
||||
* => more >= window_size - 2*WSIZE + 2
|
||||
* In the BIG_MEM or MMAP case (not yet supported),
|
||||
* window_size == input_size + MIN_LOOKAHEAD &&
|
||||
* strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
|
||||
* Otherwise, window_size == 2*WSIZE so more >= 2.
|
||||
* If there was sliding, more >= WSIZE. So in all cases, more >= 2.
|
||||
*/
|
||||
Assert(more >= 2, "more < 2");
|
||||
|
||||
n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
|
||||
s->lookahead += n;
|
||||
|
||||
/* Initialize the hash value now that we have some input: */
|
||||
if (s->lookahead + s->insert >= MIN_MATCH) {
|
||||
unsigned int str = s->strstart - s->insert;
|
||||
s->ins_h = s->window[str];
|
||||
if (str >= 1)
|
||||
functable.insert_string(s, str + 2 - MIN_MATCH, 1);
|
||||
#if MIN_MATCH != 3
|
||||
#error Call insert_string() MIN_MATCH-3 more times
|
||||
while (s->insert) {
|
||||
functable.insert_string(s, str, 1);
|
||||
str++;
|
||||
s->insert--;
|
||||
if (s->lookahead + s->insert < MIN_MATCH)
|
||||
break;
|
||||
}
|
||||
#else
|
||||
unsigned int count;
|
||||
if (unlikely(s->lookahead == 1)){
|
||||
count = s->insert - 1;
|
||||
}else{
|
||||
count = s->insert;
|
||||
}
|
||||
functable.insert_string(s, str, count);
|
||||
s->insert -= count;
|
||||
#endif
|
||||
}
|
||||
/* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
|
||||
* but this is not important since only literal bytes will be emitted.
|
||||
*/
|
||||
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
|
||||
|
||||
/* If the WIN_INIT bytes after the end of the current data have never been
|
||||
* written, then zero those bytes in order to avoid memory check reports of
|
||||
* the use of uninitialized (or uninitialised as Julian writes) bytes by
|
||||
* the longest match routines. Update the high water mark for the next
|
||||
* time through here. WIN_INIT is set to MAX_MATCH since the longest match
|
||||
* routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
|
||||
*/
|
||||
if (s->high_water < s->window_size) {
|
||||
unsigned long curr = s->strstart + (unsigned long)(s->lookahead);
|
||||
unsigned long init;
|
||||
|
||||
if (s->high_water < curr) {
|
||||
/* Previous high water mark below current data -- zero WIN_INIT
|
||||
* bytes or up to end of window, whichever is less.
|
||||
*/
|
||||
init = s->window_size - curr;
|
||||
if (init > WIN_INIT)
|
||||
init = WIN_INIT;
|
||||
memset(s->window + curr, 0, (unsigned)init);
|
||||
s->high_water = curr + init;
|
||||
} else if (s->high_water < (unsigned long)curr + WIN_INIT) {
|
||||
/* High water mark at or above current data, but below current data
|
||||
* plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
|
||||
* to end of window, whichever is less.
|
||||
*/
|
||||
init = (unsigned long)curr + WIN_INIT - s->high_water;
|
||||
if (init > s->window_size - s->high_water)
|
||||
init = s->window_size - s->high_water;
|
||||
memset(s->window + s->high_water, 0, (unsigned)init);
|
||||
s->high_water += init;
|
||||
}
|
||||
}
|
||||
|
||||
Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
|
||||
}
|
||||
#endif
|
||||
@ -5,52 +5,42 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
|
||||
/* ===========================================================================
|
||||
* Insert string str in the dictionary and set match_head to the previous head
|
||||
* of the hash chain (the most recent string with same hash key). Return
|
||||
* the previous length of the hash chain.
|
||||
* IN assertion: all calls to to INSERT_STRING are made with consecutive
|
||||
* input characters and the first MIN_MATCH bytes of str are valid
|
||||
* (except for the last MIN_MATCH-1 bytes of the input file).
|
||||
*/
|
||||
#ifdef X86_SSE4_2_CRC_HASH
|
||||
ZLIB_INTERNAL Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count) {
|
||||
Pos ret = 0;
|
||||
unsigned int idx;
|
||||
unsigned int *ip, val, h;
|
||||
|
||||
for (idx = 0; idx < count; idx++) {
|
||||
ip = (unsigned *)&s->window[str+idx];
|
||||
memcpy(&val, ip, sizeof(val));
|
||||
h = 0;
|
||||
|
||||
if (s->level >= TRIGGER_LEVEL)
|
||||
val &= 0xFFFFFF;
|
||||
|
||||
#include "../../zbuild.h"
|
||||
#include <immintrin.h>
|
||||
#ifdef _MSC_VER
|
||||
h = _mm_crc32_u32(h, val);
|
||||
#elif defined(X86_SSE4_2_CRC_INTRIN)
|
||||
h = __builtin_ia32_crc32si(h, val);
|
||||
# include <nmmintrin.h>
|
||||
#endif
|
||||
#include "../../deflate.h"
|
||||
|
||||
#ifdef X86_SSE42_CRC_INTRIN
|
||||
# ifdef _MSC_VER
|
||||
# define UPDATE_HASH(s, h, val)\
|
||||
h = _mm_crc32_u32(h, val)
|
||||
# else
|
||||
# define UPDATE_HASH(s, h, val)\
|
||||
h = __builtin_ia32_crc32si(h, val)
|
||||
# endif
|
||||
#else
|
||||
__asm__ __volatile__ (
|
||||
"crc32 %1,%0\n\t"
|
||||
: "+r" (h)
|
||||
: "r" (val)
|
||||
);
|
||||
#endif
|
||||
Pos head = s->head[h & s->hash_mask];
|
||||
if (head != str+idx) {
|
||||
s->prev[(str+idx) & s->w_mask] = head;
|
||||
s->head[h & s->hash_mask] = str+idx;
|
||||
if (idx == count-1)
|
||||
ret = head;
|
||||
} else if (idx == count - 1) {
|
||||
ret = str + idx;
|
||||
}
|
||||
# ifdef _MSC_VER
|
||||
# define UPDATE_HASH(s, h, val) {\
|
||||
__asm mov edx, h\
|
||||
__asm mov eax, val\
|
||||
__asm crc32 eax, edx\
|
||||
__asm mov val, eax\
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
# else
|
||||
# define UPDATE_HASH(s, h, val) \
|
||||
__asm__ __volatile__ (\
|
||||
"crc32 %1,%0\n\t"\
|
||||
: "+r" (h)\
|
||||
: "r" (val)\
|
||||
);
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define INSERT_STRING insert_string_sse4
|
||||
#define QUICK_INSERT_STRING quick_insert_string_sse4
|
||||
|
||||
#ifdef X86_SSE42_CRC_HASH
|
||||
# include "../../insert_string_tpl.h"
|
||||
#endif
|
||||
|
||||
47
libs/zlibng/arch/x86/slide_avx.c
Normal file
47
libs/zlibng/arch/x86/slide_avx.c
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* AVX2 optimized hash slide, based on Intel's slide_sse implementation
|
||||
*
|
||||
* Copyright (C) 2017 Intel Corporation
|
||||
* Authors:
|
||||
* Arjan van de Ven <arjan@linux.intel.com>
|
||||
* Jim Kukunas <james.t.kukunas@linux.intel.com>
|
||||
* Mika T. Lindqvist <postmaster@raasu.org>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
Z_INTERNAL void slide_hash_avx2(deflate_state *s) {
|
||||
Pos *p;
|
||||
unsigned n;
|
||||
uint16_t wsize = (uint16_t)s->w_size;
|
||||
const __m256i ymm_wsize = _mm256_set1_epi16((short)wsize);
|
||||
|
||||
n = HASH_SIZE;
|
||||
p = &s->head[n] - 16;
|
||||
do {
|
||||
__m256i value, result;
|
||||
|
||||
value = _mm256_loadu_si256((__m256i *)p);
|
||||
result= _mm256_subs_epu16(value, ymm_wsize);
|
||||
_mm256_storeu_si256((__m256i *)p, result);
|
||||
p -= 16;
|
||||
n -= 16;
|
||||
} while (n > 0);
|
||||
|
||||
n = wsize;
|
||||
p = &s->prev[n] - 16;
|
||||
do {
|
||||
__m256i value, result;
|
||||
|
||||
value = _mm256_loadu_si256((__m256i *)p);
|
||||
result= _mm256_subs_epu16(value, ymm_wsize);
|
||||
_mm256_storeu_si256((__m256i *)p, result);
|
||||
|
||||
p -= 16;
|
||||
n -= 16;
|
||||
} while (n > 0);
|
||||
}
|
||||
46
libs/zlibng/arch/x86/slide_sse.c
Normal file
46
libs/zlibng/arch/x86/slide_sse.c
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* SSE optimized hash slide
|
||||
*
|
||||
* Copyright (C) 2017 Intel Corporation
|
||||
* Authors:
|
||||
* Arjan van de Ven <arjan@linux.intel.com>
|
||||
* Jim Kukunas <james.t.kukunas@linux.intel.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#include "../../zbuild.h"
|
||||
#include "../../deflate.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
Z_INTERNAL void slide_hash_sse2(deflate_state *s) {
|
||||
Pos *p;
|
||||
unsigned n;
|
||||
uint16_t wsize = (uint16_t)s->w_size;
|
||||
const __m128i xmm_wsize = _mm_set1_epi16((short)wsize);
|
||||
|
||||
n = HASH_SIZE;
|
||||
p = &s->head[n] - 8;
|
||||
do {
|
||||
__m128i value, result;
|
||||
|
||||
value = _mm_loadu_si128((__m128i *)p);
|
||||
result= _mm_subs_epu16(value, xmm_wsize);
|
||||
_mm_storeu_si128((__m128i *)p, result);
|
||||
p -= 8;
|
||||
n -= 8;
|
||||
} while (n > 0);
|
||||
|
||||
n = wsize;
|
||||
p = &s->prev[n] - 8;
|
||||
do {
|
||||
__m128i value, result;
|
||||
|
||||
value = _mm_loadu_si128((__m128i *)p);
|
||||
result= _mm_subs_epu16(value, xmm_wsize);
|
||||
_mm_storeu_si128((__m128i *)p, result);
|
||||
|
||||
p -= 8;
|
||||
n -= 8;
|
||||
} while (n > 0);
|
||||
}
|
||||
@ -8,61 +8,73 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "zutil.h"
|
||||
#include "../../zutil.h"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
# include <intrin.h>
|
||||
#else
|
||||
// Newer versions of GCC and clang come with cpuid.h
|
||||
#include <cpuid.h>
|
||||
# include <cpuid.h>
|
||||
#endif
|
||||
|
||||
ZLIB_INTERNAL int x86_cpu_has_sse2;
|
||||
ZLIB_INTERNAL int x86_cpu_has_sse42;
|
||||
ZLIB_INTERNAL int x86_cpu_has_pclmulqdq;
|
||||
ZLIB_INTERNAL int x86_cpu_has_tzcnt;
|
||||
Z_INTERNAL int x86_cpu_has_avx2;
|
||||
Z_INTERNAL int x86_cpu_has_sse2;
|
||||
Z_INTERNAL int x86_cpu_has_ssse3;
|
||||
Z_INTERNAL int x86_cpu_has_sse42;
|
||||
Z_INTERNAL int x86_cpu_has_pclmulqdq;
|
||||
Z_INTERNAL int x86_cpu_has_tzcnt;
|
||||
|
||||
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
|
||||
#ifdef _MSC_VER
|
||||
unsigned int registers[4];
|
||||
__cpuid(registers, info);
|
||||
unsigned int registers[4];
|
||||
__cpuid((int *)registers, info);
|
||||
|
||||
*eax = registers[0];
|
||||
*ebx = registers[1];
|
||||
*ecx = registers[2];
|
||||
*edx = registers[3];
|
||||
*eax = registers[0];
|
||||
*ebx = registers[1];
|
||||
*ecx = registers[2];
|
||||
*edx = registers[3];
|
||||
#else
|
||||
unsigned int _eax;
|
||||
unsigned int _ebx;
|
||||
unsigned int _ecx;
|
||||
unsigned int _edx;
|
||||
__cpuid(info, _eax, _ebx, _ecx, _edx);
|
||||
*eax = _eax;
|
||||
*ebx = _ebx;
|
||||
*ecx = _ecx;
|
||||
*edx = _edx;
|
||||
__cpuid(info, *eax, *ebx, *ecx, *edx);
|
||||
#endif
|
||||
}
|
||||
|
||||
void ZLIB_INTERNAL x86_check_features(void) {
|
||||
unsigned eax, ebx, ecx, edx;
|
||||
unsigned maxbasic;
|
||||
static void cpuidex(int info, int subinfo, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
|
||||
#ifdef _MSC_VER
|
||||
unsigned int registers[4];
|
||||
__cpuidex((int *)registers, info, subinfo);
|
||||
|
||||
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
|
||||
|
||||
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
x86_cpu_has_sse2 = edx & 0x4000000;
|
||||
x86_cpu_has_sse42 = ecx & 0x100000;
|
||||
x86_cpu_has_pclmulqdq = ecx & 0x2;
|
||||
|
||||
if (maxbasic >= 7) {
|
||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
// check BMI1 bit
|
||||
// Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
|
||||
x86_cpu_has_tzcnt = ebx & 0x8;
|
||||
} else {
|
||||
x86_cpu_has_tzcnt = 0;
|
||||
}
|
||||
*eax = registers[0];
|
||||
*ebx = registers[1];
|
||||
*ecx = registers[2];
|
||||
*edx = registers[3];
|
||||
#else
|
||||
__cpuid_count(info, subinfo, *eax, *ebx, *ecx, *edx);
|
||||
#endif
|
||||
}
|
||||
|
||||
void Z_INTERNAL x86_check_features(void) {
|
||||
unsigned eax, ebx, ecx, edx;
|
||||
unsigned maxbasic;
|
||||
|
||||
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
|
||||
|
||||
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
x86_cpu_has_sse2 = edx & 0x4000000;
|
||||
x86_cpu_has_ssse3 = ecx & 0x200;
|
||||
x86_cpu_has_sse42 = ecx & 0x100000;
|
||||
x86_cpu_has_pclmulqdq = ecx & 0x2;
|
||||
|
||||
if (maxbasic >= 7) {
|
||||
cpuidex(7, 0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
// check BMI1 bit
|
||||
// Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
|
||||
x86_cpu_has_tzcnt = ebx & 0x8;
|
||||
// check AVX2 bit
|
||||
x86_cpu_has_avx2 = ebx & 0x20;
|
||||
} else {
|
||||
x86_cpu_has_tzcnt = 0;
|
||||
x86_cpu_has_avx2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,16 +1,18 @@
|
||||
/* cpu.h -- check for CPU features
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
/* cpu.h -- check for CPU features
|
||||
* Copyright (C) 2013 Intel Corporation Jim Kukunas
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#ifndef CPU_H_
|
||||
#define CPU_H_
|
||||
|
||||
extern int x86_cpu_has_avx2;
|
||||
extern int x86_cpu_has_sse2;
|
||||
extern int x86_cpu_has_ssse3;
|
||||
extern int x86_cpu_has_sse42;
|
||||
extern int x86_cpu_has_pclmulqdq;
|
||||
extern int x86_cpu_has_tzcnt;
|
||||
|
||||
void ZLIB_INTERNAL x86_check_features(void);
|
||||
void Z_INTERNAL x86_check_features(void);
|
||||
|
||||
#endif /* CPU_H_ */
|
||||
|
||||
81
libs/zlibng/chunkset.c
Normal file
81
libs/zlibng/chunkset.c
Normal file
@ -0,0 +1,81 @@
|
||||
/* chunkset.c -- inline functions to copy small data chunks.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
|
||||
// We need sizeof(chunk_t) to be 8, no matter what.
|
||||
#if defined(UNALIGNED64_OK)
|
||||
typedef uint64_t chunk_t;
|
||||
#elif defined(UNALIGNED_OK)
|
||||
typedef struct chunk_t { uint32_t u32[2]; } chunk_t;
|
||||
#else
|
||||
typedef struct chunk_t { uint8_t u8[8]; } chunk_t;
|
||||
#endif
|
||||
|
||||
#define HAVE_CHUNKMEMSET_1
|
||||
#define HAVE_CHUNKMEMSET_4
|
||||
#define HAVE_CHUNKMEMSET_8
|
||||
|
||||
/* Fill *chunk with copies of the single byte at `from`
   (the repeating pattern for a match distance of 1). */
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    /* The multiplier must be unsigned: an unsuffixed hex constant that fits a
       signed 64-bit type is signed, and multiplying it by a byte >= 0x80
       overflows that type, which is undefined behaviour. */
    *chunk = 0x0101010101010101ULL * (uint8_t)*from;
#elif defined(UNALIGNED_OK)
    /* Same reasoning for the 32-bit constant, which would otherwise be int. */
    chunk->u32[0] = 0x01010101U * (uint8_t)*from;
    chunk->u32[1] = chunk->u32[0];
#else
    memset(chunk, *from, sizeof(chunk_t));
#endif
}
|
||||
|
||||
/* Fill *chunk with two copies of the 4 bytes at `from`
   (the repeating pattern for a match distance of 4). */
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    /* Place the 32-bit pattern in both halves of the 64-bit chunk. */
    const uint64_t pattern = *(uint32_t *)from;
    *chunk = pattern | (pattern << 32);
#elif defined(UNALIGNED_OK)
    const uint32_t pattern = *(uint32_t *)from;
    chunk->u32[0] = pattern;
    chunk->u32[1] = pattern;
#else
    uint8_t *dst = (uint8_t *)chunk;
    memcpy(dst, from, 4);
    memcpy(dst + 4, from, 4);
#endif
}
|
||||
|
||||
/* Copy the 8 bytes at `from` into *chunk
   (the repeating pattern for a match distance of 8). */
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    const uint64_t *src = (uint64_t *)from;
    *chunk = *src;
#elif defined(UNALIGNED_OK)
    const uint32_t *words = (uint32_t *)from;
    chunk->u32[0] = words[0];
    chunk->u32[1] = words[1];
#else
    memcpy(chunk, from, sizeof(chunk_t));
#endif
}
|
||||
|
||||
/* Read one chunk from the (possibly unaligned) address `s` into *chunk.
   A chunk here is 8 bytes, so this is the same operation as chunkmemset_8;
   the cast only drops the const qualifier that chunkmemset_8 does not take. */
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
    uint8_t *src = (uint8_t *)s;
    chunkmemset_8(src, chunk);
}
|
||||
|
||||
/* Write the contents of *chunk to the (possibly unaligned) address `out`. */
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
#if defined(UNALIGNED64_OK)
    uint64_t *dst = (uint64_t *)out;
    *dst = *chunk;
#elif defined(UNALIGNED_OK)
    uint32_t *words = (uint32_t *)out;
    words[0] = chunk->u32[0];
    words[1] = chunk->u32[1];
#else
    memcpy(out, chunk, sizeof(chunk_t));
#endif
}
|
||||
|
||||
#define CHUNKSIZE chunksize_c
|
||||
#define CHUNKCOPY chunkcopy_c
|
||||
#define CHUNKCOPY_SAFE chunkcopy_safe_c
|
||||
#define CHUNKUNROLL chunkunroll_c
|
||||
#define CHUNKMEMSET chunkmemset_c
|
||||
#define CHUNKMEMSET_SAFE chunkmemset_safe_c
|
||||
|
||||
#include "chunkset_tpl.h"
|
||||
172
libs/zlibng/chunkset_tpl.h
Normal file
172
libs/zlibng/chunkset_tpl.h
Normal file
@ -0,0 +1,172 @@
|
||||
/* chunkset_tpl.h -- inline functions to copy small data chunks.
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
/* Returns the chunk size */
|
||||
Z_INTERNAL uint32_t CHUNKSIZE(void) {
|
||||
return sizeof(chunk_t);
|
||||
}
|
||||
|
||||
/* Behave like memcpy, but assume that it's OK to overwrite at least
|
||||
chunk_t bytes of output even if the length is shorter than this,
|
||||
that the length is non-zero, and that `from` lags `out` by at least
|
||||
sizeof chunk_t bytes (or that they don't overlap at all or simply that
|
||||
the distance is less than the length of the copy).
|
||||
|
||||
Aside from better memory bus utilisation, this means that short copies
|
||||
(chunk_t bytes or fewer) will fall straight through the loop
|
||||
without iteration, which will hopefully make the branch prediction more
|
||||
reliable. */
|
||||
Z_INTERNAL uint8_t* CHUNKCOPY(uint8_t *out, uint8_t const *from, unsigned len) {
|
||||
chunk_t chunk;
|
||||
--len;
|
||||
loadchunk(from, &chunk);
|
||||
storechunk(out, &chunk);
|
||||
out += (len % sizeof(chunk_t)) + 1;
|
||||
from += (len % sizeof(chunk_t)) + 1;
|
||||
len /= sizeof(chunk_t);
|
||||
while (len > 0) {
|
||||
loadchunk(from, &chunk);
|
||||
storechunk(out, &chunk);
|
||||
out += sizeof(chunk_t);
|
||||
from += sizeof(chunk_t);
|
||||
--len;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/* Behave like chunkcopy, but avoid writing beyond of legal output. */
|
||||
Z_INTERNAL uint8_t* CHUNKCOPY_SAFE(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
|
||||
if ((safe - out) < (ptrdiff_t)sizeof(chunk_t)) {
|
||||
int32_t use_chunk16 = sizeof(chunk_t) > 16 && (len & 16);
|
||||
if (use_chunk16) {
|
||||
memcpy(out, from, 16);
|
||||
out += 16;
|
||||
from += 16;
|
||||
}
|
||||
if (len & 8) {
|
||||
memcpy(out, from, 8);
|
||||
out += 8;
|
||||
from += 8;
|
||||
}
|
||||
if (len & 4) {
|
||||
memcpy(out, from, 4);
|
||||
out += 4;
|
||||
from += 4;
|
||||
}
|
||||
if (len & 2) {
|
||||
memcpy(out, from, 2);
|
||||
out += 2;
|
||||
from += 2;
|
||||
}
|
||||
if (len & 1) {
|
||||
*out++ = *from++;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
return CHUNKCOPY(out, from, len);
|
||||
}
|
||||
|
||||
/* Perform short copies until distance can be rewritten as being at least
|
||||
sizeof chunk_t.
|
||||
|
||||
This assumes that it's OK to overwrite at least the first
|
||||
2*sizeof(chunk_t) bytes of output even if the copy is shorter than this.
|
||||
This assumption holds because inflate_fast() starts every iteration with at
|
||||
least 258 bytes of output space available (258 being the maximum length
|
||||
output from a single token; see inflate_fast()'s assumptions below). */
|
||||
Z_INTERNAL uint8_t* CHUNKUNROLL(uint8_t *out, unsigned *dist, unsigned *len) {
|
||||
unsigned char const *from = out - *dist;
|
||||
chunk_t chunk;
|
||||
while (*dist < *len && *dist < sizeof(chunk_t)) {
|
||||
loadchunk(from, &chunk);
|
||||
storechunk(out, &chunk);
|
||||
out += *dist;
|
||||
*len -= *dist;
|
||||
*dist += *dist;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/* Copy DIST bytes from OUT - DIST into OUT + DIST * k, for 0 <= k < LEN/DIST.
|
||||
Return OUT + LEN. */
|
||||
Z_INTERNAL uint8_t* CHUNKMEMSET(uint8_t *out, unsigned dist, unsigned len) {
|
||||
/* Debug performance related issues when len < sizeof(uint64_t):
|
||||
Assert(len >= sizeof(uint64_t), "chunkmemset should be called on larger chunks"); */
|
||||
Assert(dist > 0, "cannot have a distance 0");
|
||||
|
||||
unsigned char *from = out - dist;
|
||||
chunk_t chunk;
|
||||
unsigned sz = sizeof(chunk);
|
||||
if (len < sz) {
|
||||
do {
|
||||
*out++ = *from++;
|
||||
--len;
|
||||
} while (len != 0);
|
||||
return out;
|
||||
}
|
||||
|
||||
#ifdef HAVE_CHUNKMEMSET_1
|
||||
if (dist == 1) {
|
||||
chunkmemset_1(from, &chunk);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_2
|
||||
if (dist == 2) {
|
||||
chunkmemset_2(from, &chunk);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_4
|
||||
if (dist == 4) {
|
||||
chunkmemset_4(from, &chunk);
|
||||
} else
|
||||
#endif
|
||||
#ifdef HAVE_CHUNKMEMSET_8
|
||||
if (dist == 8) {
|
||||
chunkmemset_8(from, &chunk);
|
||||
} else
|
||||
#endif
|
||||
if (dist == sz) {
|
||||
loadchunk(from, &chunk);
|
||||
} else if (dist < sz) {
|
||||
unsigned char *end = out + len - 1;
|
||||
while (len > dist) {
|
||||
out = CHUNKCOPY_SAFE(out, from, dist, end);
|
||||
len -= dist;
|
||||
}
|
||||
if (len > 0) {
|
||||
out = CHUNKCOPY_SAFE(out, from, len, end);
|
||||
}
|
||||
return out;
|
||||
} else {
|
||||
out = CHUNKUNROLL(out, &dist, &len);
|
||||
return CHUNKCOPY(out, out - dist, len);
|
||||
}
|
||||
|
||||
unsigned rem = len % sz;
|
||||
len -= rem;
|
||||
while (len) {
|
||||
storechunk(out, &chunk);
|
||||
out += sz;
|
||||
len -= sz;
|
||||
}
|
||||
|
||||
/* Last, deal with the case when LEN is not a multiple of SZ. */
|
||||
if (rem)
|
||||
memcpy(out, from, rem);
|
||||
out += rem;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
/* Like chunkmemset, but falls back to a plain byte-by-byte copy when `left`
   is smaller than three chunks. Returns the advanced output pointer. */
Z_INTERNAL uint8_t* CHUNKMEMSET_SAFE(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
    const uint8_t *src;

    if (left >= (unsigned)(3 * sizeof(chunk_t)))
        return CHUNKMEMSET(out, dist, len);

    /* src stays exactly `dist` bytes behind out for the whole copy. */
    src = out - dist;
    for (; len > 0; --len) {
        *out++ = *src++;
    }
    return out;
}
|
||||
99
libs/zlibng/cmake/detect-arch.c
Normal file
99
libs/zlibng/cmake/detect-arch.c
Normal file
@ -0,0 +1,99 @@
|
||||
// detect-arch.c -- Detect compiler architecture and raise preprocessor error
//                  containing a simple arch identifier.
// Copyright (C) 2019 Hans Kristian Rosbach
// Licensed under the Zlib license, see LICENSE.md for details
//
// This file is never meant to compile successfully: every branch ends in
// "#error archfound <name>", and detect-arch.cmake extracts <name> from the
// compiler output. Each recognized architecture family therefore needs a
// fallback #error, otherwise no tag is emitted at all.

// x86_64
#if defined(__x86_64__) || defined(_M_X64)
    #error archfound x86_64

// x86
#elif defined(__i386) || defined(_M_IX86)
    #error archfound i686

// ARM
#elif defined(__aarch64__) || defined(_M_ARM64)
    #error archfound aarch64
#elif defined(__arm__) || defined(__arm) || defined(_M_ARM) || defined(__TARGET_ARCH_ARM)
    #if defined(__ARM64_ARCH_8__) || defined(__ARMv8__) || defined(__ARMv8_A__)
        #error archfound armv8
    #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__)
        #error archfound armv7
    #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6T2__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6M__)
        #error archfound armv6
    #elif defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
        #error archfound armv5
    #elif defined(__ARM_ARCH_4T__) || defined(__TARGET_ARCH_5E__)
        #error archfound armv4
    #elif defined(__ARM_ARCH_3__) || defined(__TARGET_ARCH_3M__)
        #error archfound armv3
    #elif defined(__ARM_ARCH_2__)
        #error archfound armv2
    #else
        // ARM target whose version macro is not listed above
        #error archfound arm
    #endif

// PowerPC
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
    #if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
        #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
            #error archfound powerpc64le
        #else
            #error archfound powerpc64
        #endif
    #else
        #error archfound powerpc
    #endif

// --------------- Less common architectures alphabetically below ---------------

// ALPHA
#elif defined(__alpha__) || defined(__alpha)
    #error archfound alpha

// Blackfin
#elif defined(__BFIN__)
    #error archfound blackfin

// Itanium
#elif defined(__ia64) || defined(_M_IA64)
    #error archfound ia64

// MIPS
#elif defined(__mips__) || defined(__mips)
    #error archfound mips

// Motorola 68000-series
#elif defined(__m68k__)
    #error archfound m68k

// SuperH
#elif defined(__sh__)
    #error archfound sh

// SPARC
#elif defined(__sparc__) || defined(__sparc)
    #if defined(__sparcv9) || defined(__sparc_v9__)
        #error archfound sparc9
    #elif defined(__sparcv8) || defined(__sparc_v8__)
        #error archfound sparc8
    #else
        // SPARC target whose version macro is not listed above
        #error archfound sparc
    #endif

// SystemZ
#elif defined(__370__)
    #error archfound s370
// The 64-bit macros are tested first: compilers targeting s390x also define
// __s390__, which would otherwise hide the more specific result.
#elif defined(__s390x__) || defined(__s390x) || defined(__zarch__)
    #error archfound s390x
#elif defined(__s390__)
    #error archfound s390

// PARISC
#elif defined(__hppa__)
    #error archfound parisc

// RS-6000
#elif defined(__THW_RS6000)
    #error archfound rs6000

// return 'unrecognized' if we do not know what architecture this is
#else
    #error archfound unrecognized
#endif
|
||||
93
libs/zlibng/cmake/detect-arch.cmake
Normal file
93
libs/zlibng/cmake/detect-arch.cmake
Normal file
@ -0,0 +1,93 @@
|
||||
# detect-arch.cmake -- Detect compiler architecture and set ARCH and BASEARCH
# Copyright (C) 2019 Hans Kristian Rosbach
# Licensed under the Zlib license, see LICENSE.md for details
#
# Results:
#   ARCH                   - architecture identifier (e.g. x86_64, aarch64, powerpc64le)
#   BASEARCH               - architecture family (e.g. x86, arm, ppc)
#   BASEARCH_<FAMILY>_FOUND - TRUE for the detected family
set(ARCHDETECT_FOUND TRUE)

if(CMAKE_OSX_ARCHITECTURES)
    # If multiple architectures are requested (universal build), pick only the first
    list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH)
elseif(MSVC)
    if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "X86")
        set(ARCH "i686")
    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "x64")
        set(ARCH "x86_64")
    elseif("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7")
        set(ARCH "arm")
    elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64")
        set(ARCH "aarch64")
    endif()
elseif(CMAKE_CROSSCOMPILING)
    set(ARCH ${CMAKE_C_COMPILER_TARGET})
else()
    # Let the preprocessor parse detect-arch.c, which raises an #error
    # containing "archfound <arch identifier>".
    enable_language(C)
    # Scratch files of the check belong in the build tree, not the source tree.
    try_run(
        run_result_unused
        compile_result_unused
        ${CMAKE_CURRENT_BINARY_DIR}
        ${CMAKE_CURRENT_SOURCE_DIR}/cmake/detect-arch.c
        COMPILE_OUTPUT_VARIABLE RAWOUTPUT
        CMAKE_FLAGS CMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES}
    )

    # Find the archfound tag and extract the arch word into ARCH.
    # An explicit match test is required here: a regex *replace* hands back the
    # entire compiler output when the tag is absent, which would then be
    # mistaken for a detected architecture.
    if("${RAWOUTPUT}" MATCHES "archfound ([a-zA-Z0-9_]+)")
        set(ARCH "${CMAKE_MATCH_1}")
    else()
        set(ARCH unknown)
    endif()
endif()

# Make sure we have ARCH set
if(NOT ARCH OR ARCH STREQUAL "unknown")
    set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
    message(STATUS "Arch not recognized, falling back to cmake arch: '${ARCH}'")
else()
    message(STATUS "Arch detected: '${ARCH}'")
endif()

# Base arch detection
if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)")
    set(BASEARCH "x86")
    set(BASEARCH_X86_FOUND TRUE)
elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)")
    set(BASEARCH "arm")
    set(BASEARCH_ARM_FOUND TRUE)
elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?")
    set(BASEARCH "ppc")
    set(BASEARCH_PPC_FOUND TRUE)
elseif("${ARCH}" MATCHES "alpha")
    set(BASEARCH "alpha")
    set(BASEARCH_ALPHA_FOUND TRUE)
elseif("${ARCH}" MATCHES "blackfin")
    set(BASEARCH "blackfin")
    set(BASEARCH_BLACKFIN_FOUND TRUE)
elseif("${ARCH}" MATCHES "ia64")
    set(BASEARCH "ia64")
    set(BASEARCH_IA64_FOUND TRUE)
elseif("${ARCH}" MATCHES "mips")
    set(BASEARCH "mips")
    set(BASEARCH_MIPS_FOUND TRUE)
elseif("${ARCH}" MATCHES "m68k")
    set(BASEARCH "m68k")
    set(BASEARCH_M68K_FOUND TRUE)
elseif("${ARCH}" MATCHES "sh")
    set(BASEARCH "sh")
    set(BASEARCH_SH_FOUND TRUE)
elseif("${ARCH}" MATCHES "sparc[89]?")
    set(BASEARCH "sparc")
    set(BASEARCH_SPARC_FOUND TRUE)
elseif("${ARCH}" MATCHES "s3[679]0x?")
    set(BASEARCH "s360")
    set(BASEARCH_S360_FOUND TRUE)
elseif("${ARCH}" MATCHES "parisc")
    set(BASEARCH "parisc")
    set(BASEARCH_PARISC_FOUND TRUE)
elseif("${ARCH}" MATCHES "rs6000")
    set(BASEARCH "rs6000")
    set(BASEARCH_RS6000_FOUND TRUE)
else()
    # Unknown identifiers are treated as x86
    set(BASEARCH "x86")
    set(BASEARCH_X86_FOUND TRUE)
    message(STATUS "Basearch '${ARCH}' not recognized, defaulting to 'x86'.")
endif()
message(STATUS "Basearch of '${ARCH}' has been detected as: '${BASEARCH}'")
|
||||
123
libs/zlibng/cmake/detect-sanitizer.cmake
Normal file
123
libs/zlibng/cmake/detect-sanitizer.cmake
Normal file
@ -0,0 +1,123 @@
|
||||
# detect-sanitizer.cmake -- Detect supported compiler sanitizer flags
|
||||
# Licensed under the Zlib license, see LICENSE.md for details
|
||||
|
||||
# check_sanitizer_support(<known_checks> <supported_checks>)
#
#   known_checks     - list of -fsanitize= check names to probe, in order
#   supported_checks - name of the variable that receives the comma-separated
#                      subset of checks the C compiler accepted
#
# The list is grown incrementally: each check is test-compiled together with
# all checks accepted so far, and is kept only if that compilation succeeds.
# Results are stored per check in HAS_SANITIZER_<check>.
# Being a macro, this also leaves available_checks and compile_checks set in
# the caller's scope and resets CMAKE_REQUIRED_FLAGS.
macro(check_sanitizer_support known_checks supported_checks)
    set(available_checks "")

    foreach(check ${known_checks})
        # Candidate = everything accepted so far plus the check under test
        set(compile_checks "${check}")
        if(NOT available_checks STREQUAL "")
            set(compile_checks "${available_checks},${check}")
        endif()

        set(CMAKE_REQUIRED_FLAGS "-fsanitize=${compile_checks}")
        check_c_source_compiles(
            "int main() { return 0; }"
            HAS_SANITIZER_${check}
            FAIL_REGEX "not supported|unrecognized command|unknown option")
        set(CMAKE_REQUIRED_FLAGS)

        # Keep the check only when the compiler accepted the candidate
        if(HAS_SANITIZER_${check})
            set(available_checks ${compile_checks})
        endif()
    endforeach()

    set(${supported_checks} ${available_checks})
endmacro()
|
||||
|
||||
# add_address_sanitizer()
#
# Appends -fsanitize=<checks> to CMAKE_C_FLAGS for every address sanitizer
# check the compiler supports, then does the same for the leak sanitizer
# unless a cross-compiling emulator is in use.
macro(add_address_sanitizer)
    set(known_checks
        address
        pointer-compare
        pointer-subtract
        )

    check_sanitizer_support("${known_checks}" supported_checks)
    # The expansion is quoted on purpose: unquoted, an empty result removes the
    # left operand and leaves the malformed condition `NOT STREQUAL ""`.
    if(NOT "${supported_checks}" STREQUAL "")
        message(STATUS "Address sanitizer is enabled: ${supported_checks}")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
    else()
        message(STATUS "Address sanitizer is not supported")
    endif()

    if(CMAKE_CROSSCOMPILING_EMULATOR)
        # Only check for leak sanitizer if not cross-compiling due to qemu crash
        message(WARNING "Leak sanitizer is not supported when cross compiling")
    else()
        # Leak sanitizer requires address sanitizer
        check_sanitizer_support("leak" supported_checks)
        if(NOT "${supported_checks}" STREQUAL "")
            message(STATUS "Leak sanitizer is enabled: ${supported_checks}")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
        else()
            message(STATUS "Leak sanitizer is not supported")
        endif()
    endif()
endmacro()
|
||||
|
||||
# add_memory_sanitizer()
#
# Appends -fsanitize=memory to CMAKE_C_FLAGS when the compiler supports it.
macro(add_memory_sanitizer)
    check_sanitizer_support("memory" supported_checks)
    # The expansion is quoted on purpose: unquoted, an empty result removes the
    # left operand and leaves the malformed condition `NOT STREQUAL ""`.
    if(NOT "${supported_checks}" STREQUAL "")
        message(STATUS "Memory sanitizer is enabled: ${supported_checks}")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")
    else()
        message(STATUS "Memory sanitizer is not supported")
    endif()
endmacro()
|
||||
|
||||
# add_undefined_sanitizer()
#
# Appends -fsanitize=<checks> to CMAKE_C_FLAGS for every undefined behavior
# sanitizer check the compiler supports. The alignment check is only probed
# when UNALIGNED_OK is not set, and object-size only when CMAKE_C_FLAGS does
# not contain -O0.
macro(add_undefined_sanitizer)
    set(known_checks
        array-bounds
        bool
        bounds
        builtin
        enum
        float-cast-overflow
        float-divide-by-zero
        function
        integer-divide-by-zero
        local-bounds
        null
        nonnull-attribute
        pointer-overflow
        return
        returns-nonnull-attribute
        shift
        shift-base
        shift-exponent
        signed-integer-overflow
        undefined
        unsigned-integer-overflow
        unsigned-shift-base
        vla-bound
        vptr
        )

    # Only check for alignment sanitizer flag if unaligned access is not supported
    if(NOT UNALIGNED_OK)
        list(APPEND known_checks alignment)
    endif()
    # Object size sanitizer has no effect at -O0 and produces compiler warning if enabled
    if(NOT CMAKE_C_FLAGS MATCHES "-O0")
        list(APPEND known_checks object-size)
    endif()

    check_sanitizer_support("${known_checks}" supported_checks)

    # The expansion is quoted on purpose: unquoted, an empty result removes the
    # left operand and leaves the malformed condition `NOT STREQUAL ""`.
    if(NOT "${supported_checks}" STREQUAL "")
        message(STATUS "Undefined behavior sanitizer is enabled: ${supported_checks}")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=${supported_checks}")

        # Group sanitizer flag -fsanitize=undefined will automatically add alignment, even if
        # it is not in our sanitize flag list, so we need to explicitly disable alignment sanitizing.
        if(UNALIGNED_OK)
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-sanitize=alignment")
        endif()
    else()
        message(STATUS "Undefined behavior sanitizer is not supported")
    endif()
endmacro()
|
||||
48
libs/zlibng/cmake/run-and-compare.cmake
Normal file
48
libs/zlibng/cmake/run-and-compare.cmake
Normal file
@ -0,0 +1,48 @@
|
||||
# Script-mode helper: run a command with its stdout redirected to a file,
# then check that the file equals a reference file.
#
# Variables (passed with -D):
#   COMMAND              command line to run (required)
#   OUTPUT               file receiving the command's stdout (required)
#   COMPARE              reference file the output is compared against (required)
#   INPUT                optional file used as the command's stdin
#   SUCCESS_EXIT         optional value forwarded to run-and-redirect.cmake
#   IGNORE_LINE_ENDINGS  optional; compare LF-normalized copies of both files
if(NOT (DEFINED OUTPUT AND DEFINED COMPARE AND DEFINED COMMAND))
    message(FATAL_ERROR "Run and compare arguments missing")
endif()

# Only hand a stdin file on to the redirect script when one was given
set(stdin_definition "")
if(INPUT)
    set(stdin_definition -DINPUT=${INPUT})
endif()

# Run command (with stdin input, if any) and redirect stdout to output
execute_process(COMMAND ${CMAKE_COMMAND}
    "-DCOMMAND=${COMMAND}"
    ${stdin_definition}
    -DOUTPUT=${OUTPUT}
    "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
    -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
    RESULT_VARIABLE CMD_RESULT)

if(CMD_RESULT)
    message(FATAL_ERROR "Run before compare failed: ${CMD_RESULT}")
endif()

# Use configure_file to normalize line-endings; from here on COMPARE and
# OUTPUT refer to the temporary normalized ".cmp" copies
if(IGNORE_LINE_ENDINGS)
    configure_file(${COMPARE} ${COMPARE}.cmp NEWLINE_STYLE LF)
    configure_file(${OUTPUT} ${OUTPUT}.cmp NEWLINE_STYLE LF)
    set(COMPARE ${COMPARE}.cmp)
    set(OUTPUT ${OUTPUT}.cmp)
endif()

# Compare that output is equal to specified file
execute_process(COMMAND ${CMAKE_COMMAND}
    -E compare_files ${COMPARE} ${OUTPUT}
    RESULT_VARIABLE CMD_RESULT)

# Delete temporary files used to normalize line-endings
if(IGNORE_LINE_ENDINGS)
    file(REMOVE ${COMPARE} ${OUTPUT})
endif()

if(CMD_RESULT)
    message(FATAL_ERROR "Run compare failed: ${CMD_RESULT}")
endif()
|
||||
38
libs/zlibng/cmake/run-and-redirect.cmake
Normal file
38
libs/zlibng/cmake/run-and-redirect.cmake
Normal file
@ -0,0 +1,38 @@
|
||||
# Script-mode helper: run COMMAND with stdout redirected to a file and,
# optionally, stdin read from a file.
#
# Variables (passed with -D):
#   COMMAND       command line to run
#   OUTPUT        file receiving stdout; defaults to the null device
#   INPUT         optional file used as stdin
#   SUCCESS_EXIT  optional list of exit codes that count as success

# If no output is specified, discard output
if(NOT DEFINED OUTPUT)
    if(WIN32)
        set(OUTPUT NUL)
    else()
        set(OUTPUT /dev/null)
    endif()
endif()

# Redirect stdin only when an input file was requested
set(stdin_redirect "")
if(INPUT)
    # Check to see that input file exists
    if(NOT EXISTS ${INPUT})
        message(FATAL_ERROR "Cannot find input: ${INPUT}")
    endif()
    set(stdin_redirect INPUT_FILE ${INPUT})
endif()

# Execute with stdout file and, if set, stdin file
execute_process(COMMAND ${COMMAND}
    RESULT_VARIABLE CMD_RESULT
    ${stdin_redirect}
    OUTPUT_FILE ${OUTPUT})

# Check if exit code is in list of successful exit codes
if(SUCCESS_EXIT)
    list(FIND SUCCESS_EXIT ${CMD_RESULT} exit_code_index)
    if(NOT exit_code_index EQUAL -1)
        set(CMD_RESULT 0)
    endif()
endif()

# Check to see if successful
if(CMD_RESULT)
    message(FATAL_ERROR "${COMMAND} failed: ${CMD_RESULT}")
endif()
|
||||
188
libs/zlibng/cmake/test-compress.cmake
Normal file
188
libs/zlibng/cmake/test-compress.cmake
Normal file
@ -0,0 +1,188 @@
|
||||
# A single TARGET is used as both the compressor and the decompressor
if(TARGET)
    set(DECOMPRESS_TARGET ${TARGET})
    set(COMPRESS_TARGET ${TARGET})
endif()

# INPUT and both executables are mandatory
if(NOT (DEFINED INPUT AND DEFINED COMPRESS_TARGET AND DEFINED DECOMPRESS_TARGET))
    message(FATAL_ERROR "Compress test arguments missing")
endif()

# Defaults for everything the caller did not pass on the command line
if(NOT DEFINED SUCCESS_EXIT)
    set(SUCCESS_EXIT 0)
endif()
if(NOT DEFINED GZIP_VERIFY)
    set(GZIP_VERIFY ON)
endif()
if(NOT DEFINED COMPARE)
    set(COMPARE ON)
endif()
if(NOT DEFINED DECOMPRESS_ARGS)
    set(DECOMPRESS_ARGS -d -c)
endif()
if(NOT DEFINED COMPRESS_ARGS)
    set(COMPRESS_ARGS -c -k)
endif()

# Generate unique output path so multiple tests can be executed at the same time
if(OUTPUT)
    # Caller-supplied name: append a unique id in case several tests share it
    string(RANDOM LENGTH 6 UNIQUE_ID)
    set(OUTPUT ${OUTPUT}-${UNIQUE_ID})
else()
    # No name given: derive one from the input path plus a unique id
    string(RANDOM UNIQUE_ID)
    set(OUTPUT ${INPUT}-${UNIQUE_ID})
endif()
# Every ".gz" occurrence is dropped from the base name; the suffixes used for
# the temporary files are appended again further down
string(REPLACE ".gz" "" OUTPUT "${OUTPUT}")
|
||||
|
||||
# Removes every temporary file this test may have produced. ${OUTPUT} is read
# from the calling scope when cleanup() is invoked.
function(cleanup)
    file(REMOVE
        # Temporary minigzip files
        ${OUTPUT}.gz
        ${OUTPUT}.out
        # Temporary gzip files
        ${OUTPUT}.gzip.gz
        ${OUTPUT}.gzip.out)
endfunction()
|
||||
|
||||
# Helper script that runs a command with stdin/stdout redirected to files
set(REDIRECT_SCRIPT ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake)

# Full command lines for both directions
set(COMPRESS_COMMAND ${COMPRESS_TARGET} ${COMPRESS_ARGS})
set(DECOMPRESS_COMMAND ${DECOMPRESS_TARGET} ${DECOMPRESS_ARGS})

# Step 1: compress INPUT into ${OUTPUT}.gz
if(NOT EXISTS ${INPUT})
    message(FATAL_ERROR "Cannot find compress input: ${INPUT}")
endif()

execute_process(
    COMMAND ${CMAKE_COMMAND}
        "-DCOMMAND=${COMPRESS_COMMAND}"
        -DINPUT=${INPUT}
        -DOUTPUT=${OUTPUT}.gz
        "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
        -P ${REDIRECT_SCRIPT}
    RESULT_VARIABLE CMD_RESULT)

if(CMD_RESULT)
    cleanup()
    message(FATAL_ERROR "Compress failed: ${CMD_RESULT}")
endif()

# Step 2: decompress ${OUTPUT}.gz into ${OUTPUT}.out
if(NOT EXISTS ${OUTPUT}.gz)
    cleanup()
    message(FATAL_ERROR "Cannot find decompress input: ${OUTPUT}.gz")
endif()

execute_process(
    COMMAND ${CMAKE_COMMAND}
        "-DCOMMAND=${DECOMPRESS_COMMAND}"
        -DINPUT=${OUTPUT}.gz
        -DOUTPUT=${OUTPUT}.out
        "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
        -P ${REDIRECT_SCRIPT}
    RESULT_VARIABLE CMD_RESULT)

if(CMD_RESULT)
    cleanup()
    message(FATAL_ERROR "Decompress failed: ${CMD_RESULT}")
endif()

# Step 3 (optional): the round trip must reproduce the original input
if(COMPARE)
    execute_process(
        COMMAND ${CMAKE_COMMAND} -E compare_files ${INPUT} ${OUTPUT}.out
        RESULT_VARIABLE CMD_RESULT)

    if(CMD_RESULT)
        cleanup()
        message(FATAL_ERROR "Compare minigzip decompress failed: ${CMD_RESULT}")
    endif()
endif()
|
||||
|
||||
# Optional interoperability check against a gzip program found on the host.
# Skipped when GZIP_VERIFY is off, when no gzip executable is found, or when
# the compress arguments contain -T.
#
# Two directions are verified:
#   1. gzip must be able to decompress what COMPRESS_TARGET produced.
#   2. DECOMPRESS_TARGET must be able to decompress what gzip produced.
if(GZIP_VERIFY AND NOT "${COMPRESS_ARGS}" MATCHES "-T")
    # Transparent writing does not use gzip format
    find_program(GZIP gzip)
    if(GZIP)
        if(NOT EXISTS ${OUTPUT}.gz)
            cleanup()
            message(FATAL_ERROR "Cannot find gzip decompress input: ${OUTPUT}.gz")
        endif()

        # Check gzip can decompress our compressed output
        set(GZ_DECOMPRESS_COMMAND ${GZIP} --decompress)

        execute_process(COMMAND ${CMAKE_COMMAND}
            "-DCOMMAND=${GZ_DECOMPRESS_COMMAND}"
            -DINPUT=${OUTPUT}.gz
            -DOUTPUT=${OUTPUT}.gzip.out
            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Gzip decompress failed: ${CMD_RESULT}")
        endif()

        # Compare gzip output with original input file
        execute_process(COMMAND ${CMAKE_COMMAND}
            -E compare_files ${INPUT} ${OUTPUT}.gzip.out
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Compare gzip decompress failed: ${CMD_RESULT}")
        endif()

        # The next step feeds INPUT to gzip, so INPUT is the file that must
        # exist (the check used to test ${OUTPUT}.gz while reporting INPUT)
        if(NOT EXISTS ${INPUT})
            cleanup()
            message(FATAL_ERROR "Cannot find gzip compress input: ${INPUT}")
        endif()

        # Compress input file with gzip
        set(GZ_COMPRESS_COMMAND ${GZIP} --stdout)

        execute_process(COMMAND ${CMAKE_COMMAND}
            "-DCOMMAND=${GZ_COMPRESS_COMMAND}"
            -DINPUT=${INPUT}
            -DOUTPUT=${OUTPUT}.gzip.gz
            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Gzip compress failed: ${CMD_RESULT}")
        endif()

        # The next step reads ${OUTPUT}.gzip.gz, so that is the file that must
        # exist (the check used to test ${OUTPUT}.gz instead)
        if(NOT EXISTS ${OUTPUT}.gzip.gz)
            cleanup()
            message(FATAL_ERROR "Cannot find minigzip decompress input: ${OUTPUT}.gzip.gz")
        endif()

        # Check minigzip can decompress gzip compressed output
        execute_process(COMMAND ${CMAKE_COMMAND}
            "-DCOMMAND=${DECOMPRESS_COMMAND}"
            -DINPUT=${OUTPUT}.gzip.gz
            -DOUTPUT=${OUTPUT}.gzip.out
            "-DSUCCESS_EXIT=${SUCCESS_EXIT}"
            -P ${CMAKE_CURRENT_LIST_DIR}/run-and-redirect.cmake
            RESULT_VARIABLE CMD_RESULT)

        if(CMD_RESULT)
            cleanup()
            message(FATAL_ERROR "Minigzip decompress gzip failed: ${CMD_RESULT}")
        endif()

        if(COMPARE)
            # Compare original input file with gzip decompressed output
            execute_process(COMMAND ${CMAKE_COMMAND}
                -E compare_files ${INPUT} ${OUTPUT}.gzip.out
                RESULT_VARIABLE CMD_RESULT)

            if(CMD_RESULT)
                cleanup()
                message(FATAL_ERROR "Compare minigzip decompress gzip failed: ${CMD_RESULT}")
            endif()
        endif()
    endif()
endif()

# Success: remove all temporary files
cleanup()
|
||||
26
libs/zlibng/cmake/toolchain-aarch64.cmake
Normal file
26
libs/zlibng/cmake/toolchain-aarch64.cmake
Normal file
@ -0,0 +1,26 @@
|
||||
# Toolchain file for cross-compiling to Linux on aarch64 with the
# aarch64-linux-gnu GNU cross tools, running target binaries through qemu-aarch64.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_SYSTEM_VERSION 1)

# Target triplet; also used below as the cross-tool prefix and in the path
# handed to the emulator
set(CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")

# Report the triplet only after it is set. The message used to expand
# CROSS_COMPILE_TOOLCHAIN, which this file never defines.
message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-aarch64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are looked up on the host only; libraries and headers only under
# the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})

# The C++ cross-compiler is optional and only used when present
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
24
libs/zlibng/cmake/toolchain-arm.cmake
Normal file
24
libs/zlibng/cmake/toolchain-arm.cmake
Normal file
@ -0,0 +1,24 @@
|
||||
# Toolchain file for cross-compiling to Linux on 32-bit ARM, running target
# binaries through qemu-arm.
#
# This file does not hard-code a target triplet: CMAKE_C_COMPILER_TARGET is
# expected to be supplied by the caller (e.g. -DCMAKE_C_COMPILER_TARGET=...).
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_SYSTEM_VERSION 1)

# Without a triplet, the lookups below would search for a program literally
# named "-gcc" and hand "/usr//" to the emulator, so fall back to a default.
# NOTE(review): arm-linux-gnueabi is an assumed default - confirm it matches
# the cross toolchain this project targets.
if(NOT CMAKE_C_COMPILER_TARGET)
    set(CMAKE_C_COMPILER_TARGET "arm-linux-gnueabi")
endif()

message(STATUS "Using cross-compile toolchain: ${CMAKE_C_COMPILER_TARGET}")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-arm -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs are looked up on the host only; libraries, headers and packages
# only under the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(NOT C_COMPILER_FULL_PATH)
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()
set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})

# The C++ cross-compiler is optional and only used when present
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
16
libs/zlibng/cmake/toolchain-mingw-i686.cmake
Normal file
16
libs/zlibng/cmake/toolchain-mingw-i686.cmake
Normal file
@ -0,0 +1,16 @@
|
||||
# Toolchain file for cross-compiling to 32-bit Windows with the
# i686-w64-mingw32 tools, running target binaries through wine.
set(CMAKE_SYSTEM_NAME "Windows")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR "wine")

set(CMAKE_C_COMPILER_TARGET "i686")
set(CMAKE_CXX_COMPILER_TARGET "i686")

# Cross tools are referenced by name
set(CMAKE_RC_COMPILER "i686-w64-mingw32-windres")
set(CMAKE_C_COMPILER "i686-w64-mingw32-gcc")
set(CMAKE_CXX_COMPILER "i686-w64-mingw32-g++")

# Libraries and headers are searched only under the MinGW root; programs are
# looked up on the host only
set(CMAKE_FIND_ROOT_PATH "/usr/i686-w64-mingw32")
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
16
libs/zlibng/cmake/toolchain-mingw-x86_64.cmake
Normal file
16
libs/zlibng/cmake/toolchain-mingw-x86_64.cmake
Normal file
@ -0,0 +1,16 @@
|
||||
# Toolchain file for cross-compiling to 64-bit Windows with the
# x86_64-w64-mingw32 tools, running target binaries through wine.
set(CMAKE_SYSTEM_NAME "Windows")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR "wine")

set(CMAKE_C_COMPILER_TARGET "x86_64")
set(CMAKE_CXX_COMPILER_TARGET "x86_64")

# Cross tools are referenced by name
set(CMAKE_RC_COMPILER "x86_64-w64-mingw32-windres")
set(CMAKE_C_COMPILER "x86_64-w64-mingw32-gcc")
set(CMAKE_CXX_COMPILER "x86_64-w64-mingw32-g++")

# Libraries and headers are searched only under the MinGW root; programs are
# looked up on the host only
set(CMAKE_FIND_ROOT_PATH "/usr/x86_64-w64-mingw32")
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
25
libs/zlibng/cmake/toolchain-powerpc.cmake
Normal file
25
libs/zlibng/cmake/toolchain-powerpc.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# Toolchain file for cross-compiling to Linux on powerpc with the
# powerpc-linux-gnu GNU cross tools, running target binaries through qemu-ppc.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR powerpc)

# Target triplet; also the cross-tool prefix and part of the emulator's -L path
set(CMAKE_C_COMPILER_TARGET "powerpc-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "powerpc-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs come from the host; everything else only from the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
foreach(_find_kind LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_find_kind} ONLY)
endforeach()

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross-compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
25
libs/zlibng/cmake/toolchain-powerpc64.cmake
Normal file
25
libs/zlibng/cmake/toolchain-powerpc64.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# Toolchain file for cross-compiling to Linux on ppc64 with the
# powerpc64-linux-gnu GNU cross tools, running target binaries through qemu-ppc64.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR ppc64)

# Target triplet; also the cross-tool prefix and part of the emulator's -L path
set(CMAKE_C_COMPILER_TARGET "powerpc64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "powerpc64-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs come from the host; everything else only from the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
foreach(_find_kind LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_find_kind} ONLY)
endforeach()

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross-compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
25
libs/zlibng/cmake/toolchain-powerpc64le.cmake
Normal file
25
libs/zlibng/cmake/toolchain-powerpc64le.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# Toolchain file for cross-compiling to Linux on ppc64le with the
# powerpc64le-linux-gnu GNU cross tools, running target binaries through
# qemu-ppc64le.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR ppc64le)

# Target triplet; also the cross-tool prefix and part of the emulator's -L path
set(CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-ppc64le -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs come from the host; everything else only from the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
foreach(_find_kind LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_find_kind} ONLY)
endforeach()

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross-compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
25
libs/zlibng/cmake/toolchain-s390x.cmake
Normal file
25
libs/zlibng/cmake/toolchain-s390x.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# Toolchain file for cross-compiling to Linux on s390x with the
# s390x-linux-gnu GNU cross tools, running target binaries through qemu-s390x.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR s390x)

# Target triplet; also the cross-tool prefix and part of the emulator's -L path
set(CMAKE_C_COMPILER_TARGET "s390x-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "s390x-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-s390x -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs come from the host; everything else only from the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
foreach(_find_kind LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_find_kind} ONLY)
endforeach()

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross-compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
25
libs/zlibng/cmake/toolchain-sparc64.cmake
Normal file
25
libs/zlibng/cmake/toolchain-sparc64.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# Toolchain file for cross-compiling to Linux on sparc64 with the
# sparc64-linux-gnu GNU cross tools, running target binaries through qemu-sparc64.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_VERSION 1)
set(CMAKE_SYSTEM_PROCESSOR sparc64)

# Target triplet; also the cross-tool prefix and part of the emulator's -L path
set(CMAKE_C_COMPILER_TARGET "sparc64-linux-gnu")
set(CMAKE_CXX_COMPILER_TARGET "sparc64-linux-gnu")

set(CMAKE_CROSSCOMPILING TRUE)
set(CMAKE_CROSSCOMPILING_EMULATOR qemu-sparc64 -L /usr/${CMAKE_C_COMPILER_TARGET}/)

# Programs come from the host; everything else only from the find root path
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
foreach(_find_kind LIBRARY INCLUDE PACKAGE)
    set(CMAKE_FIND_ROOT_PATH_MODE_${_find_kind} ONLY)
endforeach()

# The C cross-compiler is mandatory
find_program(C_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-gcc)
if(C_COMPILER_FULL_PATH)
    set(CMAKE_C_COMPILER ${C_COMPILER_FULL_PATH})
else()
    message(FATAL_ERROR "Cross-compiler ${CMAKE_C_COMPILER_TARGET}-gcc not found")
endif()

# The C++ cross-compiler is optional
find_program(CXX_COMPILER_FULL_PATH ${CMAKE_C_COMPILER_TARGET}-g++)
if(CXX_COMPILER_FULL_PATH)
    set(CMAKE_CXX_COMPILER ${CXX_COMPILER_FULL_PATH})
endif()
|
||||
186
libs/zlibng/compare258.c
Normal file
186
libs/zlibng/compare258.c
Normal file
@ -0,0 +1,186 @@
|
||||
/* compare258.c -- aligned and unaligned versions of compare258
|
||||
* Copyright (C) 2020 Nathan Moinvaziri
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zutil.h"
|
||||
|
||||
#include "fallback_builtins.h"
|
||||
|
||||
/* ALIGNED, byte comparison */
|
||||
/* Byte-at-a-time comparison of up to 256 bytes.
 * Returns the number of leading bytes that are equal in src0 and src1
 * (0..256). Both buffers must be readable up to and including the first
 * differing byte, or for 256 bytes if there is none.
 */
static inline uint32_t compare256_c_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t matched;

    for (matched = 0; matched < 256; matched++) {
        /* stop at the first byte that differs */
        if (src0[matched] != src1[matched])
            return matched;
    }

    return 256;
}
|
||||
|
||||
/* Byte-wise comparison of up to 258 bytes: the first two bytes are checked
 * individually, the remaining 256 are delegated to compare256_c_static().
 * Returns the number of leading bytes that are equal (0..258).
 */
static inline uint32_t compare258_c_static(const unsigned char *src0, const unsigned char *src1) {
    if (src0[0] != src1[0])
        return 0;
    if (src0[1] != src1[1])
        return 1;

    return 2 + compare256_c_static(src0 + 2, src1 + 2);
}
|
||||
|
||||
/* Non-static entry point for the byte-wise variant; forwards to the static
 * inline implementation.
 */
Z_INTERNAL uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1) {
    const uint32_t match_len = compare258_c_static(src0, src1);

    return match_len;
}
|
||||
|
||||
#define LONGEST_MATCH longest_match_c
|
||||
#define COMPARE256 compare256_c_static
|
||||
#define COMPARE258 compare258_c_static
|
||||
|
||||
#include "match_tpl.h"
|
||||
|
||||
#ifdef UNALIGNED_OK
|
||||
/* UNALIGNED_OK, 16-bit integer comparison */
|
||||
/* Compares up to 256 bytes two at a time using 16-bit loads.
 * Returns the number of leading bytes that are equal (0..256). When a 16-bit
 * pair differs, the first byte of the pair is compared on its own to decide
 * whether the mismatch is in the first or the second byte.
 */
static inline uint32_t compare256_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t len;

    for (len = 0; len < 256; len += 2) {
        const unsigned char *p0 = src0 + len;
        const unsigned char *p1 = src1 + len;

        if (*(const uint16_t *)p0 != *(const uint16_t *)p1)
            return len + (*p0 == *p1);
    }

    return 256;
}
|
||||
|
||||
/* Compares up to 258 bytes: the first two bytes with a single 16-bit load,
 * the remaining 256 through compare256_unaligned_16_static().
 * Returns the number of leading bytes that are equal (0..258).
 */
static inline uint32_t compare258_unaligned_16_static(const unsigned char *src0, const unsigned char *src1) {
    const int head_equal = *(const uint16_t *)src0 == *(const uint16_t *)src1;

    if (!head_equal)
        return (*src0 == *src1);   /* 1 if only the second byte differs, else 0 */

    return 2 + compare256_unaligned_16_static(src0 + 2, src1 + 2);
}
|
||||
|
||||
/* Non-static entry point for the 16-bit variant; forwards to the static
 * inline implementation.
 */
Z_INTERNAL uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1) {
    const uint32_t match_len = compare258_unaligned_16_static(src0, src1);

    return match_len;
}
|
||||
|
||||
#define LONGEST_MATCH longest_match_unaligned_16
|
||||
#define COMPARE256 compare256_unaligned_16_static
|
||||
#define COMPARE258 compare258_unaligned_16_static
|
||||
|
||||
#include "match_tpl.h"
|
||||
|
||||
#ifdef HAVE_BUILTIN_CTZ
|
||||
/* UNALIGNED_OK, 32-bit integer comparison */
|
||||
/* Compares up to 256 bytes four at a time using 32-bit loads.
 * Returns the number of leading bytes that are equal (0..256).
 *
 * When two words differ, the position of the first differing byte in memory
 * is derived from the XOR of the words. Which end of the word holds the first
 * byte depends on byte order: on little-endian it is the least significant
 * byte (count trailing zeros), on big-endian the most significant byte (count
 * leading zeros). The original code always counted trailing zeros, which
 * gives a wrong match length on big-endian targets.
 */
static inline uint32_t compare256_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t len = 0;

    do {
        uint32_t sv = *(const uint32_t *)src0;
        uint32_t mv = *(const uint32_t *)src1;
        uint32_t diff = sv ^ mv;

        if (diff) {
            /* diff is non-zero here, so the builtins are well defined */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            uint32_t match_byte = __builtin_clz(diff) / 8;
#else
            uint32_t match_byte = __builtin_ctz(diff) / 8;
#endif
            return len + match_byte;
        }

        src0 += 4, src1 += 4, len += 4;
    } while (len < 256);

    return 256;
}
|
||||
|
||||
/* Compares up to 258 bytes: the first two bytes with a single 16-bit load,
 * the remaining 256 through compare256_unaligned_32_static().
 * Returns the number of leading bytes that are equal (0..258).
 */
static inline uint32_t compare258_unaligned_32_static(const unsigned char *src0, const unsigned char *src1) {
    const int head_equal = *(const uint16_t *)src0 == *(const uint16_t *)src1;

    if (!head_equal)
        return (*src0 == *src1);   /* 1 if only the second byte differs, else 0 */

    return 2 + compare256_unaligned_32_static(src0 + 2, src1 + 2);
}
|
||||
|
||||
/* Non-static entry point for the 32-bit variant; forwards to the static
 * inline implementation.
 */
Z_INTERNAL uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1) {
    const uint32_t match_len = compare258_unaligned_32_static(src0, src1);

    return match_len;
}
|
||||
|
||||
#define LONGEST_MATCH longest_match_unaligned_32
|
||||
#define COMPARE256 compare256_unaligned_32_static
|
||||
#define COMPARE258 compare258_unaligned_32_static
|
||||
|
||||
#include "match_tpl.h"
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
/* UNALIGNED64_OK, 64-bit integer comparison */
|
||||
/* Compares up to 256 bytes eight at a time using 64-bit loads.
 * Returns the number of leading bytes that are equal (0..256).
 *
 * When two words differ, the position of the first differing byte in memory
 * is derived from the XOR of the words. Which end of the word holds the first
 * byte depends on byte order: on little-endian it is the least significant
 * byte (count trailing zeros), on big-endian the most significant byte (count
 * leading zeros). The original code always counted trailing zeros, which
 * gives a wrong match length on big-endian targets.
 */
static inline uint32_t compare256_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
    uint32_t len = 0;

    do {
        uint64_t sv = *(const uint64_t *)src0;
        uint64_t mv = *(const uint64_t *)src1;
        uint64_t diff = sv ^ mv;

        if (diff) {
            /* diff is non-zero here, so the builtins are well defined */
#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
            uint64_t match_byte = __builtin_clzll(diff) / 8;
#else
            uint64_t match_byte = __builtin_ctzll(diff) / 8;
#endif
            return len + (uint32_t)match_byte;
        }

        src0 += 8, src1 += 8, len += 8;
    } while (len < 256);

    return 256;
}
|
||||
|
||||
/* Compares up to 258 bytes: the first two bytes with a single 16-bit load,
 * the remaining 256 through compare256_unaligned_64_static().
 * Returns the number of leading bytes that are equal (0..258).
 */
static inline uint32_t compare258_unaligned_64_static(const unsigned char *src0, const unsigned char *src1) {
    const int head_equal = *(const uint16_t *)src0 == *(const uint16_t *)src1;

    if (!head_equal)
        return (*src0 == *src1);   /* 1 if only the second byte differs, else 0 */

    return 2 + compare256_unaligned_64_static(src0 + 2, src1 + 2);
}
|
||||
|
||||
/* Non-static entry point for the 64-bit variant; forwards to the static
 * inline implementation.
 */
Z_INTERNAL uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1) {
    const uint32_t match_len = compare258_unaligned_64_static(src0, src1);

    return match_len;
}
|
||||
|
||||
#define LONGEST_MATCH longest_match_unaligned_64
|
||||
#define COMPARE256 compare256_unaligned_64_static
|
||||
#define COMPARE258 compare258_unaligned_64_static
|
||||
|
||||
#include "match_tpl.h"
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@ -3,14 +3,12 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
/* @(#) $Id$ */
|
||||
|
||||
#define ZLIB_INTERNAL
|
||||
#include "zbuild.h"
|
||||
#if defined(ZLIB_COMPAT)
|
||||
# include "zlib.h"
|
||||
# include "zlib.h"
|
||||
#else
|
||||
# include "zlib-ng.h"
|
||||
# include "zlib-ng.h"
|
||||
#endif
|
||||
|
||||
/* ===========================================================================
|
||||
@ -24,7 +22,7 @@
|
||||
memory, Z_BUF_ERROR if there was not enough room in the output buffer,
|
||||
Z_STREAM_ERROR if the level parameter is invalid.
|
||||
*/
|
||||
int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source,
|
||||
int Z_EXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsigned char *source,
|
||||
z_size_t sourceLen, int level) {
|
||||
PREFIX3(stream) stream;
|
||||
int err;
|
||||
@ -44,7 +42,7 @@ int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsi
|
||||
|
||||
stream.next_out = dest;
|
||||
stream.avail_out = 0;
|
||||
stream.next_in = (const unsigned char *)source;
|
||||
stream.next_in = (z_const unsigned char *)source;
|
||||
stream.avail_in = 0;
|
||||
|
||||
do {
|
||||
@ -66,7 +64,7 @@ int ZEXPORT PREFIX(compress2)(unsigned char *dest, z_size_t *destLen, const unsi
|
||||
|
||||
/* ===========================================================================
|
||||
*/
|
||||
int ZEXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) {
|
||||
int Z_EXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsigned char *source, z_size_t sourceLen) {
|
||||
return PREFIX(compress2)(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION);
|
||||
}
|
||||
|
||||
@ -74,6 +72,12 @@ int ZEXPORT PREFIX(compress)(unsigned char *dest, z_size_t *destLen, const unsig
|
||||
If the default memLevel or windowBits for deflateInit() is changed, then
|
||||
this function needs to be updated.
|
||||
*/
|
||||
z_size_t ZEXPORT PREFIX(compressBound)(z_size_t sourceLen) {
|
||||
z_size_t Z_EXPORT PREFIX(compressBound)(z_size_t sourceLen) {
|
||||
#ifndef NO_QUICK_STRATEGY
|
||||
/* Quick deflate strategy worse case is 9 bits per literal, rounded to nearest byte,
|
||||
plus the size of block & gzip headers and footers */
|
||||
return sourceLen + ((sourceLen + 13 + 7) >> 3) + 18;
|
||||
#else
|
||||
return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + (sourceLen >> 25) + 13;
|
||||
#endif
|
||||
}
|
||||
|
||||
682
libs/zlibng/configure
vendored
682
libs/zlibng/configure
vendored
File diff suppressed because it is too large
Load Diff
@ -9,251 +9,40 @@
|
||||
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
|
||||
*/
|
||||
|
||||
/* @(#) $Id$ */
|
||||
|
||||
# include "zbuild.h"
|
||||
# include "gzendian.h"
|
||||
# include <inttypes.h>
|
||||
|
||||
/*
|
||||
Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
|
||||
protection on the static variables used to control the first-use generation
|
||||
of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should
|
||||
first call get_crc_table() to initialize the tables before allowing more than
|
||||
one thread to use crc32().
|
||||
|
||||
DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. A main()
|
||||
routine is also produced, so that this one source file can be compiled to an
|
||||
executable.
|
||||
*/
|
||||
|
||||
#ifdef MAKECRCH
|
||||
# include <stdio.h>
|
||||
# ifndef DYNAMIC_CRC_TABLE
|
||||
# define DYNAMIC_CRC_TABLE
|
||||
# endif /* !DYNAMIC_CRC_TABLE */
|
||||
#endif /* MAKECRCH */
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "zendian.h"
|
||||
#include <inttypes.h>
|
||||
#include "deflate.h"
|
||||
#include "functable.h"
|
||||
|
||||
|
||||
/* Local functions for crc concatenation */
|
||||
#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
|
||||
static uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec);
|
||||
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
|
||||
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
|
||||
|
||||
/* ========================================================================= */
|
||||
static uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec) {
|
||||
uint32_t sum = 0;
|
||||
while (vec) {
|
||||
if (vec & 1)
|
||||
sum ^= *mat;
|
||||
vec >>= 1;
|
||||
mat++;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_CRC_TABLE
|
||||
volatile int crc_table_empty = 1;
|
||||
static uint32_t crc_table[8][256];
|
||||
static uint32_t crc_comb[GF2_DIM][GF2_DIM];
|
||||
void make_crc_table(void);
|
||||
static void gf2_matrix_square(uint32_t *square, const uint32_t *mat);
|
||||
#ifdef MAKECRCH
|
||||
static void write_table(FILE *, const uint32_t *, int);
|
||||
#endif /* MAKECRCH */
|
||||
|
||||
/* ========================================================================= */
|
||||
static void gf2_matrix_square(uint32_t *square, const uint32_t *mat) {
|
||||
int n;
|
||||
|
||||
for (n = 0; n < GF2_DIM; n++)
|
||||
square[n] = gf2_matrix_times(mat, mat[n]);
|
||||
}
|
||||
|
||||
/*
|
||||
Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
|
||||
x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.
|
||||
|
||||
Polynomials over GF(2) are represented in binary, one bit per coefficient,
|
||||
with the lowest powers in the most significant bit. Then adding polynomials
|
||||
is just exclusive-or, and multiplying a polynomial by x is a right shift by
|
||||
one. If we call the above polynomial p, and represent a byte as the
|
||||
polynomial q, also with the lowest power in the most significant bit (so the
|
||||
byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
|
||||
where a mod b means the remainder after dividing a by b.
|
||||
|
||||
This calculation is done using the shift-register method of multiplying and
|
||||
taking the remainder. The register is initialized to zero, and for each
|
||||
incoming bit, x^32 is added mod p to the register if the bit is a one (where
|
||||
x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
|
||||
x (which is shifting right by one and adding x^32 mod p if the bit shifted
|
||||
out is a one). We start with the highest power (least significant bit) of
|
||||
q and repeat for all eight bits of q.
|
||||
|
||||
The first table is simply the CRC of all possible eight bit values. This is
|
||||
all the information needed to generate CRCs on data a byte at a time for all
|
||||
combinations of CRC register values and incoming bytes. The remaining tables
|
||||
allow for word-at-a-time CRC calculation for both big-endian and little-
|
||||
endian machines, where a word is four bytes.
|
||||
*/
|
||||
void make_crc_table() {
|
||||
uint32_t c;
|
||||
int n, k;
|
||||
uint32_t poly; /* polynomial exclusive-or pattern */
|
||||
/* terms of polynomial defining this crc (except x^32): */
|
||||
static volatile int first = 1; /* flag to limit concurrent making */
|
||||
static const unsigned char p[] = {0, 1, 2, 4, 5, 7, 8, 10, 11, 12, 16, 22, 23, 26};
|
||||
|
||||
/* See if another task is already doing this (not thread-safe, but better
|
||||
than nothing -- significantly reduces duration of vulnerability in
|
||||
case the advice about DYNAMIC_CRC_TABLE is ignored) */
|
||||
if (first) {
|
||||
first = 0;
|
||||
|
||||
/* make exclusive-or pattern from polynomial (0xedb88320) */
|
||||
poly = 0;
|
||||
for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
|
||||
poly |= (uint32_t)1 << (31 - p[n]);
|
||||
|
||||
/* generate a crc for every 8-bit value */
|
||||
for (n = 0; n < 256; n++) {
|
||||
c = (uint32_t)n;
|
||||
for (k = 0; k < 8; k++)
|
||||
c = c & 1 ? poly ^ (c >> 1) : c >> 1;
|
||||
crc_table[0][n] = c;
|
||||
}
|
||||
|
||||
/* generate crc for each value followed by one, two, and three zeros,
|
||||
and then the byte reversal of those as well as the first table */
|
||||
for (n = 0; n < 256; n++) {
|
||||
c = crc_table[0][n];
|
||||
crc_table[4][n] = ZSWAP32(c);
|
||||
for (k = 1; k < 4; k++) {
|
||||
c = crc_table[0][c & 0xff] ^ (c >> 8);
|
||||
crc_table[k][n] = c;
|
||||
crc_table[k + 4][n] = ZSWAP32(c);
|
||||
}
|
||||
}
|
||||
|
||||
/* generate zero operators table for crc32_combine() */
|
||||
|
||||
/* generate the operator to apply a single zero bit to a CRC -- the
|
||||
first row adds the polynomial if the low bit is a 1, and the
|
||||
remaining rows shift the CRC right one bit */
|
||||
k = GF2_DIM - 3;
|
||||
crc_comb[k][0] = 0xedb88320UL; /* CRC-32 polynomial */
|
||||
uint32_t row = 1;
|
||||
for (n = 1; n < GF2_DIM; n++) {
|
||||
crc_comb[k][n] = row;
|
||||
row <<= 1;
|
||||
}
|
||||
|
||||
/* generate operators that apply 2, 4, and 8 zeros to a CRC, putting
|
||||
the last one, the operator for one zero byte, at the 0 position */
|
||||
gf2_matrix_square(crc_comb[k + 1], crc_comb[k]);
|
||||
gf2_matrix_square(crc_comb[k + 2], crc_comb[k + 1]);
|
||||
gf2_matrix_square(crc_comb[0], crc_comb[k + 2]);
|
||||
|
||||
/* generate operators for applying 2^n zero bytes to a CRC, filling out
|
||||
the remainder of the table -- the operators repeat after GF2_DIM
|
||||
values of n, so the table only needs GF2_DIM entries, regardless of
|
||||
the size of the length being processed */
|
||||
for (n = 1; n < k; n++)
|
||||
gf2_matrix_square(crc_comb[n], crc_comb[n - 1]);
|
||||
|
||||
/* mark tables as complete, in case someone else is waiting */
|
||||
crc_table_empty = 0;
|
||||
} else { /* not first */
|
||||
/* wait for the other guy to finish (not efficient, but rare) */
|
||||
while (crc_table_empty)
|
||||
{}
|
||||
}
|
||||
#ifdef MAKECRCH
|
||||
{
|
||||
FILE *out;
|
||||
|
||||
out = fopen("crc32.h", "w");
|
||||
if (out == NULL) return;
|
||||
|
||||
/* write out CRC table to crc32.h */
|
||||
fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
|
||||
fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
|
||||
fprintf(out, "static const uint32_t ");
|
||||
fprintf(out, "crc_table[8][256] =\n{\n {\n");
|
||||
write_table(out, crc_table[0], 256);
|
||||
for (k = 1; k < 8; k++) {
|
||||
fprintf(out, " },\n {\n");
|
||||
write_table(out, crc_table[k], 256);
|
||||
}
|
||||
fprintf(out, " }\n};\n");
|
||||
|
||||
/* write out zero operator table to crc32.h */
|
||||
fprintf(out, "\nstatic const uint32_t ");
|
||||
fprintf(out, "crc_comb[%d][%d] =\n{\n {\n", GF2_DIM, GF2_DIM);
|
||||
write_table(out, crc_comb[0], GF2_DIM);
|
||||
for (k = 1; k < GF2_DIM; k++) {
|
||||
fprintf(out, " },\n {\n");
|
||||
write_table(out, crc_comb[k], GF2_DIM);
|
||||
}
|
||||
fprintf(out, " }\n};\n");
|
||||
fclose(out);
|
||||
}
|
||||
#endif /* MAKECRCH */
|
||||
}
|
||||
|
||||
#ifdef MAKECRCH
|
||||
/* Emit the k entries of `table` to `out` as zero-padded 8-digit hex
 * literals, five per line. Entries are separated by commas; the final
 * entry is followed by a newline only, so the caller supplies the
 * surrounding braces. */
static void write_table(FILE *out, const uint32_t *table, int k) {
    int idx = 0;

    while (idx < k) {
        const char *lead;
        const char *tail;

        /* only the first entry of each row of five gets the leading pad */
        if (idx % 5 != 0)
            lead = "";
        else
            lead = " ";

        /* last entry: no comma; fifth entry of a row: comma + line break */
        if (idx == k - 1)
            tail = "\n";
        else if (idx % 5 == 4)
            tail = ",\n";
        else
            tail = ", ";

        fprintf(out, "%s0x%08" PRIx32 "%s", lead, (uint32_t)table[idx], tail);
        idx++;
    }
}
|
||||
|
||||
/* Entry point of the stand-alone table generator (this code is only
 * compiled when MAKECRCH is defined): build the tables once, which in a
 * MAKECRCH build also writes them out to crc32.h, then exit successfully.
 * Declared with an explicit (void) parameter list so the definition is a
 * proper prototype, matching the other parameterless functions here. */
int main(void) {
    make_crc_table();
    return 0;
}
|
||||
#endif /* MAKECRCH */
|
||||
|
||||
#else /* !DYNAMIC_CRC_TABLE */
|
||||
/* ========================================================================
|
||||
* Tables of CRC-32s of all single-byte values, made by make_crc_table(),
|
||||
* and tables of zero operator matrices for crc32_combine().
|
||||
*/
|
||||
#include "crc32.h"
|
||||
#endif /* DYNAMIC_CRC_TABLE */
|
||||
#include "crc32_tbl.h"
|
||||
|
||||
/* =========================================================================
|
||||
* This function can be used by asm versions of crc32()
|
||||
*/
|
||||
const uint32_t * ZEXPORT PREFIX(get_crc_table)(void) {
|
||||
#ifdef DYNAMIC_CRC_TABLE
|
||||
if (crc_table_empty)
|
||||
make_crc_table();
|
||||
#endif /* DYNAMIC_CRC_TABLE */
|
||||
const uint32_t * Z_EXPORT PREFIX(get_crc_table)(void) {
|
||||
return (const uint32_t *)crc_table;
|
||||
}
|
||||
|
||||
uint32_t ZEXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(crc32_z)(unsigned long crc, const unsigned char *buf, size_t len) {
|
||||
if (buf == NULL) return 0;
|
||||
|
||||
return (unsigned long)functable.crc32((uint32_t)crc, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(crc32_z)(uint32_t crc, const unsigned char *buf, size_t len) {
|
||||
if (buf == NULL) return 0;
|
||||
|
||||
return functable.crc32(crc, buf, len);
|
||||
}
|
||||
#endif
|
||||
/* ========================================================================= */
|
||||
#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
|
||||
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
|
||||
#define DO4 DO1; DO1; DO1; DO1
|
||||
|
||||
/* ========================================================================= */
|
||||
ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len)
|
||||
{
|
||||
Z_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
crc = crc ^ 0xffffffff;
|
||||
|
||||
#ifdef UNROLL_MORE
|
||||
@ -274,9 +63,15 @@ ZLIB_INTERNAL uint32_t crc32_generic(uint32_t crc, const unsigned char *buf, uin
|
||||
return crc ^ 0xffffffff;
|
||||
}
|
||||
|
||||
uint32_t ZEXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(crc32)(unsigned long crc, const unsigned char *buf, unsigned int len) {
|
||||
return (unsigned long)PREFIX(crc32_z)((uint32_t)crc, buf, len);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t len) {
|
||||
return PREFIX(crc32_z)(crc, buf, len);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
|
||||
@ -298,9 +93,9 @@ uint32_t ZEXPORT PREFIX(crc32)(uint32_t crc, const unsigned char *buf, uint32_t
|
||||
#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4
|
||||
|
||||
/* ========================================================================= */
|
||||
ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
register uint32_t c;
|
||||
register const uint32_t *buf4;
|
||||
Z_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
Z_REGISTER uint32_t c;
|
||||
Z_REGISTER const uint32_t *buf4;
|
||||
|
||||
c = crc;
|
||||
c = ~c;
|
||||
@ -340,9 +135,9 @@ ZLIB_INTERNAL uint32_t crc32_little(uint32_t crc, const unsigned char *buf, uint
|
||||
#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
|
||||
|
||||
/* ========================================================================= */
|
||||
ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
register uint32_t c;
|
||||
register const uint32_t *buf4;
|
||||
Z_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
Z_REGISTER uint32_t c;
|
||||
Z_REGISTER const uint32_t *buf4;
|
||||
|
||||
c = ZSWAP32(crc);
|
||||
c = ~c;
|
||||
@ -374,45 +169,19 @@ ZLIB_INTERNAL uint32_t crc32_big(uint32_t crc, const unsigned char *buf, uint64_
|
||||
}
|
||||
#endif /* BYTE_ORDER == BIG_ENDIAN */
|
||||
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Shared implementation behind the crc32_combine entry points.
 * For every set bit n of len2 the matching zero operator crc_comb[n] is
 * applied to crc1; the result is then XORed with crc2. A non-positive
 * len2 leaves crc1 untouched before the XOR. */
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    int slot = 0;

#ifdef DYNAMIC_CRC_TABLE
    /* build the tables on first use */
    if (crc_table_empty)
        make_crc_table();
#endif /* DYNAMIC_CRC_TABLE */

    if (len2 <= 0)
        return crc1 ^ crc2;

    /* operator for 2^n zeros repeats every GF2_DIM n values, so the table
       index wraps around while the bits of len2 are consumed */
    while (len2 != 0) {
        if ((len2 & 1) != 0)
            crc1 = gf2_matrix_times(crc_comb[slot], crc1);
        slot = (slot + 1) % GF2_DIM;
        len2 >>= 1;
    }
    return crc1 ^ crc2;
}
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Public combine entry point taking a z_off_t length; the length is
 * widened and the work is done by crc32_combine_. */
uint32_t ZEXPORT PREFIX(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off_t len2) {
    const uint32_t merged = crc32_combine_(crc1, crc2, (z_off64_t)len2);

    return merged;
}
|
||||
|
||||
/* Public combine entry point taking a z_off64_t length; forwards
 * directly to crc32_combine_. */
uint32_t ZEXPORT PREFIX(crc32_combine64)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    const uint32_t merged = crc32_combine_(crc1, crc2, len2);

    return merged;
}
|
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
#include "arch/x86/x86.h"
|
||||
#include "arch/x86/crc_folding.h"
|
||||
|
||||
ZLIB_INTERNAL void crc_finalize(deflate_state *const s) {
|
||||
Z_INTERNAL void crc_finalize(deflate_state *const s) {
|
||||
if (x86_cpu_has_pclmulqdq)
|
||||
s->strm->adler = crc_fold_512to32(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
|
||||
Z_INTERNAL void crc_reset(deflate_state *const s) {
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
x86_check_features();
|
||||
if (x86_cpu_has_pclmulqdq) {
|
||||
crc_fold_init(s);
|
||||
return;
|
||||
@ -421,7 +190,7 @@ ZLIB_INTERNAL void crc_reset(deflate_state *const s) {
|
||||
s->strm->adler = PREFIX(crc32)(0L, NULL, 0);
|
||||
}
|
||||
|
||||
ZLIB_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
|
||||
Z_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsigned long size) {
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
if (x86_cpu_has_pclmulqdq) {
|
||||
crc_fold_copy(strm->state, dst, strm->next_in, size);
|
||||
@ -431,68 +200,3 @@ ZLIB_INTERNAL void copy_with_crc(PREFIX3(stream) *strm, unsigned char *dst, unsi
|
||||
memcpy(dst, strm->next_in, size);
|
||||
strm->adler = PREFIX(crc32)(strm->adler, dst, size);
|
||||
}
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Shared implementation behind the crc32_combine_gen entry points.
 * Fills op[0..GF2_DIM-1] with the operator for len2: the identity matrix
 * when len2 is zero or negative, otherwise the product of the crc_comb
 * operators selected by the set bits of len2. */
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2) {
    uint32_t bit;
    int col;
    unsigned pos;

#ifdef DYNAMIC_CRC_TABLE
    /* build the tables on first use */
    if (crc_table_empty)
        make_crc_table();
#endif /* DYNAMIC_CRC_TABLE */

    /* zero or negative length: hand back the identity matrix */
    if (len2 <= 0) {
        for (col = 0, bit = 1; col < GF2_DIM; col++, bit <<= 1)
            op[col] = bit;
        return;
    }

    /* len2 has at least one set bit: skip the clear low bits, then seed op
       with the operator belonging to the lowest set bit */
    pos = 0;
    while ((len2 & 1) == 0) {
        len2 >>= 1;
        pos = (pos + 1) % GF2_DIM;
    }
    for (col = 0; col < GF2_DIM; col++)
        op[col] = crc_comb[pos][col];

    /* fold in the operator of every further set bit of len2 */
    len2 >>= 1;
    pos = (pos + 1) % GF2_DIM;
    while (len2 != 0) {
        if ((len2 & 1) != 0) {
            for (col = 0; col < GF2_DIM; col++)
                op[col] = gf2_matrix_times(crc_comb[pos], op[col]);
        }
        len2 >>= 1;
        pos = (pos + 1) % GF2_DIM;
    }
}
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Public operator generator taking a z_off_t length; the length is
 * widened and the work is done by crc32_combine_gen_. */
void ZEXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2) {
    const z_off64_t span = len2;

    crc32_combine_gen_(op, span);
}
|
||||
|
||||
/* Public operator generator taking a z_off64_t length; forwards directly
 * to crc32_combine_gen_. */
void ZEXPORT PREFIX(crc32_combine_gen64)(uint32_t *op, z_off64_t len2) {
    const z_off64_t span = len2;

    crc32_combine_gen_(op, span);
}
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Combine two CRCs using a previously generated operator: op is applied
 * to crc1 with gf2_matrix_times and the result is XORed with crc2. */
uint32_t ZEXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op) {
    const uint32_t advanced = gf2_matrix_times(op, crc1);

    return advanced ^ crc2;
}
|
||||
|
||||
108
libs/zlibng/crc32_comb.c
Normal file
108
libs/zlibng/crc32_comb.c
Normal file
@ -0,0 +1,108 @@
|
||||
/* crc32_comb.c -- combine CRC-32 values (crc32_combine and its operator helpers)
|
||||
* Copyright (C) 1995-2006, 2010, 2011, 2012, 2016, 2018 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*
|
||||
* Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
|
||||
* CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
|
||||
* tables for updating the shift register in one step with three exclusive-ors
|
||||
* instead of four steps with four exclusive-ors. This results in about a
|
||||
* factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include <inttypes.h>
|
||||
#include "deflate.h"
|
||||
#include "crc32_p.h"
|
||||
#include "crc32_comb_tbl.h"
|
||||
|
||||
|
||||
/* Local functions for crc concatenation */
|
||||
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2);
|
||||
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2);
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Shared implementation behind the crc32_combine entry points.
 * For every set bit n of len2 the matching zero operator crc_comb[n] is
 * applied to crc1; the result is then XORed with crc2. A non-positive
 * len2 leaves crc1 untouched before the XOR. */
static uint32_t crc32_combine_(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
    int slot = 0;

    if (len2 <= 0)
        return crc1 ^ crc2;

    /* operator for 2^n zeros repeats every GF2_DIM n values, so the table
       index wraps around while the bits of len2 are consumed */
    while (len2 != 0) {
        if ((len2 & 1) != 0)
            crc1 = gf2_matrix_times(crc_comb[slot], crc1);
        slot = (slot + 1) % GF2_DIM;
        len2 >>= 1;
    }
    return crc1 ^ crc2;
}
|
||||
|
||||
/* ========================================================================= */
|
||||
#ifdef ZLIB_COMPAT
|
||||
unsigned long Z_EXPORT PREFIX(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off_t len2) {
|
||||
return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2);
|
||||
}
|
||||
|
||||
unsigned long Z_EXPORT PREFIX4(crc32_combine)(unsigned long crc1, unsigned long crc2, z_off64_t len2) {
|
||||
return (unsigned long)crc32_combine_((uint32_t)crc1, (uint32_t)crc2, len2);
|
||||
}
|
||||
#else
|
||||
uint32_t Z_EXPORT PREFIX4(crc32_combine)(uint32_t crc1, uint32_t crc2, z_off64_t len2) {
|
||||
return crc32_combine_(crc1, crc2, len2);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
/* Shared implementation behind the crc32_combine_gen entry points.
 * Fills op[0..GF2_DIM-1] with the operator for len2: the identity matrix
 * when len2 is zero or negative, otherwise the product of the crc_comb
 * operators selected by the set bits of len2. */
static void crc32_combine_gen_(uint32_t *op, z_off64_t len2) {
    uint32_t bit;
    int col;
    unsigned pos;

    /* zero or negative length: hand back the identity matrix */
    if (len2 <= 0) {
        for (col = 0, bit = 1; col < GF2_DIM; col++, bit <<= 1)
            op[col] = bit;
        return;
    }

    /* len2 has at least one set bit: skip the clear low bits, then seed op
       with the operator belonging to the lowest set bit */
    pos = 0;
    while ((len2 & 1) == 0) {
        len2 >>= 1;
        pos = (pos + 1) % GF2_DIM;
    }
    for (col = 0; col < GF2_DIM; col++)
        op[col] = crc_comb[pos][col];

    /* fold in the operator of every further set bit of len2 */
    len2 >>= 1;
    pos = (pos + 1) % GF2_DIM;
    while (len2 != 0) {
        if ((len2 & 1) != 0) {
            for (col = 0; col < GF2_DIM; col++)
                op[col] = gf2_matrix_times(crc_comb[pos], op[col]);
        }
        len2 >>= 1;
        pos = (pos + 1) % GF2_DIM;
    }
}
|
||||
|
||||
/* ========================================================================= */
|
||||
|
||||
#ifdef ZLIB_COMPAT
/* zlib-compatible operator generator taking a z_off_t length; the length
 * is widened and the work is done by crc32_combine_gen_. */
void Z_EXPORT PREFIX(crc32_combine_gen)(uint32_t *op, z_off_t len2) {
    const z_off64_t span = len2;

    crc32_combine_gen_(op, span);
}
#endif
|
||||
|
||||
/* Operator generator taking a z_off64_t length; forwards directly to
 * crc32_combine_gen_. */
void Z_EXPORT PREFIX4(crc32_combine_gen)(uint32_t *op, z_off64_t len2) {
    const z_off64_t span = len2;

    crc32_combine_gen_(op, span);
}
|
||||
|
||||
/* ========================================================================= */
|
||||
/* Combine two CRCs using a previously generated operator: op is applied
 * to crc1 with gf2_matrix_times and the result is XORed with crc2. */
uint32_t Z_EXPORT PREFIX(crc32_combine_op)(uint32_t crc1, uint32_t crc2, const uint32_t *op) {
    const uint32_t advanced = gf2_matrix_times(op, crc1);

    return advanced ^ crc2;
}
|
||||
300
libs/zlibng/crc32_comb_tbl.h
Normal file
300
libs/zlibng/crc32_comb_tbl.h
Normal file
@ -0,0 +1,300 @@
|
||||
#ifndef CRC32_COMB_TBL_H_
|
||||
#define CRC32_COMB_TBL_H_
|
||||
|
||||
/* crc32_comb_tbl.h -- zero operators table for CRC combine
|
||||
* Generated automatically by makecrct.c
|
||||
*/
|
||||
|
||||
static const uint32_t crc_comb[32][32] =
|
||||
{
|
||||
{
|
||||
0x77073096, 0xee0e612c, 0x076dc419, 0x0edb8832, 0x1db71064,
|
||||
0x3b6e20c8, 0x76dc4190, 0xedb88320, 0x00000001, 0x00000002,
|
||||
0x00000004, 0x00000008, 0x00000010, 0x00000020, 0x00000040,
|
||||
0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
|
||||
0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000,
|
||||
0x00020000, 0x00040000, 0x00080000, 0x00100000, 0x00200000,
|
||||
0x00400000, 0x00800000
|
||||
},
|
||||
{
|
||||
0x191b3141, 0x32366282, 0x646cc504, 0xc8d98a08, 0x4ac21251,
|
||||
0x958424a2, 0xf0794f05, 0x3b83984b, 0x77073096, 0xee0e612c,
|
||||
0x076dc419, 0x0edb8832, 0x1db71064, 0x3b6e20c8, 0x76dc4190,
|
||||
0xedb88320, 0x00000001, 0x00000002, 0x00000004, 0x00000008,
|
||||
0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100,
|
||||
0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000,
|
||||
0x00004000, 0x00008000
|
||||
},
|
||||
{
|
||||
0xb8bc6765, 0xaa09c88b, 0x8f629757, 0xc5b428ef, 0x5019579f,
|
||||
0xa032af3e, 0x9b14583d, 0xed59b63b, 0x01c26a37, 0x0384d46e,
|
||||
0x0709a8dc, 0x0e1351b8, 0x1c26a370, 0x384d46e0, 0x709a8dc0,
|
||||
0xe1351b80, 0x191b3141, 0x32366282, 0x646cc504, 0xc8d98a08,
|
||||
0x4ac21251, 0x958424a2, 0xf0794f05, 0x3b83984b, 0x77073096,
|
||||
0xee0e612c, 0x076dc419, 0x0edb8832, 0x1db71064, 0x3b6e20c8,
|
||||
0x76dc4190, 0xedb88320
|
||||
},
|
||||
{
|
||||
0xccaa009e, 0x4225077d, 0x844a0efa, 0xd3e51bb5, 0x7cbb312b,
|
||||
0xf9766256, 0x299dc2ed, 0x533b85da, 0xa6770bb4, 0x979f1129,
|
||||
0xf44f2413, 0x33ef4e67, 0x67de9cce, 0xcfbd399c, 0x440b7579,
|
||||
0x8816eaf2, 0xcb5cd3a5, 0x4dc8a10b, 0x9b914216, 0xec53826d,
|
||||
0x03d6029b, 0x07ac0536, 0x0f580a6c, 0x1eb014d8, 0x3d6029b0,
|
||||
0x7ac05360, 0xf580a6c0, 0x30704bc1, 0x60e09782, 0xc1c12f04,
|
||||
0x58f35849, 0xb1e6b092
|
||||
},
|
||||
{
|
||||
0xae689191, 0x87a02563, 0xd4314c87, 0x73139f4f, 0xe6273e9e,
|
||||
0x173f7b7d, 0x2e7ef6fa, 0x5cfdedf4, 0xb9fbdbe8, 0xa886b191,
|
||||
0x8a7c6563, 0xcf89cc87, 0x44629f4f, 0x88c53e9e, 0xcafb7b7d,
|
||||
0x4e87f0bb, 0x9d0fe176, 0xe16ec4ad, 0x19ac8f1b, 0x33591e36,
|
||||
0x66b23c6c, 0xcd6478d8, 0x41b9f7f1, 0x8373efe2, 0xdd96d985,
|
||||
0x605cb54b, 0xc0b96a96, 0x5a03d36d, 0xb407a6da, 0xb37e4bf5,
|
||||
0xbd8d91ab, 0xa06a2517
|
||||
},
|
||||
{
|
||||
0xf1da05aa, 0x38c50d15, 0x718a1a2a, 0xe3143454, 0x1d596ee9,
|
||||
0x3ab2ddd2, 0x7565bba4, 0xeacb7748, 0x0ee7e8d1, 0x1dcfd1a2,
|
||||
0x3b9fa344, 0x773f4688, 0xee7e8d10, 0x078c1c61, 0x0f1838c2,
|
||||
0x1e307184, 0x3c60e308, 0x78c1c610, 0xf1838c20, 0x38761e01,
|
||||
0x70ec3c02, 0xe1d87804, 0x18c1f649, 0x3183ec92, 0x6307d924,
|
||||
0xc60fb248, 0x576e62d1, 0xaedcc5a2, 0x86c88d05, 0xd6e01c4b,
|
||||
0x76b13ed7, 0xed627dae
|
||||
},
|
||||
{
|
||||
0x8f352d95, 0xc51b5d6b, 0x5147bc97, 0xa28f792e, 0x9e6ff41d,
|
||||
0xe7aeee7b, 0x142cdab7, 0x2859b56e, 0x50b36adc, 0xa166d5b8,
|
||||
0x99bcad31, 0xe8085c23, 0x0b61be07, 0x16c37c0e, 0x2d86f81c,
|
||||
0x5b0df038, 0xb61be070, 0xb746c6a1, 0xb5fc8b03, 0xb0881047,
|
||||
0xba6126cf, 0xafb34bdf, 0x841791ff, 0xd35e25bf, 0x7dcd4d3f,
|
||||
0xfb9a9a7e, 0x2c4432bd, 0x5888657a, 0xb110caf4, 0xb95093a9,
|
||||
0xa9d02113, 0x88d14467
|
||||
},
|
||||
{
|
||||
0x33fff533, 0x67ffea66, 0xcfffd4cc, 0x448eafd9, 0x891d5fb2,
|
||||
0xc94bb925, 0x49e6740b, 0x93cce816, 0xfce8d66d, 0x22a0aa9b,
|
||||
0x45415536, 0x8a82aa6c, 0xce745299, 0x4799a373, 0x8f3346e6,
|
||||
0xc5178b8d, 0x515e115b, 0xa2bc22b6, 0x9e09432d, 0xe763801b,
|
||||
0x15b60677, 0x2b6c0cee, 0x56d819dc, 0xadb033b8, 0x80116131,
|
||||
0xdb53c423, 0x6dd68e07, 0xdbad1c0e, 0x6c2b3e5d, 0xd8567cba,
|
||||
0x6bddff35, 0xd7bbfe6a
|
||||
},
|
||||
{
|
||||
0xce3371cb, 0x4717e5d7, 0x8e2fcbae, 0xc72e911d, 0x552c247b,
|
||||
0xaa5848f6, 0x8fc197ad, 0xc4f2291b, 0x52955477, 0xa52aa8ee,
|
||||
0x9124579d, 0xf939a97b, 0x290254b7, 0x5204a96e, 0xa40952dc,
|
||||
0x9363a3f9, 0xfdb641b3, 0x201d8527, 0x403b0a4e, 0x8076149c,
|
||||
0xdb9d2f79, 0x6c4b58b3, 0xd896b166, 0x6a5c648d, 0xd4b8c91a,
|
||||
0x72009475, 0xe40128ea, 0x13735795, 0x26e6af2a, 0x4dcd5e54,
|
||||
0x9b9abca8, 0xec447f11
|
||||
},
|
||||
{
|
||||
0x1072db28, 0x20e5b650, 0x41cb6ca0, 0x8396d940, 0xdc5cb4c1,
|
||||
0x63c86fc3, 0xc790df86, 0x5450b94d, 0xa8a1729a, 0x8a33e375,
|
||||
0xcf16c0ab, 0x455c8717, 0x8ab90e2e, 0xce031a1d, 0x4777327b,
|
||||
0x8eee64f6, 0xc6adcfad, 0x562a991b, 0xac553236, 0x83db622d,
|
||||
0xdcc7c21b, 0x62fe8277, 0xc5fd04ee, 0x508b0f9d, 0xa1161f3a,
|
||||
0x995d3835, 0xe9cb762b, 0x08e7ea17, 0x11cfd42e, 0x239fa85c,
|
||||
0x473f50b8, 0x8e7ea170
|
||||
},
|
||||
{
|
||||
0xf891f16f, 0x2a52e49f, 0x54a5c93e, 0xa94b927c, 0x89e622b9,
|
||||
0xc8bd4333, 0x4a0b8027, 0x9417004e, 0xf35f06dd, 0x3dcf0bfb,
|
||||
0x7b9e17f6, 0xf73c2fec, 0x35095999, 0x6a12b332, 0xd4256664,
|
||||
0x733bca89, 0xe6779512, 0x179e2c65, 0x2f3c58ca, 0x5e78b194,
|
||||
0xbcf16328, 0xa293c011, 0x9e568663, 0xe7dc0a87, 0x14c9134f,
|
||||
0x2992269e, 0x53244d3c, 0xa6489a78, 0x97e032b1, 0xf4b16323,
|
||||
0x3213c007, 0x6427800e
|
||||
},
|
||||
{
|
||||
0x88b6ba63, 0xca1c7287, 0x4f49e34f, 0x9e93c69e, 0xe6568b7d,
|
||||
0x17dc10bb, 0x2fb82176, 0x5f7042ec, 0xbee085d8, 0xa6b00df1,
|
||||
0x96111da3, 0xf7533d07, 0x35d77c4f, 0x6baef89e, 0xd75df13c,
|
||||
0x75cae439, 0xeb95c872, 0x0c5a96a5, 0x18b52d4a, 0x316a5a94,
|
||||
0x62d4b528, 0xc5a96a50, 0x5023d2e1, 0xa047a5c2, 0x9bfe4dc5,
|
||||
0xec8d9dcb, 0x026a3dd7, 0x04d47bae, 0x09a8f75c, 0x1351eeb8,
|
||||
0x26a3dd70, 0x4d47bae0
|
||||
},
|
||||
{
|
||||
0x5ad8a92c, 0xb5b15258, 0xb013a2f1, 0xbb5643a3, 0xaddd8107,
|
||||
0x80ca044f, 0xdae50edf, 0x6ebb1bff, 0xdd7637fe, 0x619d69bd,
|
||||
0xc33ad37a, 0x5d04a0b5, 0xba09416a, 0xaf638495, 0x85b60f6b,
|
||||
0xd01d1897, 0x7b4b376f, 0xf6966ede, 0x365ddbfd, 0x6cbbb7fa,
|
||||
0xd9776ff4, 0x699fd9a9, 0xd33fb352, 0x7d0e60e5, 0xfa1cc1ca,
|
||||
0x2f4885d5, 0x5e910baa, 0xbd221754, 0xa13528e9, 0x991b5793,
|
||||
0xe947a967, 0x09fe548f
|
||||
},
|
||||
{
|
||||
0xb566f6e2, 0xb1bceb85, 0xb808d14b, 0xab60a4d7, 0x8db04fef,
|
||||
0xc011999f, 0x5b52357f, 0xb6a46afe, 0xb639d3bd, 0xb702a13b,
|
||||
0xb5744437, 0xb1998e2f, 0xb8421a1f, 0xabf5327f, 0x8c9b62bf,
|
||||
0xc247c33f, 0x5ffe803f, 0xbffd007e, 0xa48b06bd, 0x92670b3b,
|
||||
0xffbf1037, 0x240f262f, 0x481e4c5e, 0x903c98bc, 0xfb083739,
|
||||
0x2d616833, 0x5ac2d066, 0xb585a0cc, 0xb07a47d9, 0xbb8589f3,
|
||||
0xac7a15a7, 0x83852d0f
|
||||
},
|
||||
{
|
||||
0x9d9129bf, 0xe053553f, 0x1bd7ac3f, 0x37af587e, 0x6f5eb0fc,
|
||||
0xdebd61f8, 0x660bc5b1, 0xcc178b62, 0x435e1085, 0x86bc210a,
|
||||
0xd6094455, 0x77638eeb, 0xeec71dd6, 0x06ff3ded, 0x0dfe7bda,
|
||||
0x1bfcf7b4, 0x37f9ef68, 0x6ff3ded0, 0xdfe7bda0, 0x64be7d01,
|
||||
0xc97cfa02, 0x4988f245, 0x9311e48a, 0xfd52cf55, 0x21d498eb,
|
||||
0x43a931d6, 0x875263ac, 0xd5d5c119, 0x70da8473, 0xe1b508e6,
|
||||
0x181b178d, 0x30362f1a
|
||||
},
|
||||
{
|
||||
0x2ee43a2c, 0x5dc87458, 0xbb90e8b0, 0xac50d721, 0x83d0a803,
|
||||
0xdcd05647, 0x62d1aacf, 0xc5a3559e, 0x5037ad7d, 0xa06f5afa,
|
||||
0x9bafb3b5, 0xec2e612b, 0x032dc417, 0x065b882e, 0x0cb7105c,
|
||||
0x196e20b8, 0x32dc4170, 0x65b882e0, 0xcb7105c0, 0x4d930dc1,
|
||||
0x9b261b82, 0xed3d3145, 0x010b64cb, 0x0216c996, 0x042d932c,
|
||||
0x085b2658, 0x10b64cb0, 0x216c9960, 0x42d932c0, 0x85b26580,
|
||||
0xd015cd41, 0x7b5a9cc3
|
||||
},
|
||||
{
|
||||
0x1b4511ee, 0x368a23dc, 0x6d1447b8, 0xda288f70, 0x6f2018a1,
|
||||
0xde403142, 0x67f164c5, 0xcfe2c98a, 0x44b49555, 0x89692aaa,
|
||||
0xc9a35315, 0x4837a06b, 0x906f40d6, 0xfbaf87ed, 0x2c2e099b,
|
||||
0x585c1336, 0xb0b8266c, 0xba014a99, 0xaf739373, 0x859620a7,
|
||||
0xd05d470f, 0x7bcb885f, 0xf79710be, 0x345f273d, 0x68be4e7a,
|
||||
0xd17c9cf4, 0x79883fa9, 0xf3107f52, 0x3d51f8e5, 0x7aa3f1ca,
|
||||
0xf547e394, 0x31fec169
|
||||
},
|
||||
{
|
||||
0xbce15202, 0xa2b3a245, 0x9e1642cb, 0xe75d83d7, 0x15ca01ef,
|
||||
0x2b9403de, 0x572807bc, 0xae500f78, 0x87d118b1, 0xd4d33723,
|
||||
0x72d76807, 0xe5aed00e, 0x102ca65d, 0x20594cba, 0x40b29974,
|
||||
0x816532e8, 0xd9bb6391, 0x6807c163, 0xd00f82c6, 0x7b6e03cd,
|
||||
0xf6dc079a, 0x36c90975, 0x6d9212ea, 0xdb2425d4, 0x6d394de9,
|
||||
0xda729bd2, 0x6f9431e5, 0xdf2863ca, 0x6521c1d5, 0xca4383aa,
|
||||
0x4ff60115, 0x9fec022a
|
||||
},
|
||||
{
|
||||
0xff08e5ef, 0x2560cd9f, 0x4ac19b3e, 0x9583367c, 0xf0776ab9,
|
||||
0x3b9fd333, 0x773fa666, 0xee7f4ccc, 0x078f9fd9, 0x0f1f3fb2,
|
||||
0x1e3e7f64, 0x3c7cfec8, 0x78f9fd90, 0xf1f3fb20, 0x3896f001,
|
||||
0x712de002, 0xe25bc004, 0x1fc68649, 0x3f8d0c92, 0x7f1a1924,
|
||||
0xfe343248, 0x271962d1, 0x4e32c5a2, 0x9c658b44, 0xe3ba10c9,
|
||||
0x1c0527d3, 0x380a4fa6, 0x70149f4c, 0xe0293e98, 0x1b237b71,
|
||||
0x3646f6e2, 0x6c8dedc4
|
||||
},
|
||||
{
|
||||
0x6f76172e, 0xdeec2e5c, 0x66a95af9, 0xcd52b5f2, 0x41d46da5,
|
||||
0x83a8db4a, 0xdc20b0d5, 0x633067eb, 0xc660cfd6, 0x57b099ed,
|
||||
0xaf6133da, 0x85b361f5, 0xd017c5ab, 0x7b5e8d17, 0xf6bd1a2e,
|
||||
0x360b321d, 0x6c16643a, 0xd82cc874, 0x6b2896a9, 0xd6512d52,
|
||||
0x77d35ce5, 0xefa6b9ca, 0x043c75d5, 0x0878ebaa, 0x10f1d754,
|
||||
0x21e3aea8, 0x43c75d50, 0x878ebaa0, 0xd46c7301, 0x73a9e043,
|
||||
0xe753c086, 0x15d6874d
|
||||
},
|
||||
{
|
||||
0x56f5cab9, 0xadeb9572, 0x80a62ca5, 0xda3d5f0b, 0x6f0bb857,
|
||||
0xde1770ae, 0x675fe71d, 0xcebfce3a, 0x460e9a35, 0x8c1d346a,
|
||||
0xc34b6e95, 0x5de7db6b, 0xbbcfb6d6, 0xacee6bed, 0x82add19b,
|
||||
0xde2aa577, 0x67244caf, 0xce48995e, 0x47e034fd, 0x8fc069fa,
|
||||
0xc4f1d5b5, 0x5292ad2b, 0xa5255a56, 0x913bb2ed, 0xf906639b,
|
||||
0x297dc177, 0x52fb82ee, 0xa5f705dc, 0x909f0df9, 0xfa4f1db3,
|
||||
0x2fef3d27, 0x5fde7a4e
|
||||
},
|
||||
{
|
||||
0x385993ac, 0x70b32758, 0xe1664eb0, 0x19bd9b21, 0x337b3642,
|
||||
0x66f66c84, 0xcdecd908, 0x40a8b451, 0x815168a2, 0xd9d3d705,
|
||||
0x68d6a84b, 0xd1ad5096, 0x782ba76d, 0xf0574eda, 0x3bdf9bf5,
|
||||
0x77bf37ea, 0xef7e6fd4, 0x058dd9e9, 0x0b1bb3d2, 0x163767a4,
|
||||
0x2c6ecf48, 0x58dd9e90, 0xb1bb3d20, 0xb8077c01, 0xab7ffe43,
|
||||
0x8d8efac7, 0xc06cf3cf, 0x5ba8e1df, 0xb751c3be, 0xb5d2813d,
|
||||
0xb0d4043b, 0xbad90e37
|
||||
},
|
||||
{
|
||||
0xb4247b20, 0xb339f001, 0xbd02e643, 0xa174cac7, 0x999893cf,
|
||||
0xe84021df, 0x0bf145ff, 0x17e28bfe, 0x2fc517fc, 0x5f8a2ff8,
|
||||
0xbf145ff0, 0xa559b9a1, 0x91c27503, 0xf8f5ec47, 0x2a9adecf,
|
||||
0x5535bd9e, 0xaa6b7b3c, 0x8fa7f039, 0xc43ee633, 0x530cca27,
|
||||
0xa619944e, 0x97422edd, 0xf5f55bfb, 0x309bb1b7, 0x6137636e,
|
||||
0xc26ec6dc, 0x5fac8bf9, 0xbf5917f2, 0xa5c329a5, 0x90f7550b,
|
||||
0xfa9fac57, 0x2e4e5eef
|
||||
},
|
||||
{
|
||||
0x695186a7, 0xd2a30d4e, 0x7e371cdd, 0xfc6e39ba, 0x23ad7535,
|
||||
0x475aea6a, 0x8eb5d4d4, 0xc61aafe9, 0x57445993, 0xae88b326,
|
||||
0x8660600d, 0xd7b1c65b, 0x74128af7, 0xe82515ee, 0x0b3b2d9d,
|
||||
0x16765b3a, 0x2cecb674, 0x59d96ce8, 0xb3b2d9d0, 0xbc14b5e1,
|
||||
0xa3586d83, 0x9dc1dd47, 0xe0f2bccf, 0x1a947fdf, 0x3528ffbe,
|
||||
0x6a51ff7c, 0xd4a3fef8, 0x7236fbb1, 0xe46df762, 0x13aae885,
|
||||
0x2755d10a, 0x4eaba214
|
||||
},
|
||||
{
|
||||
0x66bc001e, 0xcd78003c, 0x41810639, 0x83020c72, 0xdd751ea5,
|
||||
0x619b3b0b, 0xc3367616, 0x5d1dea6d, 0xba3bd4da, 0xaf06aff5,
|
||||
0x857c59ab, 0xd189b517, 0x78626c6f, 0xf0c4d8de, 0x3af8b7fd,
|
||||
0x75f16ffa, 0xebe2dff4, 0x0cb4b9a9, 0x19697352, 0x32d2e6a4,
|
||||
0x65a5cd48, 0xcb4b9a90, 0x4de63361, 0x9bcc66c2, 0xece9cbc5,
|
||||
0x02a291cb, 0x05452396, 0x0a8a472c, 0x15148e58, 0x2a291cb0,
|
||||
0x54523960, 0xa8a472c0
|
||||
},
|
||||
{
|
||||
0xb58b27b3, 0xb0674927, 0xbbbf940f, 0xac0e2e5f, 0x836d5aff,
|
||||
0xddabb3bf, 0x6026613f, 0xc04cc27e, 0x5be882bd, 0xb7d1057a,
|
||||
0xb4d30cb5, 0xb2d71f2b, 0xbedf3817, 0xa6cf766f, 0x96efea9f,
|
||||
0xf6aed37f, 0x362ca0bf, 0x6c59417e, 0xd8b282fc, 0x6a1403b9,
|
||||
0xd4280772, 0x732108a5, 0xe642114a, 0x17f524d5, 0x2fea49aa,
|
||||
0x5fd49354, 0xbfa926a8, 0xa4234b11, 0x93379063, 0xfd1e2687,
|
||||
0x214d4b4f, 0x429a969e
|
||||
},
|
||||
{
|
||||
0xfe273162, 0x273f6485, 0x4e7ec90a, 0x9cfd9214, 0xe28a2269,
|
||||
0x1e654293, 0x3cca8526, 0x79950a4c, 0xf32a1498, 0x3d252f71,
|
||||
0x7a4a5ee2, 0xf494bdc4, 0x32587dc9, 0x64b0fb92, 0xc961f724,
|
||||
0x49b2e809, 0x9365d012, 0xfdbaa665, 0x20044a8b, 0x40089516,
|
||||
0x80112a2c, 0xdb535219, 0x6dd7a273, 0xdbaf44e6, 0x6c2f8f8d,
|
||||
0xd85f1f1a, 0x6bcf3875, 0xd79e70ea, 0x744de795, 0xe89bcf2a,
|
||||
0x0a469815, 0x148d302a
|
||||
},
|
||||
{
|
||||
0xd3c98813, 0x7ce21667, 0xf9c42cce, 0x28f95fdd, 0x51f2bfba,
|
||||
0xa3e57f74, 0x9cbbf8a9, 0xe206f713, 0x1f7ce867, 0x3ef9d0ce,
|
||||
0x7df3a19c, 0xfbe74338, 0x2cbf8031, 0x597f0062, 0xb2fe00c4,
|
||||
0xbe8d07c9, 0xa66b09d3, 0x97a715e7, 0xf43f2d8f, 0x330f5d5f,
|
||||
0x661ebabe, 0xcc3d757c, 0x430becb9, 0x8617d972, 0xd75eb4a5,
|
||||
0x75cc6f0b, 0xeb98de16, 0x0c40ba6d, 0x188174da, 0x3102e9b4,
|
||||
0x6205d368, 0xc40ba6d0
|
||||
},
|
||||
{
|
||||
0xf7d6deb4, 0x34dcbb29, 0x69b97652, 0xd372eca4, 0x7d94df09,
|
||||
0xfb29be12, 0x2d227a65, 0x5a44f4ca, 0xb489e994, 0xb262d569,
|
||||
0xbfb4ac93, 0xa4185f67, 0x9341b88f, 0xfdf2775f, 0x2095e8ff,
|
||||
0x412bd1fe, 0x8257a3fc, 0xdfde41b9, 0x64cd8533, 0xc99b0a66,
|
||||
0x4847128d, 0x908e251a, 0xfa6d4c75, 0x2fab9eab, 0x5f573d56,
|
||||
0xbeae7aac, 0xa62df319, 0x972ae073, 0xf524c6a7, 0x31388b0f,
|
||||
0x6271161e, 0xc4e22c3c
|
||||
},
|
||||
{
|
||||
0xedb88320, 0x00000001, 0x00000002, 0x00000004, 0x00000008,
|
||||
0x00000010, 0x00000020, 0x00000040, 0x00000080, 0x00000100,
|
||||
0x00000200, 0x00000400, 0x00000800, 0x00001000, 0x00002000,
|
||||
0x00004000, 0x00008000, 0x00010000, 0x00020000, 0x00040000,
|
||||
0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
|
||||
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
|
||||
0x20000000, 0x40000000
|
||||
},
|
||||
{
|
||||
0x76dc4190, 0xedb88320, 0x00000001, 0x00000002, 0x00000004,
|
||||
0x00000008, 0x00000010, 0x00000020, 0x00000040, 0x00000080,
|
||||
0x00000100, 0x00000200, 0x00000400, 0x00000800, 0x00001000,
|
||||
0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
|
||||
0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000,
|
||||
0x00800000, 0x01000000, 0x02000000, 0x04000000, 0x08000000,
|
||||
0x10000000, 0x20000000
|
||||
},
|
||||
{
|
||||
0x1db71064, 0x3b6e20c8, 0x76dc4190, 0xedb88320, 0x00000001,
|
||||
0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
|
||||
0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400,
|
||||
0x00000800, 0x00001000, 0x00002000, 0x00004000, 0x00008000,
|
||||
0x00010000, 0x00020000, 0x00040000, 0x00080000, 0x00100000,
|
||||
0x00200000, 0x00400000, 0x00800000, 0x01000000, 0x02000000,
|
||||
0x04000000, 0x08000000
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* CRC32_COMB_TBL_H_ */
|
||||
19
libs/zlibng/crc32_p.h
Normal file
19
libs/zlibng/crc32_p.h
Normal file
@ -0,0 +1,19 @@
|
||||
#ifndef CRC32_P_H_
|
||||
#define CRC32_P_H_
|
||||
|
||||
#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */
|
||||
|
||||
|
||||
/* Multiply a matrix by a vector over GF(2).
 * mat[i] is the word associated with bit i of vec; the result is the XOR
 * of mat[i] for every set bit i. Entries of mat are read only for set
 * bits, and the scan stops once no set bits remain in vec. */
static inline uint32_t gf2_matrix_times(const uint32_t *mat, uint32_t vec) {
    uint32_t acc = 0;
    const uint32_t *entry;

    for (entry = mat; vec != 0; vec >>= 1, entry++) {
        if ((vec & 1) != 0)
            acc ^= *entry;
    }
    return acc;
}
|
||||
|
||||
|
||||
#endif /* CRC32_P_H_ */
|
||||
@ -1,8 +1,8 @@
|
||||
#ifndef CRC32_H_
|
||||
#define CRC32_H_
|
||||
#ifndef CRC32_TBL_H_
|
||||
#define CRC32_TBL_H_
|
||||
|
||||
/* crc32.h -- tables for rapid CRC calculation
|
||||
* Generated automatically by crc32.c
|
||||
/* crc32_tbl.h -- tables for rapid CRC calculation
|
||||
* Generated automatically by makecrct.c
|
||||
*/
|
||||
|
||||
static const uint32_t crc_table[8][256] =
|
||||
@ -441,295 +441,4 @@ static const uint32_t crc_table[8][256] =
|
||||
}
|
||||
};
|
||||
|
||||
static const uint32_t crc_comb[32][32] =
|
||||
{
|
||||
{
|
||||
0x77073096UL, 0xee0e612cUL, 0x076dc419UL, 0x0edb8832UL, 0x1db71064UL,
|
||||
0x3b6e20c8UL, 0x76dc4190UL, 0xedb88320UL, 0x00000001UL, 0x00000002UL,
|
||||
0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL,
|
||||
0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL,
|
||||
0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL,
|
||||
0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL,
|
||||
0x00400000UL, 0x00800000UL
|
||||
},
|
||||
{
|
||||
0x191b3141UL, 0x32366282UL, 0x646cc504UL, 0xc8d98a08UL, 0x4ac21251UL,
|
||||
0x958424a2UL, 0xf0794f05UL, 0x3b83984bUL, 0x77073096UL, 0xee0e612cUL,
|
||||
0x076dc419UL, 0x0edb8832UL, 0x1db71064UL, 0x3b6e20c8UL, 0x76dc4190UL,
|
||||
0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
|
||||
0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL,
|
||||
0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL,
|
||||
0x00004000UL, 0x00008000UL
|
||||
},
|
||||
{
|
||||
0xb8bc6765UL, 0xaa09c88bUL, 0x8f629757UL, 0xc5b428efUL, 0x5019579fUL,
|
||||
0xa032af3eUL, 0x9b14583dUL, 0xed59b63bUL, 0x01c26a37UL, 0x0384d46eUL,
|
||||
0x0709a8dcUL, 0x0e1351b8UL, 0x1c26a370UL, 0x384d46e0UL, 0x709a8dc0UL,
|
||||
0xe1351b80UL, 0x191b3141UL, 0x32366282UL, 0x646cc504UL, 0xc8d98a08UL,
|
||||
0x4ac21251UL, 0x958424a2UL, 0xf0794f05UL, 0x3b83984bUL, 0x77073096UL,
|
||||
0xee0e612cUL, 0x076dc419UL, 0x0edb8832UL, 0x1db71064UL, 0x3b6e20c8UL,
|
||||
0x76dc4190UL, 0xedb88320UL
|
||||
},
|
||||
{
|
||||
0xccaa009eUL, 0x4225077dUL, 0x844a0efaUL, 0xd3e51bb5UL, 0x7cbb312bUL,
|
||||
0xf9766256UL, 0x299dc2edUL, 0x533b85daUL, 0xa6770bb4UL, 0x979f1129UL,
|
||||
0xf44f2413UL, 0x33ef4e67UL, 0x67de9cceUL, 0xcfbd399cUL, 0x440b7579UL,
|
||||
0x8816eaf2UL, 0xcb5cd3a5UL, 0x4dc8a10bUL, 0x9b914216UL, 0xec53826dUL,
|
||||
0x03d6029bUL, 0x07ac0536UL, 0x0f580a6cUL, 0x1eb014d8UL, 0x3d6029b0UL,
|
||||
0x7ac05360UL, 0xf580a6c0UL, 0x30704bc1UL, 0x60e09782UL, 0xc1c12f04UL,
|
||||
0x58f35849UL, 0xb1e6b092UL
|
||||
},
|
||||
{
|
||||
0xae689191UL, 0x87a02563UL, 0xd4314c87UL, 0x73139f4fUL, 0xe6273e9eUL,
|
||||
0x173f7b7dUL, 0x2e7ef6faUL, 0x5cfdedf4UL, 0xb9fbdbe8UL, 0xa886b191UL,
|
||||
0x8a7c6563UL, 0xcf89cc87UL, 0x44629f4fUL, 0x88c53e9eUL, 0xcafb7b7dUL,
|
||||
0x4e87f0bbUL, 0x9d0fe176UL, 0xe16ec4adUL, 0x19ac8f1bUL, 0x33591e36UL,
|
||||
0x66b23c6cUL, 0xcd6478d8UL, 0x41b9f7f1UL, 0x8373efe2UL, 0xdd96d985UL,
|
||||
0x605cb54bUL, 0xc0b96a96UL, 0x5a03d36dUL, 0xb407a6daUL, 0xb37e4bf5UL,
|
||||
0xbd8d91abUL, 0xa06a2517UL
|
||||
},
|
||||
{
|
||||
0xf1da05aaUL, 0x38c50d15UL, 0x718a1a2aUL, 0xe3143454UL, 0x1d596ee9UL,
|
||||
0x3ab2ddd2UL, 0x7565bba4UL, 0xeacb7748UL, 0x0ee7e8d1UL, 0x1dcfd1a2UL,
|
||||
0x3b9fa344UL, 0x773f4688UL, 0xee7e8d10UL, 0x078c1c61UL, 0x0f1838c2UL,
|
||||
0x1e307184UL, 0x3c60e308UL, 0x78c1c610UL, 0xf1838c20UL, 0x38761e01UL,
|
||||
0x70ec3c02UL, 0xe1d87804UL, 0x18c1f649UL, 0x3183ec92UL, 0x6307d924UL,
|
||||
0xc60fb248UL, 0x576e62d1UL, 0xaedcc5a2UL, 0x86c88d05UL, 0xd6e01c4bUL,
|
||||
0x76b13ed7UL, 0xed627daeUL
|
||||
},
|
||||
{
|
||||
0x8f352d95UL, 0xc51b5d6bUL, 0x5147bc97UL, 0xa28f792eUL, 0x9e6ff41dUL,
|
||||
0xe7aeee7bUL, 0x142cdab7UL, 0x2859b56eUL, 0x50b36adcUL, 0xa166d5b8UL,
|
||||
0x99bcad31UL, 0xe8085c23UL, 0x0b61be07UL, 0x16c37c0eUL, 0x2d86f81cUL,
|
||||
0x5b0df038UL, 0xb61be070UL, 0xb746c6a1UL, 0xb5fc8b03UL, 0xb0881047UL,
|
||||
0xba6126cfUL, 0xafb34bdfUL, 0x841791ffUL, 0xd35e25bfUL, 0x7dcd4d3fUL,
|
||||
0xfb9a9a7eUL, 0x2c4432bdUL, 0x5888657aUL, 0xb110caf4UL, 0xb95093a9UL,
|
||||
0xa9d02113UL, 0x88d14467UL
|
||||
},
|
||||
{
|
||||
0x33fff533UL, 0x67ffea66UL, 0xcfffd4ccUL, 0x448eafd9UL, 0x891d5fb2UL,
|
||||
0xc94bb925UL, 0x49e6740bUL, 0x93cce816UL, 0xfce8d66dUL, 0x22a0aa9bUL,
|
||||
0x45415536UL, 0x8a82aa6cUL, 0xce745299UL, 0x4799a373UL, 0x8f3346e6UL,
|
||||
0xc5178b8dUL, 0x515e115bUL, 0xa2bc22b6UL, 0x9e09432dUL, 0xe763801bUL,
|
||||
0x15b60677UL, 0x2b6c0ceeUL, 0x56d819dcUL, 0xadb033b8UL, 0x80116131UL,
|
||||
0xdb53c423UL, 0x6dd68e07UL, 0xdbad1c0eUL, 0x6c2b3e5dUL, 0xd8567cbaUL,
|
||||
0x6bddff35UL, 0xd7bbfe6aUL
|
||||
},
|
||||
{
|
||||
0xce3371cbUL, 0x4717e5d7UL, 0x8e2fcbaeUL, 0xc72e911dUL, 0x552c247bUL,
|
||||
0xaa5848f6UL, 0x8fc197adUL, 0xc4f2291bUL, 0x52955477UL, 0xa52aa8eeUL,
|
||||
0x9124579dUL, 0xf939a97bUL, 0x290254b7UL, 0x5204a96eUL, 0xa40952dcUL,
|
||||
0x9363a3f9UL, 0xfdb641b3UL, 0x201d8527UL, 0x403b0a4eUL, 0x8076149cUL,
|
||||
0xdb9d2f79UL, 0x6c4b58b3UL, 0xd896b166UL, 0x6a5c648dUL, 0xd4b8c91aUL,
|
||||
0x72009475UL, 0xe40128eaUL, 0x13735795UL, 0x26e6af2aUL, 0x4dcd5e54UL,
|
||||
0x9b9abca8UL, 0xec447f11UL
|
||||
},
|
||||
{
|
||||
0x1072db28UL, 0x20e5b650UL, 0x41cb6ca0UL, 0x8396d940UL, 0xdc5cb4c1UL,
|
||||
0x63c86fc3UL, 0xc790df86UL, 0x5450b94dUL, 0xa8a1729aUL, 0x8a33e375UL,
|
||||
0xcf16c0abUL, 0x455c8717UL, 0x8ab90e2eUL, 0xce031a1dUL, 0x4777327bUL,
|
||||
0x8eee64f6UL, 0xc6adcfadUL, 0x562a991bUL, 0xac553236UL, 0x83db622dUL,
|
||||
0xdcc7c21bUL, 0x62fe8277UL, 0xc5fd04eeUL, 0x508b0f9dUL, 0xa1161f3aUL,
|
||||
0x995d3835UL, 0xe9cb762bUL, 0x08e7ea17UL, 0x11cfd42eUL, 0x239fa85cUL,
|
||||
0x473f50b8UL, 0x8e7ea170UL
|
||||
},
|
||||
{
|
||||
0xf891f16fUL, 0x2a52e49fUL, 0x54a5c93eUL, 0xa94b927cUL, 0x89e622b9UL,
|
||||
0xc8bd4333UL, 0x4a0b8027UL, 0x9417004eUL, 0xf35f06ddUL, 0x3dcf0bfbUL,
|
||||
0x7b9e17f6UL, 0xf73c2fecUL, 0x35095999UL, 0x6a12b332UL, 0xd4256664UL,
|
||||
0x733bca89UL, 0xe6779512UL, 0x179e2c65UL, 0x2f3c58caUL, 0x5e78b194UL,
|
||||
0xbcf16328UL, 0xa293c011UL, 0x9e568663UL, 0xe7dc0a87UL, 0x14c9134fUL,
|
||||
0x2992269eUL, 0x53244d3cUL, 0xa6489a78UL, 0x97e032b1UL, 0xf4b16323UL,
|
||||
0x3213c007UL, 0x6427800eUL
|
||||
},
|
||||
{
|
||||
0x88b6ba63UL, 0xca1c7287UL, 0x4f49e34fUL, 0x9e93c69eUL, 0xe6568b7dUL,
|
||||
0x17dc10bbUL, 0x2fb82176UL, 0x5f7042ecUL, 0xbee085d8UL, 0xa6b00df1UL,
|
||||
0x96111da3UL, 0xf7533d07UL, 0x35d77c4fUL, 0x6baef89eUL, 0xd75df13cUL,
|
||||
0x75cae439UL, 0xeb95c872UL, 0x0c5a96a5UL, 0x18b52d4aUL, 0x316a5a94UL,
|
||||
0x62d4b528UL, 0xc5a96a50UL, 0x5023d2e1UL, 0xa047a5c2UL, 0x9bfe4dc5UL,
|
||||
0xec8d9dcbUL, 0x026a3dd7UL, 0x04d47baeUL, 0x09a8f75cUL, 0x1351eeb8UL,
|
||||
0x26a3dd70UL, 0x4d47bae0UL
|
||||
},
|
||||
{
|
||||
0x5ad8a92cUL, 0xb5b15258UL, 0xb013a2f1UL, 0xbb5643a3UL, 0xaddd8107UL,
|
||||
0x80ca044fUL, 0xdae50edfUL, 0x6ebb1bffUL, 0xdd7637feUL, 0x619d69bdUL,
|
||||
0xc33ad37aUL, 0x5d04a0b5UL, 0xba09416aUL, 0xaf638495UL, 0x85b60f6bUL,
|
||||
0xd01d1897UL, 0x7b4b376fUL, 0xf6966edeUL, 0x365ddbfdUL, 0x6cbbb7faUL,
|
||||
0xd9776ff4UL, 0x699fd9a9UL, 0xd33fb352UL, 0x7d0e60e5UL, 0xfa1cc1caUL,
|
||||
0x2f4885d5UL, 0x5e910baaUL, 0xbd221754UL, 0xa13528e9UL, 0x991b5793UL,
|
||||
0xe947a967UL, 0x09fe548fUL
|
||||
},
|
||||
{
|
||||
0xb566f6e2UL, 0xb1bceb85UL, 0xb808d14bUL, 0xab60a4d7UL, 0x8db04fefUL,
|
||||
0xc011999fUL, 0x5b52357fUL, 0xb6a46afeUL, 0xb639d3bdUL, 0xb702a13bUL,
|
||||
0xb5744437UL, 0xb1998e2fUL, 0xb8421a1fUL, 0xabf5327fUL, 0x8c9b62bfUL,
|
||||
0xc247c33fUL, 0x5ffe803fUL, 0xbffd007eUL, 0xa48b06bdUL, 0x92670b3bUL,
|
||||
0xffbf1037UL, 0x240f262fUL, 0x481e4c5eUL, 0x903c98bcUL, 0xfb083739UL,
|
||||
0x2d616833UL, 0x5ac2d066UL, 0xb585a0ccUL, 0xb07a47d9UL, 0xbb8589f3UL,
|
||||
0xac7a15a7UL, 0x83852d0fUL
|
||||
},
|
||||
{
|
||||
0x9d9129bfUL, 0xe053553fUL, 0x1bd7ac3fUL, 0x37af587eUL, 0x6f5eb0fcUL,
|
||||
0xdebd61f8UL, 0x660bc5b1UL, 0xcc178b62UL, 0x435e1085UL, 0x86bc210aUL,
|
||||
0xd6094455UL, 0x77638eebUL, 0xeec71dd6UL, 0x06ff3dedUL, 0x0dfe7bdaUL,
|
||||
0x1bfcf7b4UL, 0x37f9ef68UL, 0x6ff3ded0UL, 0xdfe7bda0UL, 0x64be7d01UL,
|
||||
0xc97cfa02UL, 0x4988f245UL, 0x9311e48aUL, 0xfd52cf55UL, 0x21d498ebUL,
|
||||
0x43a931d6UL, 0x875263acUL, 0xd5d5c119UL, 0x70da8473UL, 0xe1b508e6UL,
|
||||
0x181b178dUL, 0x30362f1aUL
|
||||
},
|
||||
{
|
||||
0x2ee43a2cUL, 0x5dc87458UL, 0xbb90e8b0UL, 0xac50d721UL, 0x83d0a803UL,
|
||||
0xdcd05647UL, 0x62d1aacfUL, 0xc5a3559eUL, 0x5037ad7dUL, 0xa06f5afaUL,
|
||||
0x9bafb3b5UL, 0xec2e612bUL, 0x032dc417UL, 0x065b882eUL, 0x0cb7105cUL,
|
||||
0x196e20b8UL, 0x32dc4170UL, 0x65b882e0UL, 0xcb7105c0UL, 0x4d930dc1UL,
|
||||
0x9b261b82UL, 0xed3d3145UL, 0x010b64cbUL, 0x0216c996UL, 0x042d932cUL,
|
||||
0x085b2658UL, 0x10b64cb0UL, 0x216c9960UL, 0x42d932c0UL, 0x85b26580UL,
|
||||
0xd015cd41UL, 0x7b5a9cc3UL
|
||||
},
|
||||
{
|
||||
0x1b4511eeUL, 0x368a23dcUL, 0x6d1447b8UL, 0xda288f70UL, 0x6f2018a1UL,
|
||||
0xde403142UL, 0x67f164c5UL, 0xcfe2c98aUL, 0x44b49555UL, 0x89692aaaUL,
|
||||
0xc9a35315UL, 0x4837a06bUL, 0x906f40d6UL, 0xfbaf87edUL, 0x2c2e099bUL,
|
||||
0x585c1336UL, 0xb0b8266cUL, 0xba014a99UL, 0xaf739373UL, 0x859620a7UL,
|
||||
0xd05d470fUL, 0x7bcb885fUL, 0xf79710beUL, 0x345f273dUL, 0x68be4e7aUL,
|
||||
0xd17c9cf4UL, 0x79883fa9UL, 0xf3107f52UL, 0x3d51f8e5UL, 0x7aa3f1caUL,
|
||||
0xf547e394UL, 0x31fec169UL
|
||||
},
|
||||
{
|
||||
0xbce15202UL, 0xa2b3a245UL, 0x9e1642cbUL, 0xe75d83d7UL, 0x15ca01efUL,
|
||||
0x2b9403deUL, 0x572807bcUL, 0xae500f78UL, 0x87d118b1UL, 0xd4d33723UL,
|
||||
0x72d76807UL, 0xe5aed00eUL, 0x102ca65dUL, 0x20594cbaUL, 0x40b29974UL,
|
||||
0x816532e8UL, 0xd9bb6391UL, 0x6807c163UL, 0xd00f82c6UL, 0x7b6e03cdUL,
|
||||
0xf6dc079aUL, 0x36c90975UL, 0x6d9212eaUL, 0xdb2425d4UL, 0x6d394de9UL,
|
||||
0xda729bd2UL, 0x6f9431e5UL, 0xdf2863caUL, 0x6521c1d5UL, 0xca4383aaUL,
|
||||
0x4ff60115UL, 0x9fec022aUL
|
||||
},
|
||||
{
|
||||
0xff08e5efUL, 0x2560cd9fUL, 0x4ac19b3eUL, 0x9583367cUL, 0xf0776ab9UL,
|
||||
0x3b9fd333UL, 0x773fa666UL, 0xee7f4cccUL, 0x078f9fd9UL, 0x0f1f3fb2UL,
|
||||
0x1e3e7f64UL, 0x3c7cfec8UL, 0x78f9fd90UL, 0xf1f3fb20UL, 0x3896f001UL,
|
||||
0x712de002UL, 0xe25bc004UL, 0x1fc68649UL, 0x3f8d0c92UL, 0x7f1a1924UL,
|
||||
0xfe343248UL, 0x271962d1UL, 0x4e32c5a2UL, 0x9c658b44UL, 0xe3ba10c9UL,
|
||||
0x1c0527d3UL, 0x380a4fa6UL, 0x70149f4cUL, 0xe0293e98UL, 0x1b237b71UL,
|
||||
0x3646f6e2UL, 0x6c8dedc4UL
|
||||
},
|
||||
{
|
||||
0x6f76172eUL, 0xdeec2e5cUL, 0x66a95af9UL, 0xcd52b5f2UL, 0x41d46da5UL,
|
||||
0x83a8db4aUL, 0xdc20b0d5UL, 0x633067ebUL, 0xc660cfd6UL, 0x57b099edUL,
|
||||
0xaf6133daUL, 0x85b361f5UL, 0xd017c5abUL, 0x7b5e8d17UL, 0xf6bd1a2eUL,
|
||||
0x360b321dUL, 0x6c16643aUL, 0xd82cc874UL, 0x6b2896a9UL, 0xd6512d52UL,
|
||||
0x77d35ce5UL, 0xefa6b9caUL, 0x043c75d5UL, 0x0878ebaaUL, 0x10f1d754UL,
|
||||
0x21e3aea8UL, 0x43c75d50UL, 0x878ebaa0UL, 0xd46c7301UL, 0x73a9e043UL,
|
||||
0xe753c086UL, 0x15d6874dUL
|
||||
},
|
||||
{
|
||||
0x56f5cab9UL, 0xadeb9572UL, 0x80a62ca5UL, 0xda3d5f0bUL, 0x6f0bb857UL,
|
||||
0xde1770aeUL, 0x675fe71dUL, 0xcebfce3aUL, 0x460e9a35UL, 0x8c1d346aUL,
|
||||
0xc34b6e95UL, 0x5de7db6bUL, 0xbbcfb6d6UL, 0xacee6bedUL, 0x82add19bUL,
|
||||
0xde2aa577UL, 0x67244cafUL, 0xce48995eUL, 0x47e034fdUL, 0x8fc069faUL,
|
||||
0xc4f1d5b5UL, 0x5292ad2bUL, 0xa5255a56UL, 0x913bb2edUL, 0xf906639bUL,
|
||||
0x297dc177UL, 0x52fb82eeUL, 0xa5f705dcUL, 0x909f0df9UL, 0xfa4f1db3UL,
|
||||
0x2fef3d27UL, 0x5fde7a4eUL
|
||||
},
|
||||
{
|
||||
0x385993acUL, 0x70b32758UL, 0xe1664eb0UL, 0x19bd9b21UL, 0x337b3642UL,
|
||||
0x66f66c84UL, 0xcdecd908UL, 0x40a8b451UL, 0x815168a2UL, 0xd9d3d705UL,
|
||||
0x68d6a84bUL, 0xd1ad5096UL, 0x782ba76dUL, 0xf0574edaUL, 0x3bdf9bf5UL,
|
||||
0x77bf37eaUL, 0xef7e6fd4UL, 0x058dd9e9UL, 0x0b1bb3d2UL, 0x163767a4UL,
|
||||
0x2c6ecf48UL, 0x58dd9e90UL, 0xb1bb3d20UL, 0xb8077c01UL, 0xab7ffe43UL,
|
||||
0x8d8efac7UL, 0xc06cf3cfUL, 0x5ba8e1dfUL, 0xb751c3beUL, 0xb5d2813dUL,
|
||||
0xb0d4043bUL, 0xbad90e37UL
|
||||
},
|
||||
{
|
||||
0xb4247b20UL, 0xb339f001UL, 0xbd02e643UL, 0xa174cac7UL, 0x999893cfUL,
|
||||
0xe84021dfUL, 0x0bf145ffUL, 0x17e28bfeUL, 0x2fc517fcUL, 0x5f8a2ff8UL,
|
||||
0xbf145ff0UL, 0xa559b9a1UL, 0x91c27503UL, 0xf8f5ec47UL, 0x2a9adecfUL,
|
||||
0x5535bd9eUL, 0xaa6b7b3cUL, 0x8fa7f039UL, 0xc43ee633UL, 0x530cca27UL,
|
||||
0xa619944eUL, 0x97422eddUL, 0xf5f55bfbUL, 0x309bb1b7UL, 0x6137636eUL,
|
||||
0xc26ec6dcUL, 0x5fac8bf9UL, 0xbf5917f2UL, 0xa5c329a5UL, 0x90f7550bUL,
|
||||
0xfa9fac57UL, 0x2e4e5eefUL
|
||||
},
|
||||
{
|
||||
0x695186a7UL, 0xd2a30d4eUL, 0x7e371cddUL, 0xfc6e39baUL, 0x23ad7535UL,
|
||||
0x475aea6aUL, 0x8eb5d4d4UL, 0xc61aafe9UL, 0x57445993UL, 0xae88b326UL,
|
||||
0x8660600dUL, 0xd7b1c65bUL, 0x74128af7UL, 0xe82515eeUL, 0x0b3b2d9dUL,
|
||||
0x16765b3aUL, 0x2cecb674UL, 0x59d96ce8UL, 0xb3b2d9d0UL, 0xbc14b5e1UL,
|
||||
0xa3586d83UL, 0x9dc1dd47UL, 0xe0f2bccfUL, 0x1a947fdfUL, 0x3528ffbeUL,
|
||||
0x6a51ff7cUL, 0xd4a3fef8UL, 0x7236fbb1UL, 0xe46df762UL, 0x13aae885UL,
|
||||
0x2755d10aUL, 0x4eaba214UL
|
||||
},
|
||||
{
|
||||
0x66bc001eUL, 0xcd78003cUL, 0x41810639UL, 0x83020c72UL, 0xdd751ea5UL,
|
||||
0x619b3b0bUL, 0xc3367616UL, 0x5d1dea6dUL, 0xba3bd4daUL, 0xaf06aff5UL,
|
||||
0x857c59abUL, 0xd189b517UL, 0x78626c6fUL, 0xf0c4d8deUL, 0x3af8b7fdUL,
|
||||
0x75f16ffaUL, 0xebe2dff4UL, 0x0cb4b9a9UL, 0x19697352UL, 0x32d2e6a4UL,
|
||||
0x65a5cd48UL, 0xcb4b9a90UL, 0x4de63361UL, 0x9bcc66c2UL, 0xece9cbc5UL,
|
||||
0x02a291cbUL, 0x05452396UL, 0x0a8a472cUL, 0x15148e58UL, 0x2a291cb0UL,
|
||||
0x54523960UL, 0xa8a472c0UL
|
||||
},
|
||||
{
|
||||
0xb58b27b3UL, 0xb0674927UL, 0xbbbf940fUL, 0xac0e2e5fUL, 0x836d5affUL,
|
||||
0xddabb3bfUL, 0x6026613fUL, 0xc04cc27eUL, 0x5be882bdUL, 0xb7d1057aUL,
|
||||
0xb4d30cb5UL, 0xb2d71f2bUL, 0xbedf3817UL, 0xa6cf766fUL, 0x96efea9fUL,
|
||||
0xf6aed37fUL, 0x362ca0bfUL, 0x6c59417eUL, 0xd8b282fcUL, 0x6a1403b9UL,
|
||||
0xd4280772UL, 0x732108a5UL, 0xe642114aUL, 0x17f524d5UL, 0x2fea49aaUL,
|
||||
0x5fd49354UL, 0xbfa926a8UL, 0xa4234b11UL, 0x93379063UL, 0xfd1e2687UL,
|
||||
0x214d4b4fUL, 0x429a969eUL
|
||||
},
|
||||
{
|
||||
0xfe273162UL, 0x273f6485UL, 0x4e7ec90aUL, 0x9cfd9214UL, 0xe28a2269UL,
|
||||
0x1e654293UL, 0x3cca8526UL, 0x79950a4cUL, 0xf32a1498UL, 0x3d252f71UL,
|
||||
0x7a4a5ee2UL, 0xf494bdc4UL, 0x32587dc9UL, 0x64b0fb92UL, 0xc961f724UL,
|
||||
0x49b2e809UL, 0x9365d012UL, 0xfdbaa665UL, 0x20044a8bUL, 0x40089516UL,
|
||||
0x80112a2cUL, 0xdb535219UL, 0x6dd7a273UL, 0xdbaf44e6UL, 0x6c2f8f8dUL,
|
||||
0xd85f1f1aUL, 0x6bcf3875UL, 0xd79e70eaUL, 0x744de795UL, 0xe89bcf2aUL,
|
||||
0x0a469815UL, 0x148d302aUL
|
||||
},
|
||||
{
|
||||
0xd3c98813UL, 0x7ce21667UL, 0xf9c42cceUL, 0x28f95fddUL, 0x51f2bfbaUL,
|
||||
0xa3e57f74UL, 0x9cbbf8a9UL, 0xe206f713UL, 0x1f7ce867UL, 0x3ef9d0ceUL,
|
||||
0x7df3a19cUL, 0xfbe74338UL, 0x2cbf8031UL, 0x597f0062UL, 0xb2fe00c4UL,
|
||||
0xbe8d07c9UL, 0xa66b09d3UL, 0x97a715e7UL, 0xf43f2d8fUL, 0x330f5d5fUL,
|
||||
0x661ebabeUL, 0xcc3d757cUL, 0x430becb9UL, 0x8617d972UL, 0xd75eb4a5UL,
|
||||
0x75cc6f0bUL, 0xeb98de16UL, 0x0c40ba6dUL, 0x188174daUL, 0x3102e9b4UL,
|
||||
0x6205d368UL, 0xc40ba6d0UL
|
||||
},
|
||||
{
|
||||
0xf7d6deb4UL, 0x34dcbb29UL, 0x69b97652UL, 0xd372eca4UL, 0x7d94df09UL,
|
||||
0xfb29be12UL, 0x2d227a65UL, 0x5a44f4caUL, 0xb489e994UL, 0xb262d569UL,
|
||||
0xbfb4ac93UL, 0xa4185f67UL, 0x9341b88fUL, 0xfdf2775fUL, 0x2095e8ffUL,
|
||||
0x412bd1feUL, 0x8257a3fcUL, 0xdfde41b9UL, 0x64cd8533UL, 0xc99b0a66UL,
|
||||
0x4847128dUL, 0x908e251aUL, 0xfa6d4c75UL, 0x2fab9eabUL, 0x5f573d56UL,
|
||||
0xbeae7aacUL, 0xa62df319UL, 0x972ae073UL, 0xf524c6a7UL, 0x31388b0fUL,
|
||||
0x6271161eUL, 0xc4e22c3cUL
|
||||
},
|
||||
{
|
||||
0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL, 0x00000008UL,
|
||||
0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL, 0x00000100UL,
|
||||
0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL, 0x00002000UL,
|
||||
0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL, 0x00040000UL,
|
||||
0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL, 0x00800000UL,
|
||||
0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL, 0x10000000UL,
|
||||
0x20000000UL, 0x40000000UL
|
||||
},
|
||||
{
|
||||
0x76dc4190UL, 0xedb88320UL, 0x00000001UL, 0x00000002UL, 0x00000004UL,
|
||||
0x00000008UL, 0x00000010UL, 0x00000020UL, 0x00000040UL, 0x00000080UL,
|
||||
0x00000100UL, 0x00000200UL, 0x00000400UL, 0x00000800UL, 0x00001000UL,
|
||||
0x00002000UL, 0x00004000UL, 0x00008000UL, 0x00010000UL, 0x00020000UL,
|
||||
0x00040000UL, 0x00080000UL, 0x00100000UL, 0x00200000UL, 0x00400000UL,
|
||||
0x00800000UL, 0x01000000UL, 0x02000000UL, 0x04000000UL, 0x08000000UL,
|
||||
0x10000000UL, 0x20000000UL
|
||||
},
|
||||
{
|
||||
0x1db71064UL, 0x3b6e20c8UL, 0x76dc4190UL, 0xedb88320UL, 0x00000001UL,
|
||||
0x00000002UL, 0x00000004UL, 0x00000008UL, 0x00000010UL, 0x00000020UL,
|
||||
0x00000040UL, 0x00000080UL, 0x00000100UL, 0x00000200UL, 0x00000400UL,
|
||||
0x00000800UL, 0x00001000UL, 0x00002000UL, 0x00004000UL, 0x00008000UL,
|
||||
0x00010000UL, 0x00020000UL, 0x00040000UL, 0x00080000UL, 0x00100000UL,
|
||||
0x00200000UL, 0x00400000UL, 0x00800000UL, 0x01000000UL, 0x02000000UL,
|
||||
0x04000000UL, 0x08000000UL
|
||||
}
|
||||
};
|
||||
#endif /* CRC32_H_ */
|
||||
#endif /* CRC32_TBL_H_ */
|
||||
File diff suppressed because it is too large
Load Diff
@ -10,10 +10,8 @@
|
||||
subject to change. Applications should only use zlib.h.
|
||||
*/
|
||||
|
||||
/* @(#) $Id$ */
|
||||
|
||||
#include "zutil.h"
|
||||
#include "gzendian.h"
|
||||
#include "zendian.h"
|
||||
|
||||
/* define NO_GZIP when compiling if you want to disable gzip header and
|
||||
trailer creation by deflate(). NO_GZIP would be used to avoid linking in
|
||||
@ -23,10 +21,6 @@
|
||||
# define GZIP
|
||||
#endif
|
||||
|
||||
#define NIL 0
|
||||
/* Tail of hash chains */
|
||||
|
||||
|
||||
/* ===========================================================================
|
||||
* Internal compression state.
|
||||
*/
|
||||
@ -52,7 +46,7 @@
|
||||
#define MAX_BITS 15
|
||||
/* All codes must not exceed MAX_BITS bits */
|
||||
|
||||
#define Buf_size 16
|
||||
#define BIT_BUF_SIZE 64
|
||||
/* size of bit buffer in bi_buf */
|
||||
|
||||
#define END_BLOCK 256
|
||||
@ -70,6 +64,10 @@
|
||||
#define FINISH_STATE 666 /* stream complete */
|
||||
/* Stream status */
|
||||
|
||||
#define HASH_BITS 16u /* log2(HASH_SIZE) */
|
||||
#define HASH_SIZE 65536u /* number of elements in hash table */
|
||||
#define HASH_MASK (HASH_SIZE - 1u) /* HASH_SIZE-1 */
|
||||
|
||||
|
||||
/* Data structure describing a single value and its code string. */
|
||||
typedef struct ct_data_s {
|
||||
@ -97,34 +95,47 @@ typedef struct tree_desc_s {
|
||||
} tree_desc;
|
||||
|
||||
typedef uint16_t Pos;
|
||||
typedef unsigned IPos;
|
||||
|
||||
/* A Pos is an index in the character window. We use short instead of int to
|
||||
* save space in the various tables. IPos is used only for parameter passing.
|
||||
* save space in the various tables.
|
||||
*/
|
||||
|
||||
typedef struct internal_state {
|
||||
PREFIX3(stream) *strm; /* pointer back to this zlib stream */
|
||||
int status; /* as the name implies */
|
||||
unsigned char *pending_buf; /* output still pending */
|
||||
unsigned long pending_buf_size; /* size of pending_buf */
|
||||
unsigned char *pending_out; /* next pending byte to output to the stream */
|
||||
uint32_t pending_buf_size; /* size of pending_buf */
|
||||
uint32_t pending; /* nb of bytes in the pending buffer */
|
||||
int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
|
||||
PREFIX(gz_headerp) gzhead; /* gzip header information to write */
|
||||
uint32_t gzindex; /* where in extra, name, or comment */
|
||||
unsigned char method; /* can only be DEFLATED */
|
||||
PREFIX(gz_headerp) gzhead; /* gzip header information to write */
|
||||
int status; /* as the name implies */
|
||||
int last_flush; /* value of flush param for previous deflate call */
|
||||
int reproducible; /* Whether reproducible compression results are required. */
|
||||
|
||||
#ifdef X86_PCLMULQDQ_CRC
|
||||
unsigned crc0[4 * 5];
|
||||
#endif
|
||||
int block_open;
|
||||
/* Whether or not a block is currently open for the QUICK deflation scheme.
|
||||
* This is set to 1 if there is an active block, or 0 if the block was just closed.
|
||||
*/
|
||||
|
||||
/* used by deflate.c: */
|
||||
|
||||
unsigned int w_size; /* LZ77 window size (32K by default) */
|
||||
unsigned int w_bits; /* log2(w_size) (8..16) */
|
||||
unsigned int w_mask; /* w_size - 1 */
|
||||
unsigned int lookahead; /* number of valid bytes ahead in window */
|
||||
|
||||
unsigned int high_water;
|
||||
/* High water mark offset in window for initialized bytes -- bytes above
|
||||
* this are set to zero in order to avoid memory check warnings when
|
||||
* longest match routines access bytes past the input. This is then
|
||||
* updated to the new high water mark.
|
||||
*/
|
||||
|
||||
unsigned int window_size;
|
||||
/* Actual size of window: 2*wSize, except when the user input buffer
|
||||
* is directly used as sliding window.
|
||||
*/
|
||||
|
||||
unsigned char *window;
|
||||
/* Sliding window. Input bytes are read into the second half of the window,
|
||||
@ -136,44 +147,24 @@ typedef struct internal_state {
|
||||
* To do: use the user input buffer as sliding window.
|
||||
*/
|
||||
|
||||
unsigned long window_size;
|
||||
/* Actual size of window: 2*wSize, except when the user input buffer
|
||||
* is directly used as sliding window.
|
||||
*/
|
||||
|
||||
Pos *prev;
|
||||
/* Link to older string with same hash index. To limit the size of this
|
||||
* array to 64K, this link is maintained only for the last 32K strings.
|
||||
* An index in this array is thus a window index modulo 32K.
|
||||
*/
|
||||
|
||||
Pos *head; /* Heads of the hash chains or NIL. */
|
||||
Pos *head; /* Heads of the hash chains or 0. */
|
||||
|
||||
unsigned int ins_h; /* hash index of string to be inserted */
|
||||
unsigned int hash_size; /* number of elements in hash table */
|
||||
unsigned int hash_bits; /* log2(hash_size) */
|
||||
unsigned int hash_mask; /* hash_size-1 */
|
||||
|
||||
#if !defined(__x86_64__) && !defined(_M_X64) && !defined(__i386) && !defined(_M_IX86)
|
||||
unsigned int hash_shift;
|
||||
#endif
|
||||
/* Number of bits by which ins_h must be shifted at each input
|
||||
* step. It must be such that after MIN_MATCH steps, the oldest
|
||||
* byte no longer takes part in the hash key, that is:
|
||||
* hash_shift * MIN_MATCH >= hash_bits
|
||||
*/
|
||||
|
||||
long block_start;
|
||||
int block_start;
|
||||
/* Window position at the beginning of the current output block. Gets
|
||||
* negative when the window is moved backwards.
|
||||
*/
|
||||
|
||||
unsigned int match_length; /* length of best match */
|
||||
IPos prev_match; /* previous match */
|
||||
Pos prev_match; /* previous match */
|
||||
int match_available; /* set if previous match exists */
|
||||
unsigned int strstart; /* start of string to insert */
|
||||
unsigned int match_start; /* start of matching string */
|
||||
unsigned int lookahead; /* number of valid bytes ahead in window */
|
||||
|
||||
unsigned int prev_length;
|
||||
/* Length of the best match at previous step. Matches not greater than this
|
||||
@ -181,15 +172,13 @@ typedef struct internal_state {
|
||||
*/
|
||||
|
||||
unsigned int max_chain_length;
|
||||
/* To speed up deflation, hash chains are never searched beyond this
|
||||
* length. A higher limit improves compression ratio but degrades the
|
||||
* speed.
|
||||
/* To speed up deflation, hash chains are never searched beyond this length.
|
||||
* A higher limit improves compression ratio but degrades the speed.
|
||||
*/
|
||||
|
||||
unsigned int max_lazy_match;
|
||||
/* Attempt to find a better match only when the current match is strictly
|
||||
* smaller than this value. This mechanism is used only for compression
|
||||
* levels >= 4.
|
||||
/* Attempt to find a better match only when the current match is strictly smaller
|
||||
* than this value. This mechanism is used only for compression levels >= 4.
|
||||
*/
|
||||
# define max_insert_length max_lazy_match
|
||||
/* Insert new strings in the hash table only if the match length is not
|
||||
@ -205,6 +194,11 @@ typedef struct internal_state {
|
||||
|
||||
int nice_match; /* Stop searching when current match exceeds this */
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
|
||||
/* Only used if X86_PCLMULQDQ_CRC is defined */
|
||||
unsigned crc0[4 * 5];
|
||||
#endif
|
||||
|
||||
/* used by trees.c: */
|
||||
/* Didn't use ct_data typedef below to suppress compiler warning */
|
||||
struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
|
||||
@ -229,8 +223,6 @@ typedef struct internal_state {
|
||||
/* Depth of each subtree used as tie breaker for trees of equal frequency
|
||||
*/
|
||||
|
||||
unsigned char *sym_buf; /* buffer for distances and literals/lengths */
|
||||
|
||||
unsigned int lit_bufsize;
|
||||
/* Size of match buffer for literals/lengths. There are 4 reasons for
|
||||
* limiting lit_bufsize to 64K:
|
||||
@ -251,41 +243,31 @@ typedef struct internal_state {
|
||||
* - I can't count above 4
|
||||
*/
|
||||
|
||||
unsigned int sym_next; /* running index in sym_buf */
|
||||
unsigned int sym_end; /* symbol table full when sym_next reaches this */
|
||||
unsigned char *sym_buf; /* buffer for distances and literals/lengths */
|
||||
unsigned int sym_next; /* running index in sym_buf */
|
||||
unsigned int sym_end; /* symbol table full when sym_next reaches this */
|
||||
|
||||
unsigned long opt_len; /* bit length of current block with optimal trees */
|
||||
unsigned long static_len; /* bit length of current block with static trees */
|
||||
unsigned int matches; /* number of string matches in current block */
|
||||
unsigned int insert; /* bytes at end of window left to insert */
|
||||
|
||||
#ifdef ZLIB_DEBUG
|
||||
/* compressed_len and bits_sent are only used if ZLIB_DEBUG is defined */
|
||||
unsigned long compressed_len; /* total bit length of compressed file mod 2^32 */
|
||||
unsigned long bits_sent; /* bit length of compressed data sent mod 2^32 */
|
||||
#endif
|
||||
|
||||
uint16_t bi_buf;
|
||||
/* Output buffer. bits are inserted starting at the bottom (least
|
||||
* significant bits).
|
||||
*/
|
||||
int bi_valid;
|
||||
/* Number of valid bits in bi_buf. All bits above the last valid bit
|
||||
* are always zero.
|
||||
*/
|
||||
/* Reserved for future use and alignment purposes */
|
||||
char *reserved_p;
|
||||
|
||||
unsigned long high_water;
|
||||
/* High water mark offset in window for initialized bytes -- bytes above
|
||||
* this are set to zero in order to avoid memory check warnings when
|
||||
* longest match routines access bytes past the input. This is then
|
||||
* updated to the new high water mark.
|
||||
*/
|
||||
int block_open;
|
||||
/* Whether or not a block is currently open for the QUICK deflation scheme.
|
||||
* This is set to 1 if there is an active block, or 0 if the block was just
|
||||
* closed.
|
||||
*/
|
||||
uint64_t bi_buf;
|
||||
/* Output buffer. bits are inserted starting at the bottom (least significant bits). */
|
||||
|
||||
} deflate_state;
|
||||
int32_t bi_valid;
|
||||
/* Number of valid bits in bi_buf. All bits above the last valid bit are always zero. */
|
||||
|
||||
/* Reserved for future use and alignment purposes */
|
||||
int32_t reserved[11];
|
||||
} ALIGNED_(8) deflate_state;
|
||||
|
||||
typedef enum {
|
||||
need_more, /* block not completed, need more input or more output */
|
||||
@ -297,18 +279,88 @@ typedef enum {
|
||||
/* Output a byte on the stream.
|
||||
* IN assertion: there is enough room in pending_buf.
|
||||
*/
|
||||
#define put_byte(s, c) {s->pending_buf[s->pending++] = (unsigned char)(c);}
|
||||
/* Store `c`, converted to unsigned char, at pending_buf[pending] of the state
 * pointed to by `s`, then advance `pending` by one.
 *
 * `s` is parenthesized in the expansion so that any pointer-valued expression
 * (for example `&state`) can be passed; without the parentheses `->` would bind
 * to only part of the argument. `s` appears twice in the expansion, so it must
 * not have side effects. The expansion stays a plain brace block so existing
 * call sites expand exactly as before.
 */
#define put_byte(s, c) { \
    (s)->pending_buf[(s)->pending++] = (unsigned char)(c); \
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Output a short LSB first on the stream.
|
||||
* IN assertion: there is enough room in pendingBuf.
|
||||
* IN assertion: there is enough room in pending_buf.
|
||||
*/
|
||||
static inline void put_short(deflate_state *s, uint16_t w) {
|
||||
#if BYTE_ORDER == BIG_ENDIAN
|
||||
w = ZSWAP16(w);
|
||||
#if defined(UNALIGNED_OK)
|
||||
*(uint16_t *)(&s->pending_buf[s->pending]) = w;
|
||||
s->pending += 2;
|
||||
#else
|
||||
put_byte(s, (w & 0xff));
|
||||
put_byte(s, ((w >> 8) & 0xff));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Output a short MSB first on the stream.
|
||||
* IN assertion: there is enough room in pending_buf.
|
||||
*/
|
||||
static inline void put_short_msb(deflate_state *s, uint16_t w) {
|
||||
put_byte(s, ((w >> 8) & 0xff));
|
||||
put_byte(s, (w & 0xff));
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Output a 32-bit unsigned int LSB first on the stream.
|
||||
* IN assertion: there is enough room in pending_buf.
|
||||
*/
|
||||
static inline void put_uint32(deflate_state *s, uint32_t dw) {
|
||||
#if defined(UNALIGNED_OK)
|
||||
*(uint32_t *)(&s->pending_buf[s->pending]) = dw;
|
||||
s->pending += 4;
|
||||
#else
|
||||
put_byte(s, (dw & 0xff));
|
||||
put_byte(s, ((dw >> 8) & 0xff));
|
||||
put_byte(s, ((dw >> 16) & 0xff));
|
||||
put_byte(s, ((dw >> 24) & 0xff));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Output a 32-bit unsigned int MSB first on the stream.
|
||||
* IN assertion: there is enough room in pending_buf.
|
||||
*/
|
||||
static inline void put_uint32_msb(deflate_state *s, uint32_t dw) {
|
||||
#if defined(UNALIGNED_OK)
|
||||
*(uint32_t *)(&s->pending_buf[s->pending]) = ZSWAP32(dw);
|
||||
s->pending += 4;
|
||||
#else
|
||||
put_byte(s, ((dw >> 24) & 0xff));
|
||||
put_byte(s, ((dw >> 16) & 0xff));
|
||||
put_byte(s, ((dw >> 8) & 0xff));
|
||||
put_byte(s, (dw & 0xff));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
* Output a 64-bit unsigned int LSB first on the stream.
|
||||
* IN assertion: there is enough room in pending_buf.
|
||||
*/
|
||||
static inline void put_uint64(deflate_state *s, uint64_t lld) {
|
||||
#if defined(UNALIGNED64_OK)
|
||||
*(uint64_t *)(&s->pending_buf[s->pending]) = lld;
|
||||
s->pending += 8;
|
||||
#elif defined(UNALIGNED_OK)
|
||||
*(uint32_t *)(&s->pending_buf[s->pending]) = lld & 0xffffffff;
|
||||
s->pending += 4;
|
||||
*(uint32_t *)(&s->pending_buf[s->pending]) = (lld >> 32) & 0xffffffff;
|
||||
s->pending += 4;
|
||||
#else
|
||||
put_byte(s, (lld & 0xff));
|
||||
put_byte(s, ((lld >> 8) & 0xff));
|
||||
put_byte(s, ((lld >> 16) & 0xff));
|
||||
put_byte(s, ((lld >> 24) & 0xff));
|
||||
put_byte(s, ((lld >> 32) & 0xff));
|
||||
put_byte(s, ((lld >> 40) & 0xff));
|
||||
put_byte(s, ((lld >> 48) & 0xff));
|
||||
put_byte(s, ((lld >> 56) & 0xff));
|
||||
#endif
|
||||
memcpy(&(s->pending_buf[s->pending]), &w, sizeof(uint16_t));
|
||||
s->pending += 2;
|
||||
}
|
||||
|
||||
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
|
||||
@ -326,120 +378,34 @@ static inline void put_short(deflate_state *s, uint16_t w) {
|
||||
memory checker errors from longest match routines */
|
||||
|
||||
|
||||
void ZLIB_INTERNAL fill_window_c(deflate_state *s);
|
||||
void Z_INTERNAL fill_window(deflate_state *s);
|
||||
void Z_INTERNAL slide_hash_c(deflate_state *s);
|
||||
|
||||
/* in trees.c */
|
||||
void ZLIB_INTERNAL _tr_init(deflate_state *s);
|
||||
int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc);
|
||||
void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, char *buf, unsigned long stored_len, int last);
|
||||
void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s);
|
||||
void ZLIB_INTERNAL _tr_align(deflate_state *s);
|
||||
void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, char *buf, unsigned long stored_len, int last);
|
||||
void ZLIB_INTERNAL bi_windup(deflate_state *s);
|
||||
unsigned ZLIB_INTERNAL bi_reverse(unsigned code, int len);
|
||||
void ZLIB_INTERNAL flush_pending(PREFIX3(streamp) strm);
|
||||
|
||||
#define d_code(dist) ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
|
||||
void Z_INTERNAL zng_tr_init(deflate_state *s);
|
||||
void Z_INTERNAL zng_tr_flush_block(deflate_state *s, char *buf, uint32_t stored_len, int last);
|
||||
void Z_INTERNAL zng_tr_flush_bits(deflate_state *s);
|
||||
void Z_INTERNAL zng_tr_align(deflate_state *s);
|
||||
void Z_INTERNAL zng_tr_stored_block(deflate_state *s, char *buf, uint32_t stored_len, int last);
|
||||
unsigned Z_INTERNAL bi_reverse(unsigned code, int len);
|
||||
void Z_INTERNAL flush_pending(PREFIX3(streamp) strm);
|
||||
#define d_code(dist) ((dist) < 256 ? zng_dist_code[dist] : zng_dist_code[256+((dist)>>7)])
|
||||
/* Mapping from a distance to a distance code. dist is the distance - 1 and
|
||||
* must not have side effects. _dist_code[256] and _dist_code[257] are never
|
||||
* must not have side effects. zng_dist_code[256] and zng_dist_code[257] are never
|
||||
* used.
|
||||
*/
|
||||
|
||||
#ifndef ZLIB_DEBUG
|
||||
/* Inline versions of _tr_tally for speed: */
|
||||
|
||||
# if defined(GEN_TREES_H)
|
||||
extern unsigned char ZLIB_INTERNAL _length_code[];
|
||||
extern unsigned char ZLIB_INTERNAL _dist_code[];
|
||||
# else
|
||||
extern const unsigned char ZLIB_INTERNAL _length_code[];
|
||||
extern const unsigned char ZLIB_INTERNAL _dist_code[];
|
||||
# endif
|
||||
|
||||
# define _tr_tally_lit(s, c, flush) \
|
||||
{ unsigned char cc = (c); \
|
||||
s->sym_buf[s->sym_next++] = 0; \
|
||||
s->sym_buf[s->sym_next++] = 0; \
|
||||
s->sym_buf[s->sym_next++] = cc; \
|
||||
s->dyn_ltree[cc].Freq++; \
|
||||
flush = (s->sym_next == s->sym_end); \
|
||||
}
|
||||
# define _tr_tally_dist(s, distance, length, flush) \
|
||||
{ unsigned char len = (unsigned char)(length); \
|
||||
uint16_t dist = (uint16_t)(distance); \
|
||||
s->sym_buf[s->sym_next++] = dist; \
|
||||
s->sym_buf[s->sym_next++] = dist >> 8; \
|
||||
s->sym_buf[s->sym_next++] = len; \
|
||||
dist--; \
|
||||
s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
|
||||
s->dyn_dtree[d_code(dist)].Freq++; \
|
||||
flush = (s->sym_next == s->sym_end); \
|
||||
}
|
||||
#else
|
||||
# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
|
||||
# define _tr_tally_dist(s, distance, length, flush) \
|
||||
flush = _tr_tally(s, (unsigned)(distance), (unsigned)(length))
|
||||
#endif
|
||||
|
||||
/* ===========================================================================
|
||||
* Update a hash value with the given input byte
|
||||
* IN assertion: all calls to to UPDATE_HASH are made with consecutive
|
||||
* input characters, so that a running hash key can be computed from the
|
||||
* previous key instead of complete recalculation each time.
|
||||
*/
|
||||
|
||||
#ifdef NOT_TWEAK_COMPILER
|
||||
#define TRIGGER_LEVEL 6
|
||||
#else
|
||||
#define TRIGGER_LEVEL 5
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
|
||||
#define UPDATE_HASH(s, h, i) \
|
||||
do {\
|
||||
if (s->level < TRIGGER_LEVEL) \
|
||||
h = (3483 * (s->window[i]) +\
|
||||
23081* (s->window[i+1]) +\
|
||||
6954 * (s->window[i+2]) +\
|
||||
20947* (s->window[i+3])) & s->hash_mask;\
|
||||
else\
|
||||
h = (25881* (s->window[i]) +\
|
||||
24674* (s->window[i+1]) +\
|
||||
25811* (s->window[i+2])) & s->hash_mask;\
|
||||
} while (0)
|
||||
#else
|
||||
# define UPDATE_HASH(s, h, i) (h = (((h) << s->hash_shift) ^ (s->window[i + (MIN_MATCH-1)])) & s->hash_mask)
|
||||
#endif
|
||||
|
||||
#ifndef ZLIB_DEBUG
|
||||
# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
|
||||
/* Send a code of the given tree. c and tree must not have side effects */
|
||||
|
||||
#else /* ZLIB_DEBUG */
|
||||
# define send_code(s, c, tree) \
|
||||
{ if (z_verbose > 2) { \
|
||||
fprintf(stderr, "\ncd %3d ", (c)); \
|
||||
} \
|
||||
send_bits(s, tree[c].Code, tree[c].Len); \
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Bit buffer and compress bits calculation debugging */
|
||||
#ifdef ZLIB_DEBUG
|
||||
void send_bits(deflate_state *s, int value, int length);
|
||||
# define cmpr_bits_add(s, len) s->compressed_len += (len)
|
||||
# define cmpr_bits_align(s) s->compressed_len = (s->compressed_len + 7) & ~7L
|
||||
# define sent_bits_add(s, bits) s->bits_sent += (bits)
|
||||
# define sent_bits_align(s) s->bits_sent = (s->bits_sent + 7) & ~7L
|
||||
#else
|
||||
#define send_bits(s, value, length) \
|
||||
{ int len = length;\
|
||||
if (s->bi_valid > (int)Buf_size - len) {\
|
||||
int val = (int)value;\
|
||||
s->bi_buf |= (uint16_t)val << s->bi_valid;\
|
||||
put_short(s, s->bi_buf);\
|
||||
s->bi_buf = (uint16_t)val >> (Buf_size - s->bi_valid);\
|
||||
s->bi_valid += len - Buf_size;\
|
||||
} else {\
|
||||
s->bi_buf |= (uint16_t)(value) << s->bi_valid;\
|
||||
s->bi_valid += len;\
|
||||
}\
|
||||
}
|
||||
# define cmpr_bits_add(s, len) (void)(len)
|
||||
# define cmpr_bits_align(s)
|
||||
# define sent_bits_add(s, bits) (void)(bits)
|
||||
# define sent_bits_align(s)
|
||||
#endif
|
||||
|
||||
#endif /* DEFLATE_H_ */
|
||||
|
||||
@ -7,7 +7,6 @@
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
#include "match_p.h"
|
||||
#include "functable.h"
|
||||
|
||||
/* ===========================================================================
|
||||
@ -17,9 +16,11 @@
|
||||
* new strings in the dictionary only for unmatched strings or for short
|
||||
* matches. It is used only for the fast compression options.
|
||||
*/
|
||||
ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
|
||||
IPos hash_head; /* head of the hash chain */
|
||||
int bflush; /* set if current block must be flushed */
|
||||
Z_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
|
||||
Pos hash_head; /* head of the hash chain */
|
||||
int bflush = 0; /* set if current block must be flushed */
|
||||
int64_t dist;
|
||||
uint32_t match_len = 0;
|
||||
|
||||
for (;;) {
|
||||
/* Make sure that we always have enough lookahead, except
|
||||
@ -28,93 +29,78 @@ ZLIB_INTERNAL block_state deflate_fast(deflate_state *s, int flush) {
|
||||
* string following the next match.
|
||||
*/
|
||||
if (s->lookahead < MIN_LOOKAHEAD) {
|
||||
functable.fill_window(s);
|
||||
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
|
||||
fill_window(s);
|
||||
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
|
||||
return need_more;
|
||||
}
|
||||
if (s->lookahead == 0)
|
||||
if (UNLIKELY(s->lookahead == 0))
|
||||
break; /* flush the current block */
|
||||
}
|
||||
|
||||
/* Insert the string window[strstart .. strstart+2] in the
|
||||
* dictionary, and set hash_head to the head of the hash chain:
|
||||
*/
|
||||
hash_head = NIL;
|
||||
if (s->lookahead >= MIN_MATCH) {
|
||||
hash_head = functable.insert_string(s, s->strstart, 1);
|
||||
}
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
|
||||
/* Find the longest match, discarding those <= prev_length.
|
||||
* At this point we have always match_length < MIN_MATCH
|
||||
*/
|
||||
if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) {
|
||||
/* To simplify the code, we prevent matches with the string
|
||||
* of window index 0 (in particular we have to avoid a match
|
||||
* of the string with itself at the start of the input file).
|
||||
/* Find the longest match, discarding those <= prev_length.
|
||||
* At this point we have always match length < MIN_MATCH
|
||||
*/
|
||||
s->match_length = longest_match(s, hash_head);
|
||||
/* longest_match() sets match_start */
|
||||
|
||||
if (dist <= MAX_DIST(s) && dist > 0) {
|
||||
/* To simplify the code, we prevent matches with the string
|
||||
* of window index 0 (in particular we have to avoid a match
|
||||
* of the string with itself at the start of the input file).
|
||||
*/
|
||||
match_len = functable.longest_match(s, hash_head);
|
||||
/* longest_match() sets match_start */
|
||||
}
|
||||
}
|
||||
if (s->match_length >= MIN_MATCH) {
|
||||
check_match(s, s->strstart, s->match_start, s->match_length);
|
||||
|
||||
_tr_tally_dist(s, s->strstart - s->match_start, s->match_length - MIN_MATCH, bflush);
|
||||
if (match_len >= MIN_MATCH) {
|
||||
check_match(s, s->strstart, s->match_start, match_len);
|
||||
|
||||
s->lookahead -= s->match_length;
|
||||
bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - MIN_MATCH);
|
||||
|
||||
s->lookahead -= match_len;
|
||||
|
||||
/* Insert new strings in the hash table only if the match length
|
||||
* is not too large. This saves time but degrades compression.
|
||||
*/
|
||||
if (s->match_length <= s->max_insert_length && s->lookahead >= MIN_MATCH) {
|
||||
s->match_length--; /* string at strstart already in table */
|
||||
if (match_len <= s->max_insert_length && s->lookahead >= MIN_MATCH) {
|
||||
match_len--; /* string at strstart already in table */
|
||||
s->strstart++;
|
||||
#ifdef NOT_TWEAK_COMPILER
|
||||
do {
|
||||
functable.insert_string(s, s->strstart, 1);
|
||||
s->strstart++;
|
||||
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
|
||||
* always MIN_MATCH bytes ahead.
|
||||
*/
|
||||
} while (--s->match_length != 0);
|
||||
#else
|
||||
{
|
||||
functable.insert_string(s, s->strstart, s->match_length);
|
||||
s->strstart += s->match_length;
|
||||
s->match_length = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
functable.insert_string(s, s->strstart, match_len);
|
||||
s->strstart += match_len;
|
||||
} else {
|
||||
s->strstart += s->match_length;
|
||||
s->match_length = 0;
|
||||
s->ins_h = s->window[s->strstart];
|
||||
#ifndef NOT_TWEAK_COMPILER
|
||||
s->strstart += match_len;
|
||||
#if MIN_MATCH != 3
|
||||
functable.insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
|
||||
#else
|
||||
functable.insert_string(s, s->strstart + 2 - MIN_MATCH, 1);
|
||||
#if MIN_MATCH != 3
|
||||
#warning Call insert_string() MIN_MATCH-3 more times
|
||||
#endif
|
||||
functable.quick_insert_string(s, s->strstart + 2 - MIN_MATCH);
|
||||
#endif
|
||||
/* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
|
||||
* matter since it will be recomputed at next deflate call.
|
||||
*/
|
||||
}
|
||||
match_len = 0;
|
||||
} else {
|
||||
/* No match, output a literal byte */
|
||||
Tracevv((stderr, "%c", s->window[s->strstart]));
|
||||
_tr_tally_lit(s, s->window[s->strstart], bflush);
|
||||
bflush = zng_tr_tally_lit(s, s->window[s->strstart]);
|
||||
s->lookahead--;
|
||||
s->strstart++;
|
||||
}
|
||||
if (bflush)
|
||||
if (UNLIKELY(bflush))
|
||||
FLUSH_BLOCK(s, 0);
|
||||
}
|
||||
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
|
||||
if (flush == Z_FINISH) {
|
||||
if (UNLIKELY(flush == Z_FINISH)) {
|
||||
FLUSH_BLOCK(s, 1);
|
||||
return finish_done;
|
||||
}
|
||||
if (s->sym_next)
|
||||
if (UNLIKELY(s->sym_next))
|
||||
FLUSH_BLOCK(s, 0);
|
||||
return block_done;
|
||||
}
|
||||
|
||||
@ -7,72 +7,50 @@
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
#ifndef NO_MEDIUM_STRATEGY
|
||||
#include <stdint.h>
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
#include "match_p.h"
|
||||
#include "functable.h"
|
||||
|
||||
struct match {
|
||||
unsigned int match_start;
|
||||
unsigned int match_length;
|
||||
unsigned int strstart;
|
||||
unsigned int orgstart;
|
||||
uint16_t match_start;
|
||||
uint16_t match_length;
|
||||
uint16_t strstart;
|
||||
uint16_t orgstart;
|
||||
};
|
||||
|
||||
#define MAX_DIST2 ((1 << MAX_WBITS) - MIN_LOOKAHEAD)
|
||||
|
||||
static int tr_tally_dist(deflate_state *s, int distance, int length) {
|
||||
return _tr_tally(s, distance, length);
|
||||
}
|
||||
|
||||
static int tr_tally_lit(deflate_state *s, int c) {
|
||||
return _tr_tally(s, 0, c);
|
||||
}
|
||||
|
||||
static int emit_match(deflate_state *s, struct match match) {
|
||||
int flush = 0;
|
||||
int bflush = 0;
|
||||
|
||||
/* matches that are not long enough we need to emit as literals */
|
||||
if (match.match_length < MIN_MATCH) {
|
||||
while (match.match_length) {
|
||||
flush += tr_tally_lit(s, s->window[match.strstart]);
|
||||
bflush += zng_tr_tally_lit(s, s->window[match.strstart]);
|
||||
s->lookahead--;
|
||||
match.strstart++;
|
||||
match.match_length--;
|
||||
}
|
||||
return flush;
|
||||
return bflush;
|
||||
}
|
||||
|
||||
check_match(s, match.strstart, match.match_start, match.match_length);
|
||||
|
||||
flush += tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
|
||||
bflush += zng_tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
|
||||
|
||||
s->lookahead -= match.match_length;
|
||||
return flush;
|
||||
return bflush;
|
||||
}
|
||||
|
||||
static void insert_match(deflate_state *s, struct match match) {
|
||||
if (unlikely(s->lookahead <= match.match_length + MIN_MATCH))
|
||||
if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + MIN_MATCH)))
|
||||
return;
|
||||
|
||||
/* matches that are not long enough we need to emit as literals */
|
||||
if (match.match_length < MIN_MATCH) {
|
||||
#ifdef NOT_TWEAK_COMPILER
|
||||
while (match.match_length) {
|
||||
match.strstart++;
|
||||
match.match_length--;
|
||||
|
||||
if (match.match_length) {
|
||||
if (match.strstart >= match.orgstart) {
|
||||
functable.insert_string(s, match.strstart, 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (LIKELY(match.match_length < MIN_MATCH)) {
|
||||
match.strstart++;
|
||||
match.match_length--;
|
||||
if (match.match_length > 0) {
|
||||
if (UNLIKELY(match.match_length > 0)) {
|
||||
if (match.strstart >= match.orgstart) {
|
||||
if (match.strstart + match.match_length - 1 >= match.orgstart) {
|
||||
functable.insert_string(s, match.strstart, match.match_length);
|
||||
@ -83,7 +61,6 @@ static void insert_match(deflate_state *s, struct match match) {
|
||||
match.match_length = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
@ -93,48 +70,35 @@ static void insert_match(deflate_state *s, struct match match) {
|
||||
if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) {
|
||||
match.match_length--; /* string at strstart already in table */
|
||||
match.strstart++;
|
||||
#ifdef NOT_TWEAK_COMPILER
|
||||
do {
|
||||
if (likely(match.strstart >= match.orgstart)) {
|
||||
functable.insert_string(s, match.strstart, 1);
|
||||
}
|
||||
match.strstart++;
|
||||
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
|
||||
* always MIN_MATCH bytes ahead.
|
||||
*/
|
||||
} while (--match.match_length != 0);
|
||||
#else
|
||||
if (likely(match.strstart >= match.orgstart)) {
|
||||
if (likely(match.strstart + match.match_length - 1 >= match.orgstart)) {
|
||||
|
||||
if (LIKELY(match.strstart >= match.orgstart)) {
|
||||
if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
|
||||
functable.insert_string(s, match.strstart, match.match_length);
|
||||
} else {
|
||||
functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
|
||||
}
|
||||
} else if (match.orgstart < match.strstart + match.match_length) {
|
||||
functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
|
||||
}
|
||||
match.strstart += match.match_length;
|
||||
match.match_length = 0;
|
||||
#endif
|
||||
} else {
|
||||
match.strstart += match.match_length;
|
||||
match.match_length = 0;
|
||||
s->ins_h = s->window[match.strstart];
|
||||
if (match.strstart >= (MIN_MATCH - 2))
|
||||
#ifndef NOT_TWEAK_COMPILER
|
||||
#if MIN_MATCH != 3
|
||||
functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
|
||||
#else
|
||||
functable.insert_string(s, match.strstart + 2 - MIN_MATCH, 1);
|
||||
#if MIN_MATCH != 3
|
||||
#warning Call insert_string() MIN_MATCH-3 more times
|
||||
functable.quick_insert_string(s, match.strstart + 2 - MIN_MATCH);
|
||||
#endif
|
||||
#endif
|
||||
/* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
|
||||
* matter since it will be recomputed at next deflate call.
|
||||
*/
|
||||
/* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
|
||||
* matter since it will be recomputed at next deflate call.
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
|
||||
IPos limit;
|
||||
Pos limit;
|
||||
unsigned char *match, *orig;
|
||||
int changed = 0;
|
||||
struct match c, n;
|
||||
@ -143,36 +107,36 @@ static void fizzle_matches(deflate_state *s, struct match *current, struct match
|
||||
if (current->match_length <= 1)
|
||||
return;
|
||||
|
||||
if (unlikely(current->match_length > 1 + next->match_start))
|
||||
if (UNLIKELY(current->match_length > 1 + next->match_start))
|
||||
return;
|
||||
|
||||
if (unlikely(current->match_length > 1 + next->strstart))
|
||||
if (UNLIKELY(current->match_length > 1 + next->strstart))
|
||||
return;
|
||||
|
||||
match = s->window - current->match_length + 1 + next->match_start;
|
||||
orig = s->window - current->match_length + 1 + next->strstart;
|
||||
|
||||
/* quick exit check.. if this fails then don't bother with anything else */
|
||||
if (likely(*match != *orig))
|
||||
if (LIKELY(*match != *orig))
|
||||
return;
|
||||
|
||||
c = *current;
|
||||
n = *next;
|
||||
|
||||
/* step one: try to move the "next" match to the left as much as possible */
|
||||
limit = next->strstart > MAX_DIST2 ? next->strstart - MAX_DIST2 : 0;
|
||||
limit = next->strstart > MAX_DIST(s) ? next->strstart - (Pos)MAX_DIST(s) : 0;
|
||||
|
||||
match = s->window + n.match_start - 1;
|
||||
orig = s->window + n.strstart - 1;
|
||||
|
||||
while (*match == *orig) {
|
||||
if (c.match_length < 1)
|
||||
if (UNLIKELY(c.match_length < 1))
|
||||
break;
|
||||
if (n.strstart <= limit)
|
||||
if (UNLIKELY(n.strstart <= limit))
|
||||
break;
|
||||
if (n.match_length >= 256)
|
||||
if (UNLIKELY(n.match_length >= 256))
|
||||
break;
|
||||
if (n.match_start <= 1)
|
||||
if (UNLIKELY(n.match_start <= 1))
|
||||
break;
|
||||
|
||||
n.strstart--;
|
||||
@ -196,15 +160,18 @@ static void fizzle_matches(deflate_state *s, struct match *current, struct match
|
||||
}
|
||||
}
|
||||
|
||||
ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
struct match current_match, next_match;
|
||||
Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
/* Align the first struct to start on a new cacheline, this allows us to fit both structs in one cacheline */
|
||||
ALIGNED_(16) struct match current_match;
|
||||
struct match next_match;
|
||||
|
||||
memset(&current_match, 0, sizeof(struct match));
|
||||
memset(&next_match, 0, sizeof(struct match));
|
||||
|
||||
for (;;) {
|
||||
IPos hash_head = 0; /* head of the hash chain */
|
||||
int bflush; /* set if current block must be flushed */
|
||||
Pos hash_head = 0; /* head of the hash chain */
|
||||
int bflush = 0; /* set if current block must be flushed */
|
||||
int64_t dist;
|
||||
|
||||
/* Make sure that we always have enough lookahead, except
|
||||
* at the end of the input file. We need MAX_MATCH bytes
|
||||
@ -212,15 +179,14 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
* string following the next current_match.
|
||||
*/
|
||||
if (s->lookahead < MIN_LOOKAHEAD) {
|
||||
functable.fill_window(s);
|
||||
fill_window(s);
|
||||
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
|
||||
return need_more;
|
||||
}
|
||||
if (s->lookahead == 0)
|
||||
if (UNLIKELY(s->lookahead == 0))
|
||||
break; /* flush the current block */
|
||||
next_match.match_length = 0;
|
||||
}
|
||||
s->prev_length = 2;
|
||||
|
||||
/* Insert the string window[strstart .. strstart+2] in the
|
||||
* dictionary, and set hash_head to the head of the hash chain:
|
||||
@ -230,63 +196,63 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
if (next_match.match_length > 0) {
|
||||
current_match = next_match;
|
||||
next_match.match_length = 0;
|
||||
|
||||
} else {
|
||||
hash_head = 0;
|
||||
if (s->lookahead >= MIN_MATCH) {
|
||||
hash_head = functable.insert_string(s, s->strstart, 1);
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
}
|
||||
|
||||
/* set up the initial match to be a 1 byte literal */
|
||||
current_match.match_start = 0;
|
||||
current_match.match_length = 1;
|
||||
current_match.strstart = s->strstart;
|
||||
current_match.strstart = (uint16_t)s->strstart;
|
||||
current_match.orgstart = current_match.strstart;
|
||||
|
||||
/* Find the longest match, discarding those <= prev_length.
|
||||
* At this point we have always match_length < MIN_MATCH
|
||||
*/
|
||||
|
||||
if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
if (dist <= MAX_DIST(s) && dist > 0) {
|
||||
/* To simplify the code, we prevent matches with the string
|
||||
* of window index 0 (in particular we have to avoid a match
|
||||
* of the string with itself at the start of the input file).
|
||||
*/
|
||||
current_match.match_length = longest_match(s, hash_head);
|
||||
current_match.match_start = s->match_start;
|
||||
if (current_match.match_length < MIN_MATCH)
|
||||
current_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
|
||||
current_match.match_start = (uint16_t)s->match_start;
|
||||
if (UNLIKELY(current_match.match_length < MIN_MATCH))
|
||||
current_match.match_length = 1;
|
||||
if (current_match.match_start >= current_match.strstart) {
|
||||
if (UNLIKELY(current_match.match_start >= current_match.strstart)) {
|
||||
/* this can happen due to some restarts */
|
||||
current_match.match_length = 1;
|
||||
}
|
||||
} else {
|
||||
/* Set up the match to be a 1 byte literal */
|
||||
current_match.match_start = 0;
|
||||
current_match.match_length = 1;
|
||||
}
|
||||
}
|
||||
|
||||
insert_match(s, current_match);
|
||||
|
||||
/* now, look ahead one */
|
||||
if (s->lookahead > MIN_LOOKAHEAD && (current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD)) {
|
||||
if (LIKELY(s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) {
|
||||
s->strstart = current_match.strstart + current_match.match_length;
|
||||
hash_head = functable.insert_string(s, s->strstart, 1);
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
|
||||
/* set up the initial match to be a 1 byte literal */
|
||||
next_match.match_start = 0;
|
||||
next_match.match_length = 1;
|
||||
next_match.strstart = s->strstart;
|
||||
next_match.strstart = (uint16_t)s->strstart;
|
||||
next_match.orgstart = next_match.strstart;
|
||||
|
||||
/* Find the longest match, discarding those <= prev_length.
|
||||
* At this point we have always match_length < MIN_MATCH
|
||||
*/
|
||||
if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
|
||||
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
if (dist <= MAX_DIST(s) && dist > 0) {
|
||||
/* To simplify the code, we prevent matches with the string
|
||||
* of window index 0 (in particular we have to avoid a match
|
||||
* of the string with itself at the start of the input file).
|
||||
*/
|
||||
next_match.match_length = longest_match(s, hash_head);
|
||||
next_match.match_start = s->match_start;
|
||||
if (next_match.match_start >= next_match.strstart) {
|
||||
next_match.match_length = (uint16_t)functable.longest_match(s, hash_head);
|
||||
next_match.match_start = (uint16_t)s->match_start;
|
||||
if (UNLIKELY(next_match.match_start >= next_match.strstart)) {
|
||||
/* this can happen due to some restarts */
|
||||
next_match.match_length = 1;
|
||||
}
|
||||
@ -294,13 +260,13 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
next_match.match_length = 1;
|
||||
else
|
||||
fizzle_matches(s, &current_match, &next_match);
|
||||
} else {
|
||||
/* Set up the match to be a 1 byte literal */
|
||||
next_match.match_start = 0;
|
||||
next_match.match_length = 1;
|
||||
}
|
||||
|
||||
/* short matches with a very long distance are rarely a good idea encoding wise */
|
||||
if (next_match.match_length == 3 && (next_match.strstart - next_match.match_start) > 12000)
|
||||
next_match.match_length = 1;
|
||||
s->strstart = current_match.strstart;
|
||||
|
||||
} else {
|
||||
next_match.match_length = 0;
|
||||
}
|
||||
@ -311,7 +277,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
/* move the "cursor" forward */
|
||||
s->strstart += current_match.match_length;
|
||||
|
||||
if (bflush)
|
||||
if (UNLIKELY(bflush))
|
||||
FLUSH_BLOCK(s, 0);
|
||||
}
|
||||
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
|
||||
@ -319,7 +285,7 @@ ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
|
||||
FLUSH_BLOCK(s, 1);
|
||||
return finish_done;
|
||||
}
|
||||
if (s->sym_next)
|
||||
if (UNLIKELY(s->sym_next))
|
||||
FLUSH_BLOCK(s, 0);
|
||||
|
||||
return block_done;
|
||||
|
||||
@ -12,39 +12,45 @@
|
||||
/* Forward declare common non-inlined functions declared in deflate.c */
|
||||
|
||||
#ifdef ZLIB_DEBUG
|
||||
void check_match(deflate_state *s, IPos start, IPos match, int length);
|
||||
void check_match(deflate_state *s, Pos start, Pos match, int length);
|
||||
#else
|
||||
#define check_match(s, start, match, length)
|
||||
#endif
|
||||
void flush_pending(PREFIX3(stream) *strm);
|
||||
|
||||
/* ===========================================================================
|
||||
* Insert string str in the dictionary and set match_head to the previous head
|
||||
* of the hash chain (the most recent string with same hash key). Return
|
||||
* the previous length of the hash chain.
|
||||
* IN assertion: all calls to to INSERT_STRING are made with consecutive
|
||||
* input characters and the first MIN_MATCH bytes of str are valid
|
||||
* (except for the last MIN_MATCH-1 bytes of the input file).
|
||||
* Save the match info and tally the frequency counts. Return true if
|
||||
* the current block must be flushed.
|
||||
*/
|
||||
|
||||
static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigned int count) {
|
||||
Pos ret = 0;
|
||||
unsigned int idx;
|
||||
extern const unsigned char Z_INTERNAL zng_length_code[];
|
||||
extern const unsigned char Z_INTERNAL zng_dist_code[];
|
||||
|
||||
for (idx = 0; idx < count; idx++) {
|
||||
UPDATE_HASH(s, s->ins_h, str+idx);
|
||||
static inline int zng_tr_tally_lit(deflate_state *s, unsigned char c) {
|
||||
/* c is the unmatched char */
|
||||
s->sym_buf[s->sym_next++] = 0;
|
||||
s->sym_buf[s->sym_next++] = 0;
|
||||
s->sym_buf[s->sym_next++] = c;
|
||||
s->dyn_ltree[c].Freq++;
|
||||
Tracevv((stderr, "%c", c));
|
||||
Assert(c <= (MAX_MATCH-MIN_MATCH), "zng_tr_tally: bad literal");
|
||||
return (s->sym_next == s->sym_end);
|
||||
}
|
||||
|
||||
Pos head = s->head[s->ins_h];
|
||||
if (head != str+idx) {
|
||||
s->prev[(str+idx) & s->w_mask] = head;
|
||||
s->head[s->ins_h] = str+idx;
|
||||
if (idx == count - 1)
|
||||
ret = head;
|
||||
} else if (idx == count - 1) {
|
||||
ret = str + idx;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
static inline int zng_tr_tally_dist(deflate_state *s, uint32_t dist, uint32_t len) {
|
||||
/* dist: distance of matched string */
|
||||
/* len: match length-MIN_MATCH */
|
||||
s->sym_buf[s->sym_next++] = (uint8_t)(dist);
|
||||
s->sym_buf[s->sym_next++] = (uint8_t)(dist >> 8);
|
||||
s->sym_buf[s->sym_next++] = (uint8_t)len;
|
||||
s->matches++;
|
||||
dist--;
|
||||
Assert(dist < MAX_DIST(s) && (uint16_t)d_code(dist) < (uint16_t)D_CODES,
|
||||
"zng_tr_tally: bad match");
|
||||
|
||||
s->dyn_ltree[zng_length_code[len]+LITERALS+1].Freq++;
|
||||
s->dyn_dtree[d_code(dist)].Freq++;
|
||||
return (s->sym_next == s->sym_end);
|
||||
}
|
||||
|
||||
/* ===========================================================================
|
||||
@ -52,14 +58,13 @@ static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigne
|
||||
* IN assertion: strstart is set to the end of the current match.
|
||||
*/
|
||||
#define FLUSH_BLOCK_ONLY(s, last) { \
|
||||
_tr_flush_block(s, (s->block_start >= 0L ? \
|
||||
zng_tr_flush_block(s, (s->block_start >= 0 ? \
|
||||
(char *)&s->window[(unsigned)s->block_start] : \
|
||||
NULL), \
|
||||
(unsigned long)((long)s->strstart - s->block_start), \
|
||||
(uint32_t)((int)s->strstart - s->block_start), \
|
||||
(last)); \
|
||||
s->block_start = s->strstart; \
|
||||
s->block_start = (int)s->strstart; \
|
||||
flush_pending(s->strm); \
|
||||
Tracev((stderr, "[FLUSH]")); \
|
||||
}
|
||||
|
||||
/* Same but force premature exit if necessary. */
|
||||
|
||||
121
libs/zlibng/deflate_quick.c
Normal file
121
libs/zlibng/deflate_quick.c
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* The deflate_quick deflate strategy, designed to be used when cycles are
|
||||
* at a premium.
|
||||
*
|
||||
* Copyright (C) 2013 Intel Corporation. All rights reserved.
|
||||
* Authors:
|
||||
* Wajdi Feghali <wajdi.k.feghali@intel.com>
|
||||
* Jim Guilford <james.guilford@intel.com>
|
||||
* Vinodh Gopal <vinodh.gopal@intel.com>
|
||||
* Erdinc Ozturk <erdinc.ozturk@intel.com>
|
||||
* Jim Kukunas <james.t.kukunas@linux.intel.com>
|
||||
*
|
||||
* Portions are Copyright (C) 2016 12Sided Technology, LLC.
|
||||
* Author:
|
||||
* Phil Vachon <pvachon@12sidedtech.com>
|
||||
*
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
#include "functable.h"
|
||||
#include "trees_emit.h"
|
||||
|
||||
extern const ct_data static_ltree[L_CODES+2];
|
||||
extern const ct_data static_dtree[D_CODES];
|
||||
|
||||
#define QUICK_START_BLOCK(s, last) { \
|
||||
zng_tr_emit_tree(s, STATIC_TREES, last); \
|
||||
s->block_open = 1 + (int)last; \
|
||||
s->block_start = (int)s->strstart; \
|
||||
}
|
||||
|
||||
#define QUICK_END_BLOCK(s, last) { \
|
||||
if (s->block_open) { \
|
||||
zng_tr_emit_end_block(s, static_ltree, last); \
|
||||
s->block_open = 0; \
|
||||
s->block_start = (int)s->strstart; \
|
||||
flush_pending(s->strm); \
|
||||
if (s->strm->avail_out == 0) \
|
||||
return (last) ? finish_started : need_more; \
|
||||
} \
|
||||
}
|
||||
|
||||
Z_INTERNAL block_state deflate_quick(deflate_state *s, int flush) {
|
||||
Pos hash_head;
|
||||
int64_t dist;
|
||||
unsigned match_len, last;
|
||||
|
||||
|
||||
last = (flush == Z_FINISH) ? 1 : 0;
|
||||
if (UNLIKELY(last && s->block_open != 2)) {
|
||||
/* Emit end of previous block */
|
||||
QUICK_END_BLOCK(s, 0);
|
||||
/* Emit start of last block */
|
||||
QUICK_START_BLOCK(s, last);
|
||||
} else if (UNLIKELY(s->block_open == 0 && s->lookahead > 0)) {
|
||||
/* Start new block only when we have lookahead data, so that if no
|
||||
input data is given an empty block will not be written */
|
||||
QUICK_START_BLOCK(s, last);
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
if (UNLIKELY(s->pending + ((BIT_BUF_SIZE + 7) >> 3) >= s->pending_buf_size)) {
|
||||
flush_pending(s->strm);
|
||||
if (s->strm->avail_out == 0) {
|
||||
return (last && s->strm->avail_in == 0) ? finish_started : need_more;
|
||||
}
|
||||
}
|
||||
|
||||
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD)) {
|
||||
fill_window(s);
|
||||
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
|
||||
return need_more;
|
||||
}
|
||||
if (UNLIKELY(s->lookahead == 0))
|
||||
break;
|
||||
|
||||
if (UNLIKELY(s->block_open == 0)) {
|
||||
/* Start new block when we have lookahead data, so that if no
|
||||
input data is given an empty block will not be written */
|
||||
QUICK_START_BLOCK(s, last);
|
||||
}
|
||||
}
|
||||
|
||||
if (LIKELY(s->lookahead >= MIN_MATCH)) {
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
|
||||
if (dist <= MAX_DIST(s) && dist > 0) {
|
||||
match_len = functable.compare258(s->window + s->strstart, s->window + hash_head);
|
||||
|
||||
if (match_len >= MIN_MATCH) {
|
||||
if (UNLIKELY(match_len > s->lookahead))
|
||||
match_len = s->lookahead;
|
||||
|
||||
check_match(s, s->strstart, hash_head, match_len);
|
||||
|
||||
zng_tr_emit_dist(s, static_ltree, static_dtree, match_len - MIN_MATCH, (uint32_t)dist);
|
||||
s->lookahead -= match_len;
|
||||
s->strstart += match_len;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
zng_tr_emit_lit(s, static_ltree, s->window[s->strstart]);
|
||||
s->strstart++;
|
||||
s->lookahead--;
|
||||
}
|
||||
|
||||
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
|
||||
if (UNLIKELY(last)) {
|
||||
QUICK_END_BLOCK(s, 1);
|
||||
return finish_done;
|
||||
}
|
||||
|
||||
QUICK_END_BLOCK(s, 0);
|
||||
return block_done;
|
||||
}
|
||||
@ -7,26 +7,18 @@
|
||||
#include "zbuild.h"
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
#include "match_p.h"
|
||||
#include "functable.h"
|
||||
|
||||
/* ===========================================================================
|
||||
* Local data
|
||||
*/
|
||||
|
||||
#ifndef TOO_FAR
|
||||
# define TOO_FAR 4096
|
||||
#endif
|
||||
/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
|
||||
|
||||
/* ===========================================================================
|
||||
* Same as deflate_medium, but achieves better compression. We use a lazy
|
||||
* evaluation for matches: a match is finally adopted only if there is
|
||||
* no better match at the next window position.
|
||||
*/
|
||||
ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
|
||||
IPos hash_head; /* head of hash chain */
|
||||
Z_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
|
||||
Pos hash_head; /* head of hash chain */
|
||||
int bflush; /* set if current block must be flushed */
|
||||
int64_t dist;
|
||||
uint32_t match_len;
|
||||
|
||||
/* Process the input block. */
|
||||
for (;;) {
|
||||
@ -36,57 +28,53 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
|
||||
* string following the next match.
|
||||
*/
|
||||
if (s->lookahead < MIN_LOOKAHEAD) {
|
||||
functable.fill_window(s);
|
||||
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
|
||||
fill_window(s);
|
||||
if (UNLIKELY(s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH)) {
|
||||
return need_more;
|
||||
}
|
||||
if (s->lookahead == 0)
|
||||
if (UNLIKELY(s->lookahead == 0))
|
||||
break; /* flush the current block */
|
||||
}
|
||||
|
||||
/* Insert the string window[strstart .. strstart+2] in the
|
||||
* dictionary, and set hash_head to the head of the hash chain:
|
||||
*/
|
||||
hash_head = NIL;
|
||||
if (s->lookahead >= MIN_MATCH) {
|
||||
hash_head = functable.insert_string(s, s->strstart, 1);
|
||||
hash_head = 0;
|
||||
if (LIKELY(s->lookahead >= MIN_MATCH)) {
|
||||
hash_head = functable.quick_insert_string(s, s->strstart);
|
||||
}
|
||||
|
||||
/* Find the longest match, discarding those <= prev_length.
|
||||
*/
|
||||
s->prev_length = s->match_length, s->prev_match = s->match_start;
|
||||
s->match_length = MIN_MATCH-1;
|
||||
s->prev_match = (Pos)s->match_start;
|
||||
match_len = MIN_MATCH-1;
|
||||
dist = (int64_t)s->strstart - hash_head;
|
||||
|
||||
if (hash_head != NIL && s->prev_length < s->max_lazy_match && s->strstart - hash_head <= MAX_DIST(s)) {
|
||||
if (dist <= MAX_DIST(s) && dist > 0 && s->prev_length < s->max_lazy_match) {
|
||||
/* To simplify the code, we prevent matches with the string
|
||||
* of window index 0 (in particular we have to avoid a match
|
||||
* of the string with itself at the start of the input file).
|
||||
*/
|
||||
s->match_length = longest_match(s, hash_head);
|
||||
match_len = functable.longest_match(s, hash_head);
|
||||
/* longest_match() sets match_start */
|
||||
|
||||
if (s->match_length <= 5 && (s->strategy == Z_FILTERED
|
||||
#if TOO_FAR <= 32767
|
||||
|| (s->match_length == MIN_MATCH && s->strstart - s->match_start > TOO_FAR)
|
||||
#endif
|
||||
)) {
|
||||
|
||||
if (match_len <= 5 && (s->strategy == Z_FILTERED)) {
|
||||
/* If prev_match is also MIN_MATCH, match_start is garbage
|
||||
* but we will ignore the current match anyway.
|
||||
*/
|
||||
s->match_length = MIN_MATCH-1;
|
||||
match_len = MIN_MATCH-1;
|
||||
}
|
||||
}
|
||||
/* If there was a match at the previous step and the current
|
||||
* match is not better, output the previous match:
|
||||
*/
|
||||
if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) {
|
||||
if (s->prev_length >= MIN_MATCH && match_len <= s->prev_length) {
|
||||
unsigned int max_insert = s->strstart + s->lookahead - MIN_MATCH;
|
||||
/* Do not insert strings in hash table beyond this. */
|
||||
|
||||
check_match(s, s->strstart-1, s->prev_match, s->prev_length);
|
||||
|
||||
_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH, bflush);
|
||||
bflush = zng_tr_tally_dist(s, s->strstart -1 - s->prev_match, s->prev_length - MIN_MATCH);
|
||||
|
||||
/* Insert in hash table all strings up to the end of the match.
|
||||
* strstart-1 and strstart are already inserted. If there is not
|
||||
@ -95,70 +83,55 @@ ZLIB_INTERNAL block_state deflate_slow(deflate_state *s, int flush) {
|
||||
*/
|
||||
s->lookahead -= s->prev_length-1;
|
||||
|
||||
#ifdef NOT_TWEAK_COMPILER
|
||||
s->prev_length -= 2;
|
||||
do {
|
||||
if (++s->strstart <= max_insert) {
|
||||
functable.insert_string(s, s->strstart, 1);
|
||||
}
|
||||
} while (--s->prev_length != 0);
|
||||
s->match_available = 0;
|
||||
s->match_length = MIN_MATCH-1;
|
||||
s->strstart++;
|
||||
#else
|
||||
{
|
||||
unsigned int mov_fwd = s->prev_length - 2;
|
||||
if (max_insert > s->strstart) {
|
||||
unsigned int insert_cnt = mov_fwd;
|
||||
if (unlikely(insert_cnt > max_insert - s->strstart))
|
||||
insert_cnt = max_insert - s->strstart;
|
||||
unsigned int mov_fwd = s->prev_length - 2;
|
||||
if (max_insert > s->strstart) {
|
||||
unsigned int insert_cnt = mov_fwd;
|
||||
if (UNLIKELY(insert_cnt > max_insert - s->strstart))
|
||||
insert_cnt = max_insert - s->strstart;
|
||||
|
||||
functable.insert_string(s, s->strstart + 1, insert_cnt);
|
||||
}
|
||||
s->prev_length = 0;
|
||||
s->match_available = 0;
|
||||
s->match_length = MIN_MATCH-1;
|
||||
s->strstart += mov_fwd + 1;
|
||||
functable.insert_string(s, s->strstart + 1, insert_cnt);
|
||||
}
|
||||
#endif /*NOT_TWEAK_COMPILER*/
|
||||
s->prev_length = 0;
|
||||
s->match_available = 0;
|
||||
s->strstart += mov_fwd + 1;
|
||||
|
||||
if (bflush) FLUSH_BLOCK(s, 0);
|
||||
if (UNLIKELY(bflush))
|
||||
FLUSH_BLOCK(s, 0);
|
||||
|
||||
} else if (s->match_available) {
|
||||
/* If there was no match at the previous position, output a
|
||||
* single literal. If there was a match but the current match
|
||||
* is longer, truncate the previous match to a single literal.
|
||||
*/
|
||||
Tracevv((stderr, "%c", s->window[s->strstart-1]));
|
||||
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
|
||||
if (bflush) {
|
||||
bflush = zng_tr_tally_lit(s, s->window[s->strstart-1]);
|
||||
if (UNLIKELY(bflush))
|
||||
FLUSH_BLOCK_ONLY(s, 0);
|
||||
}
|
||||
s->prev_length = match_len;
|
||||
s->strstart++;
|
||||
s->lookahead--;
|
||||
if (s->strm->avail_out == 0)
|
||||
if (UNLIKELY(s->strm->avail_out == 0))
|
||||
return need_more;
|
||||
} else {
|
||||
/* There is no previous match to compare with, wait for
|
||||
* the next step to decide.
|
||||
*/
|
||||
s->prev_length = match_len;
|
||||
s->match_available = 1;
|
||||
s->strstart++;
|
||||
s->lookahead--;
|
||||
}
|
||||
}
|
||||
Assert(flush != Z_NO_FLUSH, "no flush?");
|
||||
if (s->match_available) {
|
||||
Tracevv((stderr, "%c", s->window[s->strstart-1]));
|
||||
_tr_tally_lit(s, s->window[s->strstart-1], bflush);
|
||||
if (UNLIKELY(s->match_available)) {
|
||||
(void) zng_tr_tally_lit(s, s->window[s->strstart-1]);
|
||||
s->match_available = 0;
|
||||
}
|
||||
s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
|
||||
if (flush == Z_FINISH) {
|
||||
if (UNLIKELY(flush == Z_FINISH)) {
|
||||
FLUSH_BLOCK(s, 1);
|
||||
return finish_done;
|
||||
}
|
||||
if (s->sym_next)
|
||||
if (UNLIKELY(s->sym_next))
|
||||
FLUSH_BLOCK(s, 0);
|
||||
return block_done;
|
||||
}
|
||||
|
||||
44
libs/zlibng/fallback_builtins.h
Normal file
44
libs/zlibng/fallback_builtins.h
Normal file
@ -0,0 +1,44 @@
|
||||
#ifndef X86_BUILTIN_CTZ_H
|
||||
#define X86_BUILTIN_CTZ_H
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
#if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64)
|
||||
|
||||
#include <intrin.h>
|
||||
#ifdef X86_FEATURES
|
||||
# include "arch/x86/x86.h"
|
||||
#endif
|
||||
|
||||
/* This is not a general purpose replacement for __builtin_ctz. The function expects that value is != 0
|
||||
* Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
|
||||
*/
|
||||
static __forceinline unsigned long __builtin_ctz(uint32_t value) {
|
||||
#ifdef X86_FEATURES
|
||||
if (x86_cpu_has_tzcnt)
|
||||
return _tzcnt_u32(value);
|
||||
#endif
|
||||
unsigned long trailing_zero;
|
||||
_BitScanForward(&trailing_zero, value);
|
||||
return trailing_zero;
|
||||
}
|
||||
#define HAVE_BUILTIN_CTZ
|
||||
|
||||
#ifdef _M_AMD64
|
||||
/* This is not a general purpose replacement for __builtin_ctzll. The function expects that value is != 0
|
||||
* Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward64 is not checked
|
||||
*/
|
||||
static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
|
||||
#ifdef X86_FEATURES
|
||||
if (x86_cpu_has_tzcnt)
|
||||
return _tzcnt_u64(value);
|
||||
#endif
|
||||
unsigned long trailing_zero;
|
||||
_BitScanForward64(&trailing_zero, value);
|
||||
return trailing_zero;
|
||||
}
|
||||
#define HAVE_BUILTIN_CTZLL
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
@ -4,40 +4,95 @@
|
||||
*/
|
||||
|
||||
#include "zbuild.h"
|
||||
#include "functable.h"
|
||||
#include "zendian.h"
|
||||
#include "deflate.h"
|
||||
#include "deflate_p.h"
|
||||
|
||||
#include "gzendian.h"
|
||||
#include "functable.h"
|
||||
|
||||
/* insert_string */
|
||||
#ifdef X86_SSE4_2_CRC_HASH
|
||||
extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count);
|
||||
#elif defined(ARM_ACLE_CRC_HASH)
|
||||
extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count);
|
||||
#ifdef X86_FEATURES
|
||||
# include "fallback_builtins.h"
|
||||
#endif
|
||||
|
||||
/* fill_window */
|
||||
/* insert_string */
|
||||
extern void insert_string_c(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#ifdef X86_SSE42_CRC_HASH
|
||||
extern void insert_string_sse4(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#elif defined(ARM_ACLE_CRC_HASH)
|
||||
extern void insert_string_acle(deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
#endif
|
||||
|
||||
/* quick_insert_string */
|
||||
extern Pos quick_insert_string_c(deflate_state *const s, const uint32_t str);
|
||||
#ifdef X86_SSE42_CRC_HASH
|
||||
extern Pos quick_insert_string_sse4(deflate_state *const s, const uint32_t str);
|
||||
#elif defined(ARM_ACLE_CRC_HASH)
|
||||
extern Pos quick_insert_string_acle(deflate_state *const s, const uint32_t str);
|
||||
#endif
|
||||
|
||||
/* slide_hash */
|
||||
#ifdef X86_SSE2
|
||||
extern void fill_window_sse(deflate_state *s);
|
||||
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
|
||||
extern void fill_window_arm(deflate_state *s);
|
||||
void slide_hash_sse2(deflate_state *s);
|
||||
#elif defined(ARM_NEON_SLIDEHASH)
|
||||
void slide_hash_neon(deflate_state *s);
|
||||
#elif defined(POWER8_VSX_SLIDEHASH)
|
||||
void slide_hash_power8(deflate_state *s);
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
void slide_hash_avx2(deflate_state *s);
|
||||
#endif
|
||||
|
||||
/* adler32 */
|
||||
extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
|
||||
#ifdef ARM_NEON_ADLER32
|
||||
extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
#endif
|
||||
|
||||
ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
|
||||
|
||||
#ifdef DYNAMIC_CRC_TABLE
|
||||
extern volatile int crc_table_empty;
|
||||
extern void make_crc_table(void);
|
||||
#ifdef X86_SSSE3_ADLER32
|
||||
extern uint32_t adler32_ssse3(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
#endif
|
||||
#ifdef X86_AVX2_ADLER32
|
||||
extern uint32_t adler32_avx2(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
#endif
|
||||
#ifdef POWER8_VSX_ADLER32
|
||||
extern uint32_t adler32_power8(uint32_t adler, const unsigned char* buf, size_t len);
|
||||
#endif
|
||||
|
||||
#ifdef __ARM_FEATURE_CRC32
|
||||
/* memory chunking */
|
||||
extern uint32_t chunksize_c(void);
|
||||
extern uint8_t* chunkcopy_c(uint8_t *out, uint8_t const *from, unsigned len);
|
||||
extern uint8_t* chunkcopy_safe_c(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
|
||||
extern uint8_t* chunkunroll_c(uint8_t *out, unsigned *dist, unsigned *len);
|
||||
extern uint8_t* chunkmemset_c(uint8_t *out, unsigned dist, unsigned len);
|
||||
extern uint8_t* chunkmemset_safe_c(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
extern uint32_t chunksize_sse2(void);
|
||||
extern uint8_t* chunkcopy_sse2(uint8_t *out, uint8_t const *from, unsigned len);
|
||||
extern uint8_t* chunkcopy_safe_sse2(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
|
||||
extern uint8_t* chunkunroll_sse2(uint8_t *out, unsigned *dist, unsigned *len);
|
||||
extern uint8_t* chunkmemset_sse2(uint8_t *out, unsigned dist, unsigned len);
|
||||
extern uint8_t* chunkmemset_safe_sse2(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
extern uint32_t chunksize_avx(void);
|
||||
extern uint8_t* chunkcopy_avx(uint8_t *out, uint8_t const *from, unsigned len);
|
||||
extern uint8_t* chunkcopy_safe_avx(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
|
||||
extern uint8_t* chunkunroll_avx(uint8_t *out, unsigned *dist, unsigned *len);
|
||||
extern uint8_t* chunkmemset_avx(uint8_t *out, unsigned dist, unsigned len);
|
||||
extern uint8_t* chunkmemset_safe_avx(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
extern uint32_t chunksize_neon(void);
|
||||
extern uint8_t* chunkcopy_neon(uint8_t *out, uint8_t const *from, unsigned len);
|
||||
extern uint8_t* chunkcopy_safe_neon(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
|
||||
extern uint8_t* chunkunroll_neon(uint8_t *out, unsigned *dist, unsigned *len);
|
||||
extern uint8_t* chunkmemset_neon(uint8_t *out, unsigned dist, unsigned len);
|
||||
extern uint8_t* chunkmemset_safe_neon(uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
#endif
|
||||
|
||||
/* CRC32 */
|
||||
Z_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
|
||||
|
||||
#ifdef ARM_ACLE_CRC_HASH
|
||||
extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t);
|
||||
#endif
|
||||
|
||||
@ -47,87 +102,365 @@ extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t);
|
||||
extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t);
|
||||
#endif
|
||||
|
||||
/* stub definitions */
|
||||
ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count);
|
||||
ZLIB_INTERNAL void fill_window_stub(deflate_state *s);
|
||||
ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len);
|
||||
ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len);
|
||||
/* compare258 */
|
||||
extern uint32_t compare258_c(const unsigned char *src0, const unsigned char *src1);
|
||||
#ifdef UNALIGNED_OK
|
||||
extern uint32_t compare258_unaligned_16(const unsigned char *src0, const unsigned char *src1);
|
||||
extern uint32_t compare258_unaligned_32(const unsigned char *src0, const unsigned char *src1);
|
||||
#ifdef UNALIGNED64_OK
|
||||
extern uint32_t compare258_unaligned_64(const unsigned char *src0, const unsigned char *src1);
|
||||
#endif
|
||||
#ifdef X86_SSE42_CMP_STR
|
||||
extern uint32_t compare258_unaligned_sse4(const unsigned char *src0, const unsigned char *src1);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t compare258_unaligned_avx2(const unsigned char *src0, const unsigned char *src1);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* functable init */
|
||||
ZLIB_INTERNAL __thread struct functable_s functable = {fill_window_stub,insert_string_stub,adler32_stub,crc32_stub};
|
||||
/* longest_match */
|
||||
extern uint32_t longest_match_c(deflate_state *const s, Pos cur_match);
|
||||
#ifdef UNALIGNED_OK
|
||||
extern uint32_t longest_match_unaligned_16(deflate_state *const s, Pos cur_match);
|
||||
extern uint32_t longest_match_unaligned_32(deflate_state *const s, Pos cur_match);
|
||||
#ifdef UNALIGNED64_OK
|
||||
extern uint32_t longest_match_unaligned_64(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#ifdef X86_SSE42_CMP_STR
|
||||
extern uint32_t longest_match_unaligned_sse4(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
extern uint32_t longest_match_unaligned_avx2(deflate_state *const s, Pos cur_match);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Z_INTERNAL Z_TLS struct functable_s functable;
|
||||
|
||||
Z_INTERNAL void cpu_check_features(void)
|
||||
{
|
||||
static int features_checked = 0;
|
||||
if (features_checked)
|
||||
return;
|
||||
#if defined(X86_FEATURES)
|
||||
x86_check_features();
|
||||
#elif defined(ARM_FEATURES)
|
||||
arm_check_features();
|
||||
#elif defined(POWER_FEATURES)
|
||||
power_check_features();
|
||||
#endif
|
||||
features_checked = 1;
|
||||
}
|
||||
|
||||
/* stub functions */
|
||||
ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count) {
|
||||
Z_INTERNAL void insert_string_stub(deflate_state *const s, const uint32_t str, uint32_t count) {
|
||||
// Initialize default
|
||||
functable.insert_string=&insert_string_c;
|
||||
|
||||
#ifdef X86_SSE4_2_CRC_HASH
|
||||
functable.insert_string = &insert_string_c;
|
||||
cpu_check_features();
|
||||
|
||||
#ifdef X86_SSE42_CRC_HASH
|
||||
if (x86_cpu_has_sse42)
|
||||
functable.insert_string=&insert_string_sse;
|
||||
#elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
|
||||
functable.insert_string = &insert_string_sse4;
|
||||
#elif defined(ARM_ACLE_CRC_HASH)
|
||||
if (arm_cpu_has_crc32)
|
||||
functable.insert_string=&insert_string_acle;
|
||||
#endif
|
||||
functable.insert_string = &insert_string_acle;
|
||||
#endif
|
||||
|
||||
return functable.insert_string(s, str, count);
|
||||
functable.insert_string(s, str, count);
|
||||
}
|
||||
|
||||
ZLIB_INTERNAL void fill_window_stub(deflate_state *s) {
|
||||
// Initialize default
|
||||
functable.fill_window=&fill_window_c;
|
||||
Z_INTERNAL Pos quick_insert_string_stub(deflate_state *const s, const uint32_t str) {
|
||||
functable.quick_insert_string = &quick_insert_string_c;
|
||||
|
||||
#ifdef X86_SSE2
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
#ifdef X86_SSE42_CRC_HASH
|
||||
if (x86_cpu_has_sse42)
|
||||
functable.quick_insert_string = &quick_insert_string_sse4;
|
||||
#elif defined(ARM_ACLE_CRC_HASH)
|
||||
if (arm_cpu_has_crc32)
|
||||
functable.quick_insert_string = &quick_insert_string_acle;
|
||||
#endif
|
||||
|
||||
return functable.quick_insert_string(s, str);
|
||||
}
|
||||
|
||||
Z_INTERNAL void slide_hash_stub(deflate_state *s) {
|
||||
|
||||
functable.slide_hash = &slide_hash_c;
|
||||
cpu_check_features();
|
||||
|
||||
#ifdef X86_SSE2
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.fill_window=&fill_window_sse;
|
||||
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM)
|
||||
functable.fill_window=&fill_window_arm;
|
||||
#endif
|
||||
# endif
|
||||
functable.slide_hash = &slide_hash_sse2;
|
||||
#elif defined(ARM_NEON_SLIDEHASH)
|
||||
# ifndef ARM_NOCHECK_NEON
|
||||
if (arm_cpu_has_neon)
|
||||
# endif
|
||||
functable.slide_hash = &slide_hash_neon;
|
||||
#endif
|
||||
#ifdef X86_AVX2
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.slide_hash = &slide_hash_avx2;
|
||||
#endif
|
||||
#ifdef POWER8_VSX_SLIDEHASH
|
||||
if (power_cpu_has_arch_2_07)
|
||||
functable.slide_hash = &slide_hash_power8;
|
||||
#endif
|
||||
|
||||
functable.fill_window(s);
|
||||
functable.slide_hash(s);
|
||||
}
|
||||
|
||||
ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
Z_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
|
||||
// Initialize default
|
||||
functable.adler32=&adler32_c;
|
||||
functable.adler32 = &adler32_c;
|
||||
cpu_check_features();
|
||||
|
||||
#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
|
||||
#ifdef ARM_NEON_ADLER32
|
||||
# ifndef ARM_NOCHECK_NEON
|
||||
if (arm_cpu_has_neon)
|
||||
functable.adler32=&adler32_neon;
|
||||
#endif
|
||||
# endif
|
||||
functable.adler32 = &adler32_neon;
|
||||
#endif
|
||||
#ifdef X86_SSSE3_ADLER32
|
||||
if (x86_cpu_has_ssse3)
|
||||
functable.adler32 = &adler32_ssse3;
|
||||
#endif
|
||||
#ifdef X86_AVX2_ADLER32
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.adler32 = &adler32_avx2;
|
||||
#endif
|
||||
#ifdef POWER8_VSX_ADLER32
|
||||
if (power_cpu_has_arch_2_07)
|
||||
functable.adler32 = &adler32_power8;
|
||||
#endif
|
||||
|
||||
return functable.adler32(adler, buf, len);
|
||||
}
|
||||
|
||||
ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
Z_INTERNAL uint32_t chunksize_stub(void) {
|
||||
// Initialize default
|
||||
functable.chunksize = &chunksize_c;
|
||||
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.chunksize = &chunksize_sse2;
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.chunksize = &chunksize_avx;
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
if (arm_cpu_has_neon)
|
||||
functable.chunksize = &chunksize_neon;
|
||||
#endif
|
||||
|
||||
Assert(sizeof(uint64_t) >= sizeof(size_t),
|
||||
"crc32_z takes size_t but internally we have a uint64_t len");
|
||||
/* return a function pointer for optimized arches here after a capability test */
|
||||
return functable.chunksize();
|
||||
}
|
||||
|
||||
#ifdef DYNAMIC_CRC_TABLE
|
||||
if (crc_table_empty)
|
||||
make_crc_table();
|
||||
#endif /* DYNAMIC_CRC_TABLE */
|
||||
Z_INTERNAL uint8_t* chunkcopy_stub(uint8_t *out, uint8_t const *from, unsigned len) {
|
||||
// Initialize default
|
||||
functable.chunkcopy = &chunkcopy_c;
|
||||
|
||||
if (sizeof(void *) == sizeof(ptrdiff_t)) {
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.chunkcopy = &chunkcopy_sse2;
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.chunkcopy = &chunkcopy_avx;
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
if (arm_cpu_has_neon)
|
||||
functable.chunkcopy = &chunkcopy_neon;
|
||||
#endif
|
||||
|
||||
return functable.chunkcopy(out, from, len);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint8_t* chunkcopy_safe_stub(uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe) {
|
||||
// Initialize default
|
||||
functable.chunkcopy_safe = &chunkcopy_safe_c;
|
||||
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.chunkcopy_safe = &chunkcopy_safe_sse2;
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.chunkcopy_safe = &chunkcopy_safe_avx;
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
if (arm_cpu_has_neon)
|
||||
functable.chunkcopy_safe = &chunkcopy_safe_neon;
|
||||
#endif
|
||||
|
||||
return functable.chunkcopy_safe(out, from, len, safe);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint8_t* chunkunroll_stub(uint8_t *out, unsigned *dist, unsigned *len) {
|
||||
// Initialize default
|
||||
functable.chunkunroll = &chunkunroll_c;
|
||||
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.chunkunroll = &chunkunroll_sse2;
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.chunkunroll = &chunkunroll_avx;
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
if (arm_cpu_has_neon)
|
||||
functable.chunkunroll = &chunkunroll_neon;
|
||||
#endif
|
||||
|
||||
return functable.chunkunroll(out, dist, len);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint8_t* chunkmemset_stub(uint8_t *out, unsigned dist, unsigned len) {
|
||||
// Initialize default
|
||||
functable.chunkmemset = &chunkmemset_c;
|
||||
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.chunkmemset = &chunkmemset_sse2;
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.chunkmemset = &chunkmemset_avx;
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
if (arm_cpu_has_neon)
|
||||
functable.chunkmemset = &chunkmemset_neon;
|
||||
#endif
|
||||
|
||||
return functable.chunkmemset(out, dist, len);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint8_t* chunkmemset_safe_stub(uint8_t *out, unsigned dist, unsigned len, unsigned left) {
|
||||
// Initialize default
|
||||
functable.chunkmemset_safe = &chunkmemset_safe_c;
|
||||
|
||||
#ifdef X86_SSE2_CHUNKSET
|
||||
# if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
|
||||
if (x86_cpu_has_sse2)
|
||||
# endif
|
||||
functable.chunkmemset_safe = &chunkmemset_safe_sse2;
|
||||
#endif
|
||||
#ifdef X86_AVX_CHUNKSET
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.chunkmemset_safe = &chunkmemset_safe_avx;
|
||||
#endif
|
||||
#ifdef ARM_NEON_CHUNKSET
|
||||
if (arm_cpu_has_neon)
|
||||
functable.chunkmemset_safe = &chunkmemset_safe_neon;
|
||||
#endif
|
||||
|
||||
return functable.chunkmemset_safe(out, dist, len, left);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
|
||||
int32_t use_byfour = sizeof(void *) == sizeof(ptrdiff_t);
|
||||
|
||||
Assert(sizeof(uint64_t) >= sizeof(size_t),
|
||||
"crc32_z takes size_t but internally we have a uint64_t len");
|
||||
/* return a function pointer for optimized arches here after a capability test */
|
||||
|
||||
cpu_check_features();
|
||||
|
||||
if (use_byfour) {
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
functable.crc32=crc32_little;
|
||||
# if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
|
||||
if (arm_cpu_has_crc32)
|
||||
functable.crc32=crc32_acle;
|
||||
functable.crc32 = crc32_little;
|
||||
# if defined(ARM_ACLE_CRC_HASH)
|
||||
if (arm_cpu_has_crc32)
|
||||
functable.crc32 = crc32_acle;
|
||||
# endif
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
functable.crc32=crc32_big;
|
||||
functable.crc32 = crc32_big;
|
||||
#else
|
||||
# error No endian defined
|
||||
#endif
|
||||
} else {
|
||||
functable.crc32=crc32_generic;
|
||||
functable.crc32 = crc32_generic;
|
||||
}
|
||||
|
||||
return functable.crc32(crc, buf, len);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint32_t compare258_stub(const unsigned char *src0, const unsigned char *src1) {
|
||||
|
||||
functable.compare258 = &compare258_c;
|
||||
|
||||
#ifdef UNALIGNED_OK
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
functable.compare258 = &compare258_unaligned_64;
|
||||
# elif defined(HAVE_BUILTIN_CTZ)
|
||||
functable.compare258 = &compare258_unaligned_32;
|
||||
# else
|
||||
functable.compare258 = &compare258_unaligned_16;
|
||||
# endif
|
||||
# ifdef X86_SSE42_CMP_STR
|
||||
if (x86_cpu_has_sse42)
|
||||
functable.compare258 = &compare258_unaligned_sse4;
|
||||
# endif
|
||||
# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.compare258 = &compare258_unaligned_avx2;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
return functable.compare258(src0, src1);
|
||||
}
|
||||
|
||||
Z_INTERNAL uint32_t longest_match_stub(deflate_state *const s, Pos cur_match) {
|
||||
|
||||
functable.longest_match = &longest_match_c;
|
||||
|
||||
#ifdef UNALIGNED_OK
|
||||
# if defined(UNALIGNED64_OK) && defined(HAVE_BUILTIN_CTZLL)
|
||||
functable.longest_match = &longest_match_unaligned_64;
|
||||
# elif defined(HAVE_BUILTIN_CTZ)
|
||||
functable.longest_match = &longest_match_unaligned_32;
|
||||
# else
|
||||
functable.longest_match = &longest_match_unaligned_16;
|
||||
# endif
|
||||
# ifdef X86_SSE42_CMP_STR
|
||||
if (x86_cpu_has_sse42)
|
||||
functable.longest_match = &longest_match_unaligned_sse4;
|
||||
# endif
|
||||
# if defined(X86_AVX2) && defined(HAVE_BUILTIN_CTZ)
|
||||
if (x86_cpu_has_avx2)
|
||||
functable.longest_match = &longest_match_unaligned_avx2;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
return functable.longest_match(s, cur_match);
|
||||
}
|
||||
|
||||
/* functable init */
|
||||
Z_INTERNAL Z_TLS struct functable_s functable = {
|
||||
insert_string_stub,
|
||||
quick_insert_string_stub,
|
||||
adler32_stub,
|
||||
crc32_stub,
|
||||
slide_hash_stub,
|
||||
compare258_stub,
|
||||
longest_match_stub,
|
||||
chunksize_stub,
|
||||
chunkcopy_stub,
|
||||
chunkcopy_safe_stub,
|
||||
chunkunroll_stub,
|
||||
chunkmemset_stub,
|
||||
chunkmemset_safe_stub
|
||||
};
|
||||
|
||||
@ -9,13 +9,21 @@
|
||||
#include "deflate.h"
|
||||
|
||||
struct functable_s {
|
||||
void (* fill_window) (deflate_state *s);
|
||||
Pos (* insert_string) (deflate_state *const s, const Pos str, unsigned int count);
|
||||
uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, size_t len);
|
||||
uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
|
||||
void (* insert_string) (deflate_state *const s, const uint32_t str, uint32_t count);
|
||||
Pos (* quick_insert_string)(deflate_state *const s, const uint32_t str);
|
||||
uint32_t (* adler32) (uint32_t adler, const unsigned char *buf, size_t len);
|
||||
uint32_t (* crc32) (uint32_t crc, const unsigned char *buf, uint64_t len);
|
||||
void (* slide_hash) (deflate_state *s);
|
||||
uint32_t (* compare258) (const unsigned char *src0, const unsigned char *src1);
|
||||
uint32_t (* longest_match) (deflate_state *const s, Pos cur_match);
|
||||
uint32_t (* chunksize) (void);
|
||||
uint8_t* (* chunkcopy) (uint8_t *out, uint8_t const *from, unsigned len);
|
||||
uint8_t* (* chunkcopy_safe) (uint8_t *out, uint8_t const *from, unsigned len, uint8_t *safe);
|
||||
uint8_t* (* chunkunroll) (uint8_t *out, unsigned *dist, unsigned *len);
|
||||
uint8_t* (* chunkmemset) (uint8_t *out, unsigned dist, unsigned len);
|
||||
uint8_t* (* chunkmemset_safe) (uint8_t *out, unsigned dist, unsigned len, unsigned left);
|
||||
};
|
||||
|
||||
ZLIB_INTERNAL extern __thread struct functable_s functable;
|
||||
|
||||
Z_INTERNAL extern Z_TLS struct functable_s functable;
|
||||
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user