[Library] Update zlibng (#1255)

* Update zlibng

* Set cmake path more directly in zlibng to hopefully fix an issue with the build on drone

* I'm dumb, missing / in path

* Mackal helps with a dumb gitignore issue

* Adding all the files, not sure what's ignoring them and im tired of looking

* Some tweaks to zlibng build to hopefully get it to build properly. works on msvc now
This commit is contained in:
Alex
2021-02-23 17:00:26 -08:00
committed by GitHub
parent e6dee96266
commit 2957f5084d
184 changed files with 22029 additions and 11703 deletions
+25 -11
View File
@@ -6,19 +6,27 @@ CC=
CFLAGS=
SFLAGS=
INCLUDES=
ACLEFLAG=
NEONFLAG=
SUFFIX=
SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
all: \
adler32_neon.o adler32_neon.lo \
armfeature.o armfeature.lo \
chunkset_neon.o chunkset_neon.lo \
crc32_acle.o crc32_acle.lo \
slide_neon.o slide_neon.lo \
insert_string_acle.o insert_string_acle.lo
adler32_neon.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
adler32_neon.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
armfeature.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
@@ -26,23 +34,29 @@ armfeature.o:
armfeature.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
chunkset_neon.o:
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
chunkset_neon.lo:
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/chunkset_neon.c
crc32_acle.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
crc32_acle.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
fill_window_arm.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
slide_neon.o:
$(CC) $(CFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
fill_window_arm.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_arm.c
slide_neon.lo:
$(CC) $(SFLAGS) $(NEONFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/slide_neon.c
insert_string_acle.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
$(CC) $(CFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
insert_string_acle.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
$(CC) $(SFLAGS) $(ACLEFLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
mostlyclean: clean
clean:
+10 -18
View File
@@ -2,24 +2,16 @@
* Copyright (C) 2017 ARM Holdings Inc.
* Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "adler32_neon.h"
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
#include "adler32_p.h"
#ifdef ARM_NEON_ADLER32
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#include "../../zutil.h"
#include "../../adler32_p.h"
static void NEON_accum32(uint32_t *s, const unsigned char *buf, size_t len) {
static const uint8_t taps[32] = {
@@ -109,7 +101,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) {
for (i = 0; i < len; i += n) {
if ((i + n) > len)
n = len - i;
n = (int)(len - i);
if (n < 16)
break;
-29
View File
@@ -1,29 +0,0 @@
/* Copyright (C) 1995-2011, 2016 Mark Adler
* Copyright (C) 2017 ARM Holdings Inc.
* Author: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#ifndef __ADLER32_NEON__
#define __ADLER32_NEON__
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
// Depending on the compiler flavor, size_t may be defined in one or the other header. See:
// http://stackoverflow.com/questions/26410466/gcc-linaro-compiler-throws-error-unknown-type-name-size-t
#include <stdint.h>
#include <stddef.h>
uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
#endif
+1 -1
View File
@@ -8,6 +8,6 @@
extern int arm_cpu_has_neon;
extern int arm_cpu_has_crc32;
void ZLIB_INTERNAL arm_check_features(void);
void Z_INTERNAL arm_check_features(void);
#endif /* ARM_H_ */
+49 -30
View File
@@ -1,50 +1,69 @@
#include "zutil.h"
#include "../../zutil.h"
#if defined(__linux__)
# include <sys/auxv.h>
# include <asm/hwcap.h>
# include <sys/auxv.h>
# include <asm/hwcap.h>
#elif defined(__FreeBSD__) && defined(__aarch64__)
# include <machine/armreg.h>
# ifndef ID_AA64ISAR0_CRC32_VAL
# define ID_AA64ISAR0_CRC32_VAL ID_AA64ISAR0_CRC32
# endif
#elif defined(__APPLE__)
# include <sys/sysctl.h>
#elif defined(_WIN32)
# include <winapifamily.h>
# include <winapifamily.h>
#endif
static int arm_has_crc32() {
#if defined(__linux__) && defined(HWCAP2_CRC32)
return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
#elif defined(__FreeBSD__) && defined(__aarch64__)
return getenv("QEMU_EMULATING") == NULL
&& ID_AA64ISAR0_CRC32_VAL(READ_SPECIALREG(id_aa64isar0_el1)) >= ID_AA64ISAR0_CRC32_BASE;
#elif defined(__APPLE__)
int hascrc32;
size_t size = sizeof(hascrc32);
return sysctlbyname("hw.optional.armv8_crc32", &hascrc32, &size, NULL, 0) == 0
&& hascrc32 == 1;
#elif defined(ARM_NOCHECK_ACLE)
return 1;
return 1;
#else
return 0;
return 0;
#endif
}
/* AArch64 has neon. */
#if !defined(__aarch64__)
static inline int arm_has_neon()
{
#if defined(__linux__) && defined(HWCAP_NEON)
#if !defined(__aarch64__) && !defined(_M_ARM64)
static inline int arm_has_neon() {
#if defined(__linux__) && defined(HWCAP_NEON)
return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
#elif defined(__APPLE__)
int hasneon;
size_t size = sizeof(hasneon);
return sysctlbyname("hw.optional.neon", &hasneon, &size, NULL, 0) == 0
&& hasneon == 1;
#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
return 1; /* Always supported */
#endif
#endif
# endif
#endif
#if defined(ARM_NOCHECK_NEON)
#if defined(ARM_NOCHECK_NEON)
return 1;
#else
return 0;
#endif
}
#endif
ZLIB_INTERNAL int arm_cpu_has_neon;
ZLIB_INTERNAL int arm_cpu_has_crc32;
void ZLIB_INTERNAL arm_check_features(void) {
#if defined(__aarch64__)
arm_cpu_has_neon = 1; /* always available */
#else
arm_cpu_has_neon = arm_has_neon();
return 0;
#endif
arm_cpu_has_crc32 = arm_has_crc32();
}
#endif
Z_INTERNAL int arm_cpu_has_neon;
Z_INTERNAL int arm_cpu_has_crc32;
void Z_INTERNAL arm_check_features(void) {
#if defined(__aarch64__) || defined(_M_ARM64)
arm_cpu_has_neon = 1; /* always available */
#else
arm_cpu_has_neon = arm_has_neon();
#endif
arm_cpu_has_crc32 = arm_has_crc32();
}
+54
View File
@@ -0,0 +1,54 @@
/* chunkset_neon.c -- NEON inline functions to copy small data chunks.
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef ARM_NEON_CHUNKSET
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#include "../../zbuild.h"
#include "../../zutil.h"
typedef uint8x16_t chunk_t;
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) {
*chunk = vld1q_dup_u8(from);
}
static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) {
*chunk = vreinterpretq_u8_s16(vdupq_n_s16(*(int16_t *)from));
}
static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) {
*chunk = vreinterpretq_u8_s32(vdupq_n_s32(*(int32_t *)from));
}
static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) {
*chunk = vcombine_u8(vld1_u8(from), vld1_u8(from));
}
#define CHUNKSIZE chunksize_neon
#define CHUNKCOPY chunkcopy_neon
#define CHUNKCOPY_SAFE chunkcopy_safe_neon
#define CHUNKUNROLL chunkunroll_neon
#define CHUNKMEMSET chunkmemset_neon
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
static inline void loadchunk(uint8_t const *s, chunk_t *chunk) {
*chunk = vld1q_u8(s);
}
static inline void storechunk(uint8_t *out, chunk_t *chunk) {
vst1q_u8(out, *chunk);
}
#include "chunkset_tpl.h"
#endif
+14 -19
View File
@@ -5,21 +5,16 @@
*
*/
#ifdef __ARM_FEATURE_CRC32
# include <arm_acle.h>
# ifdef ZLIB_COMPAT
# include <zconf.h>
# else
# include <zconf-ng.h>
# endif
# ifdef __linux__
# include <stddef.h>
# endif
#ifdef ARM_ACLE_CRC_HASH
#ifndef _MSC_VER
# include <arm_acle.h>
#endif
#include "../../zutil.h"
uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
register uint32_t c;
register const uint16_t *buf2;
register const uint32_t *buf4;
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
Z_REGISTER const uint32_t *buf4;
c = ~crc;
if (len && ((ptrdiff_t)buf & 1)) {
@@ -36,7 +31,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
buf4 = (const uint32_t *) buf;
}
# if defined(__aarch64__)
#if defined(__aarch64__)
if ((len > sizeof(uint32_t)) && ((ptrdiff_t)buf & sizeof(uint32_t))) {
c = __crc32w(c, *buf4++);
len -= sizeof(uint32_t);
@@ -44,7 +39,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
const uint64_t *buf8 = (const uint64_t *) buf4;
# ifdef UNROLL_MORE
#ifdef UNROLL_MORE
while (len >= 4 * sizeof(uint64_t)) {
c = __crc32d(c, *buf8++);
c = __crc32d(c, *buf8++);
@@ -52,7 +47,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
c = __crc32d(c, *buf8++);
len -= 4 * sizeof(uint64_t);
}
# endif
#endif
while (len >= sizeof(uint64_t)) {
c = __crc32d(c, *buf8++);
@@ -74,7 +69,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
}
buf = (const unsigned char *) buf2;
# else /* __aarch64__ */
#else /* __aarch64__ */
# ifdef UNROLL_MORE
while (len >= 8 * sizeof(uint32_t)) {
@@ -103,7 +98,7 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
} else {
buf = (const unsigned char *) buf4;
}
# endif /* __aarch64__ */
#endif /* __aarch64__ */
if (len) {
c = __crc32b(c, *buf);
@@ -112,4 +107,4 @@ uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
c = ~c;
return c;
}
#endif /* __ARM_FEATURE_CRC32 */
#endif
+1 -1
View File
@@ -5,7 +5,7 @@
#if defined(_MSC_VER) && !defined(__clang__)
static __forceinline unsigned long __builtin_ctzl(unsigned long value) {
return _arm_clz(_arm_rbit(value));
return _arm_clz(_arm_rbit(value));
}
#endif
-169
View File
@@ -1,169 +0,0 @@
/* fill_window_arm.c -- Optimized hash table shifting for ARM with support for NEON instructions
* Copyright (C) 2017 Mika T. Lindqvist
*
* Authors:
* Mika T. Lindqvist <postmaster@raasu.org>
* Jun He <jun.he@arm.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
/* @(#) $Id$ */
#include "zbuild.h"
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
extern ZLIB_INTERNAL int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
#if defined(__ARM_NEON__) || defined(__ARM_NEON)
#include <arm_neon.h>
/* SIMD version of hash_chain rebase */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
register uint16x8_t v, *p;
register size_t n;
size_t size = entries*sizeof(table[0]);
Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
Assert(sizeof(Pos) == 2, "Wrong Pos size");
v = vdupq_n_u16(window_size);
p = (uint16x8_t *)table;
n = size / (sizeof(uint16x8_t) * 8);
do {
p[0] = vqsubq_u16(p[0], v);
p[1] = vqsubq_u16(p[1], v);
p[2] = vqsubq_u16(p[2], v);
p[3] = vqsubq_u16(p[3], v);
p[4] = vqsubq_u16(p[4], v);
p[5] = vqsubq_u16(p[5], v);
p[6] = vqsubq_u16(p[6], v);
p[7] = vqsubq_u16(p[7], v);
p += 8;
} while (--n);
}
#else
/* generic version for hash rebase */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
unsigned int i;
for (i = 0; i < entries; i++) {
table[i] = (table[i] >= window_size) ? (table[i] - window_size) : NIL;
}
}
#endif
void fill_window_arm(deflate_state *s) {
register unsigned n;
unsigned long more; /* Amount of free space at the end of the window. */
unsigned int wsize = s->w_size;
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
do {
more = s->window_size - s->lookahead - s->strstart;
/* If the window is almost full and there is insufficient lookahead,
* move the upper half to the lower one to make room in the upper half.
*/
if (s->strstart >= wsize+MAX_DIST(s)) {
memcpy(s->window, s->window+wsize, wsize);
s->match_start -= wsize;
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
s->block_start -= wsize;
/* Slide the hash table (could be avoided with 32 bit values
at the expense of memory usage). We slide even when level == 0
to keep the hash table consistent if we switch back to level > 0
later. (Using level 0 permanently is not an optimal usage of
zlib, so we don't care about this pathological case.)
*/
slide_hash_chain(s->head, s->hash_size, wsize);
slide_hash_chain(s->prev, wsize, wsize);
more += wsize;
}
if (s->strm->avail_in == 0)
break;
/* If there was no sliding:
* strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
* more == window_size - lookahead - strstart
* => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
* => more >= window_size - 2*WSIZE + 2
* In the BIG_MEM or MMAP case (not yet supported),
* window_size == input_size + MIN_LOOKAHEAD &&
* strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
* Otherwise, window_size == 2*WSIZE so more >= 2.
* If there was sliding, more >= WSIZE. So in all cases, more >= 2.
*/
Assert(more >= 2, "more < 2");
n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
s->lookahead += n;
/* Initialize the hash value now that we have some input: */
if (s->lookahead + s->insert >= MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
unsigned int insert_cnt = s->insert;
unsigned int slen;
s->ins_h = s->window[str];
if (unlikely(s->lookahead < MIN_MATCH))
insert_cnt += s->lookahead - MIN_MATCH;
slen = insert_cnt;
if (str >= (MIN_MATCH - 2))
{
str += 2 - MIN_MATCH;
insert_cnt += MIN_MATCH - 2;
}
if (insert_cnt > 0)
{
functable.insert_string(s, str, insert_cnt);
s->insert -= slen;
}
}
/* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
* but this is not important since only literal bytes will be emitted.
*/
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
/* If the WIN_INIT bytes after the end of the current data have never been
* written, then zero those bytes in order to avoid memory check reports of
* the use of uninitialized (or uninitialised as Julian writes) bytes by
* the longest match routines. Update the high water mark for the next
* time through here. WIN_INIT is set to MAX_MATCH since the longest match
* routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
*/
if (s->high_water < s->window_size) {
unsigned long curr = s->strstart + (unsigned long)s->lookahead;
unsigned long init;
if (s->high_water < curr) {
/* Previous high water mark below current data -- zero WIN_INIT
* bytes or up to end of window, whichever is less.
*/
init = s->window_size - curr;
if (init > WIN_INIT)
init = WIN_INIT;
memset(s->window + curr, 0, init);
s->high_water = curr + init;
} else if (s->high_water < curr + WIN_INIT) {
/* High water mark at or above current data, but below current data
* plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
* to end of window, whichever is less.
*/
init = curr + WIN_INIT;
if (init > s->window_size)
init = s->window_size;
init -= s->high_water;
memset(s->window + s->high_water, 0, init);
s->high_water += init;
}
}
Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
}
+14 -45
View File
@@ -5,49 +5,18 @@
*
*/
#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
#include <arm_acle.h>
#include "zbuild.h"
#include "deflate.h"
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
* IN assertion: all calls to to INSERT_STRING are made with consecutive
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
Pos p, lp, ret;
if (unlikely(count == 0)) {
return s->prev[str & s->w_mask];
}
ret = 0;
lp = str + count - 1; /* last position */
for (p = str; p <= lp; p++) {
uint32_t val, h, hm;
memcpy(&val, &s->window[p], sizeof(val));
if (s->level >= TRIGGER_LEVEL)
val &= 0xFFFFFF;
h = __crc32w(0, val);
hm = h & s->hash_mask;
Pos head = s->head[hm];
if (head != p) {
s->prev[p & s->w_mask] = head;
s->head[hm] = p;
if (p == lp)
ret = head;
} else if (p == lp) {
ret = p;
}
}
return ret;
}
#ifdef ARM_ACLE_CRC_HASH
#ifndef _MSC_VER
# include <arm_acle.h>
#endif
#include "../../zbuild.h"
#include "../../deflate.h"
#define UPDATE_HASH(s, h, val) \
h = __crc32w(0, val)
#define INSERT_STRING insert_string_acle
#define QUICK_INSERT_STRING quick_insert_string_acle
#include "../../insert_string_tpl.h"
#endif
+52
View File
@@ -0,0 +1,52 @@
/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
* Copyright (C) 2017-2020 Mika T. Lindqvist
*
* Authors:
* Mika T. Lindqvist <postmaster@raasu.org>
* Jun He <jun.he@arm.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#if defined(ARM_NEON_SLIDEHASH)
#ifdef _M_ARM64
# include <arm64_neon.h>
#else
# include <arm_neon.h>
#endif
#include "../../zbuild.h"
#include "../../deflate.h"
/* SIMD version of hash_chain rebase */
static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
Z_REGISTER uint16x8_t v, *p;
Z_REGISTER size_t n;
size_t size = entries*sizeof(table[0]);
Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
Assert(sizeof(Pos) == 2, "Wrong Pos size");
v = vdupq_n_u16(window_size);
p = (uint16x8_t *)table;
n = size / (sizeof(uint16x8_t) * 8);
do {
p[0] = vqsubq_u16(p[0], v);
p[1] = vqsubq_u16(p[1], v);
p[2] = vqsubq_u16(p[2], v);
p[3] = vqsubq_u16(p[3], v);
p[4] = vqsubq_u16(p[4], v);
p[5] = vqsubq_u16(p[5], v);
p[6] = vqsubq_u16(p[6], v);
p[7] = vqsubq_u16(p[7], v);
p += 8;
} while (--n);
}
Z_INTERNAL void slide_hash_neon(deflate_state *s) {
unsigned int wsize = s->w_size;
slide_hash_chain(s->head, HASH_SIZE, wsize);
slide_hash_chain(s->prev, wsize, wsize);
}
#endif