eqemu-server/libs/zlibng/arch/power/slide_hash_power8.c
Alex 2957f5084d
[Library] Update zlibng (#1255)
* Update zlibng

* Set cmake path more directly in zlibng to hopefully fix an issue with the build on drone

* I'm dumb, missing / in path

* Mackal helps with a dumb gitignore issue

* Adding all the files, not sure what's ignoring them and im tired of looking

* Some tweaks to zlibng build to hopefully get it to build properly. works on msvc now
2021-02-23 19:00:26 -06:00

61 lines
1.8 KiB
C

/* Optimized slide_hash for POWER processors
* Copyright (C) 2019-2020 IBM Corporation
* Author: Matheus Castanho <msc@linux.ibm.com>
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef POWER8_VSX_SLIDEHASH
#include <altivec.h>
#include "zbuild.h"
#include "deflate.h"
/* Rebase one hash table by the window size, eight entries per iteration.
 *
 * s         - deflate state; only s->w_size is read here.
 * n_elems   - number of Pos entries in the table; expected to be a
 *             multiple of 8 (checked by Assert in debug builds).
 * table_end - pointer one past the last entry of the table. The table is
 *             walked backwards from this address.
 *
 * Each entry m is replaced by (m >= w_size ? m - w_size : 0).
 */
static inline void slide_hash_power8_loop(deflate_state *s, unsigned n_elems, Pos *table_end) {
    vector unsigned short vw, vm, *vp;
    unsigned chunks;

    /* Each vector register (chunk) holds 128 bits == 8 Pos entries, so
     * instead of processing each of the n_elems entries individually we
     * process them in chunks of 8 with vector instructions.
     *
     * This function is only called from slide_hash_power8(). Per the sizes
     * set up by deflateInit2_(), both calls are expected to pass a power of
     * two larger than 2^7, so n_elems should always be a multiple of 8. */
    chunks = n_elems >> 3;
    Assert(n_elems % 8 == 0, "Weird hash table size!");

    /* Broadcast w_size into all eight lanes. vec_splats() builds the vector
     * directly from the scalar, so no lane of vw is ever read before it has
     * been written.
     *
     * The narrowing cast relies on s->w_size fitting in a Pos
     * (unsigned short), which deflateInit2_() is expected to guarantee. */
    vw = vec_splats((Pos) s->w_size);

    /* NOTE(review): dereferencing vp presumably requires the table to be
     * 16-byte aligned - confirm against the table allocation. */
    vp = (vector unsigned short *) table_end;

    /* Pre-tested loop: identical to the former do/while for chunks >= 1,
     * but does nothing (instead of wrapping the counter and writing before
     * the table) if n_elems is ever smaller than 8. */
    for (; chunks != 0; chunks--) {
        /* Step back to the previous group of 8 entries. */
        vp--;
        vm = *vp;

        /* Equivalent to: m >= w_size ? m - w_size : 0
         * vec_subs() on unsigned elements is a saturating subtraction:
         * entries smaller than w_size clamp to 0, all others are reduced
         * by w_size. */
        *vp = vec_subs(vm, vw);
    }
}
/* POWER8 entry point: slide both hash tables of the deflate state.
 *
 * s - deflate state whose s->head and s->prev tables are updated in place
 *     by slide_hash_power8_loop(), using s->w_size as the slide amount.
 */
void Z_INTERNAL slide_hash_power8(deflate_state *s) {
    /* First pass: s->head, HASH_SIZE entries, handed over as an end pointer. */
    const unsigned int head_count = HASH_SIZE;
    slide_hash_power8_loop(s, head_count, s->head + head_count);

    /* Second pass: s->prev, processed as s->w_size entries. */
    const unsigned int prev_count = s->w_size;
    slide_hash_power8_loop(s, prev_count, s->prev + prev_count);
}
#endif /* POWER8_VSX_SLIDEHASH */