Merge fix for master

This commit is contained in:
KimLS 2019-07-05 21:07:56 -07:00
commit f85644a09c
19 changed files with 3386 additions and 55 deletions

2
.gitignore vendored
View File

@ -24,8 +24,6 @@ Makefile
cmake_install.cmake
install_manifest.txt
[Bb]uild*/
x64/
x86/
log/
logs/
vcpkg/

View File

@ -1,5 +1,43 @@
EQEMu Changelog (Started on Sept 24, 2003 15:50)
-------------------------------------------------------
== 7/3/2019 ==
Akkadius/KLS:
- Optimizations to packet updates introduced back into the source post network code overhaul
- Optimizations made to position update packets by sending updates far less frequently when not in line with zone:max_movement_update_range
- Optimizations made to position updates in respect to the much higher resolution of navmesh path-finding that we were using. We have cut down
on the resolution of path finding / updating so that we reduce the CPU overhead of path-finding and subsequent client update packets that
get generated this action
- Optimization made by adjusting ZLIB compression rate that was accidentally set to a compression level of 4 a long time ago
- Added #netstats admin command to troubleshoot connection issues in detail
- Websocket server is now available in zone and is bound to the same UDP port that the zoneserver listens on
- Currently implemented websocket API calls at the zone level
get_packet_statistics
get_opcode_list
get_npc_list_detail
get_door_list_detail
get_corpse_list_detail
get_object_list_detail
get_mob_list_detail
get_client_list_detail
get_zone_attributes
get_logsys_categories
set_logging_level
- Example of a Typescript client: https://gist.github.com/Akkadius/52d12d0379f36cf81c51b3b7da13db37
- Library Changes
- We now use git submodules for libraries / dependencies versus manually downloading to the dependencies folder and/or storing
said dependencies within our codebase itself
- To update dependencies (Required for compiling)
- git submodule init
- git submodule update
- Libraries now in submodules
- [glm] https://github.com/g-truc/glm.git
- [flm] https://github.com/fmtlib/fmt.git
- [libuv] https://github.com/libuv/libuv.git
- [cereal] https://github.com/USCiLab/cereal.git
- [websocketpp] https://github.com/zaphoyd/websocketpp.git
- [recastnavigation] https://github.com/EQEmu/recastnavigation.git
== 6/24/2019 ==
Uleat: Reworked BotDatabase into a functional add-on for ZoneDatabase
- Eliminated the database connection associated with class BotDatabase

View File

@ -31,7 +31,7 @@
*/
#define CURRENT_BINARY_DATABASE_VERSION 9139
#define CURRENT_BINARY_DATABASE_VERSION 9140
#ifdef BOTS
#define CURRENT_BINARY_BOTS_DATABASE_VERSION 9024

View File

@ -0,0 +1,3 @@
fill_window_sse.c SSE2 optimized fill_window
deflate_quick.c SSE4 optimized deflate strategy for use as level 1
crc_folding.c SSE4 + PCLMULQDQ optimized CRC folding implementation

View File

@ -0,0 +1,58 @@
# Makefile for zlib
# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler
# For conditions of distribution and use, see copyright notice in zlib.h
CC=
CFLAGS=
SFLAGS=
INCLUDES=
SUFFIX=
SSE2FLAG=-msse2
SSE4FLAG=-msse4
PCLMULFLAG=-mpclmul
SRCDIR=.
SRCTOP=../..
TOPDIR=$(SRCTOP)
all: x86.o x86.lo fill_window_sse.o fill_window_sse.lo deflate_quick.o deflate_quick.lo insert_string_sse.o insert_string_sse.lo crc_folding.o crc_folding.lo
x86.o:
$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
x86.lo:
$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/x86.c
fill_window_sse.o:
$(CC) $(CFLAGS) $(SSE2FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
fill_window_sse.lo:
$(CC) $(SFLAGS) $(SSE2FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/fill_window_sse.c
deflate_quick.o:
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
deflate_quick.lo:
$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/deflate_quick.c
insert_string_sse.o:
$(CC) $(CFLAGS) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
insert_string_sse.lo:
$(CC) $(SFLAGS) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_sse.c
crc_folding.o:
$(CC) $(CFLAGS) $(PCLMULFLAG) $(SSE4FLAG) $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
crc_folding.lo:
$(CC) $(SFLAGS) $(PCLMULFLAG) $(SSE4FLAG) -DPIC $(INCLUDES) -c -o $@ $(SRCDIR)/crc_folding.c
mostlyclean: clean
clean:
rm -f *.o *.lo *~
rm -rf objs
rm -f *.gcda *.gcno *.gcov
distclean:
rm -f Makefile

View File

@ -0,0 +1,450 @@
/*
* Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ
* instruction.
*
* A white paper describing this algorithm can be found at:
* http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
*
* Copyright (C) 2013 Intel Corporation. All rights reserved.
* Authors:
* Wajdi Feghali <wajdi.k.feghali@intel.com>
* Jim Guilford <james.guilford@intel.com>
* Vinodh Gopal <vinodh.gopal@intel.com>
* Erdinc Ozturk <erdinc.ozturk@intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef X86_PCLMULQDQ_CRC
#include "zbuild.h"
#include <inttypes.h>
#include <immintrin.h>
#include <wmmintrin.h>
#include "crc_folding.h"
ZLIB_INTERNAL void crc_fold_init(deflate_state *const s) {
/* CRC_SAVE */
_mm_storeu_si128((__m128i *)s->crc0 + 0, _mm_cvtsi32_si128(0x9db42487));
_mm_storeu_si128((__m128i *)s->crc0 + 1, _mm_setzero_si128());
_mm_storeu_si128((__m128i *)s->crc0 + 2, _mm_setzero_si128());
_mm_storeu_si128((__m128i *)s->crc0 + 3, _mm_setzero_si128());
s->strm->adler = 0;
}
static void fold_1(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
0x00000001, 0xc6e41596);
__m128i x_tmp3;
__m128 ps_crc0, ps_crc3, ps_res;
x_tmp3 = *xmm_crc3;
*xmm_crc3 = *xmm_crc0;
*xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
*xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10);
ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
ps_res = _mm_xor_ps(ps_crc0, ps_crc3);
*xmm_crc0 = *xmm_crc1;
*xmm_crc1 = *xmm_crc2;
*xmm_crc2 = x_tmp3;
*xmm_crc3 = _mm_castps_si128(ps_res);
}
static void fold_2(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
0x00000001, 0xc6e41596);
__m128i x_tmp3, x_tmp2;
__m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res31, ps_res20;
x_tmp3 = *xmm_crc3;
x_tmp2 = *xmm_crc2;
*xmm_crc3 = *xmm_crc1;
*xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01);
*xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10);
ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
ps_res31 = _mm_xor_ps(ps_crc3, ps_crc1);
*xmm_crc2 = *xmm_crc0;
*xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
*xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10);
ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
ps_res20 = _mm_xor_ps(ps_crc0, ps_crc2);
*xmm_crc0 = x_tmp2;
*xmm_crc1 = x_tmp3;
*xmm_crc2 = _mm_castps_si128(ps_res20);
*xmm_crc3 = _mm_castps_si128(ps_res31);
}
static void fold_3(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
0x00000001, 0xc6e41596);
__m128i x_tmp3;
__m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3, ps_res32, ps_res21, ps_res10;
x_tmp3 = *xmm_crc3;
*xmm_crc3 = *xmm_crc2;
*xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01);
*xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x10);
ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
ps_res32 = _mm_xor_ps(ps_crc2, ps_crc3);
*xmm_crc2 = *xmm_crc1;
*xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01);
*xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x10);
ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
ps_res21 = _mm_xor_ps(ps_crc1, ps_crc2);
*xmm_crc1 = *xmm_crc0;
*xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
*xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x10);
ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
ps_res10 = _mm_xor_ps(ps_crc0, ps_crc1);
*xmm_crc0 = x_tmp3;
*xmm_crc1 = _mm_castps_si128(ps_res10);
*xmm_crc2 = _mm_castps_si128(ps_res21);
*xmm_crc3 = _mm_castps_si128(ps_res32);
}
static void fold_4(__m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2, __m128i *xmm_crc3) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
0x00000001, 0xc6e41596);
__m128i x_tmp0, x_tmp1, x_tmp2, x_tmp3;
__m128 ps_crc0, ps_crc1, ps_crc2, ps_crc3;
__m128 ps_t0, ps_t1, ps_t2, ps_t3;
__m128 ps_res0, ps_res1, ps_res2, ps_res3;
x_tmp0 = *xmm_crc0;
x_tmp1 = *xmm_crc1;
x_tmp2 = *xmm_crc2;
x_tmp3 = *xmm_crc3;
*xmm_crc0 = _mm_clmulepi64_si128(*xmm_crc0, xmm_fold4, 0x01);
x_tmp0 = _mm_clmulepi64_si128(x_tmp0, xmm_fold4, 0x10);
ps_crc0 = _mm_castsi128_ps(*xmm_crc0);
ps_t0 = _mm_castsi128_ps(x_tmp0);
ps_res0 = _mm_xor_ps(ps_crc0, ps_t0);
*xmm_crc1 = _mm_clmulepi64_si128(*xmm_crc1, xmm_fold4, 0x01);
x_tmp1 = _mm_clmulepi64_si128(x_tmp1, xmm_fold4, 0x10);
ps_crc1 = _mm_castsi128_ps(*xmm_crc1);
ps_t1 = _mm_castsi128_ps(x_tmp1);
ps_res1 = _mm_xor_ps(ps_crc1, ps_t1);
*xmm_crc2 = _mm_clmulepi64_si128(*xmm_crc2, xmm_fold4, 0x01);
x_tmp2 = _mm_clmulepi64_si128(x_tmp2, xmm_fold4, 0x10);
ps_crc2 = _mm_castsi128_ps(*xmm_crc2);
ps_t2 = _mm_castsi128_ps(x_tmp2);
ps_res2 = _mm_xor_ps(ps_crc2, ps_t2);
*xmm_crc3 = _mm_clmulepi64_si128(*xmm_crc3, xmm_fold4, 0x01);
x_tmp3 = _mm_clmulepi64_si128(x_tmp3, xmm_fold4, 0x10);
ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
ps_t3 = _mm_castsi128_ps(x_tmp3);
ps_res3 = _mm_xor_ps(ps_crc3, ps_t3);
*xmm_crc0 = _mm_castps_si128(ps_res0);
*xmm_crc1 = _mm_castps_si128(ps_res1);
*xmm_crc2 = _mm_castps_si128(ps_res2);
*xmm_crc3 = _mm_castps_si128(ps_res3);
}
static const unsigned ALIGNED_(32) pshufb_shf_table[60] = {
0x84838281, 0x88878685, 0x8c8b8a89, 0x008f8e8d, /* shl 15 (16 - 1)/shr1 */
0x85848382, 0x89888786, 0x8d8c8b8a, 0x01008f8e, /* shl 14 (16 - 3)/shr2 */
0x86858483, 0x8a898887, 0x8e8d8c8b, 0x0201008f, /* shl 13 (16 - 4)/shr3 */
0x87868584, 0x8b8a8988, 0x8f8e8d8c, 0x03020100, /* shl 12 (16 - 4)/shr4 */
0x88878685, 0x8c8b8a89, 0x008f8e8d, 0x04030201, /* shl 11 (16 - 5)/shr5 */
0x89888786, 0x8d8c8b8a, 0x01008f8e, 0x05040302, /* shl 10 (16 - 6)/shr6 */
0x8a898887, 0x8e8d8c8b, 0x0201008f, 0x06050403, /* shl 9 (16 - 7)/shr7 */
0x8b8a8988, 0x8f8e8d8c, 0x03020100, 0x07060504, /* shl 8 (16 - 8)/shr8 */
0x8c8b8a89, 0x008f8e8d, 0x04030201, 0x08070605, /* shl 7 (16 - 9)/shr9 */
0x8d8c8b8a, 0x01008f8e, 0x05040302, 0x09080706, /* shl 6 (16 -10)/shr10*/
0x8e8d8c8b, 0x0201008f, 0x06050403, 0x0a090807, /* shl 5 (16 -11)/shr11*/
0x8f8e8d8c, 0x03020100, 0x07060504, 0x0b0a0908, /* shl 4 (16 -12)/shr12*/
0x008f8e8d, 0x04030201, 0x08070605, 0x0c0b0a09, /* shl 3 (16 -13)/shr13*/
0x01008f8e, 0x05040302, 0x09080706, 0x0d0c0b0a, /* shl 2 (16 -14)/shr14*/
0x0201008f, 0x06050403, 0x0a090807, 0x0e0d0c0b /* shl 1 (16 -15)/shr15*/
};
static void partial_fold(const size_t len, __m128i *xmm_crc0, __m128i *xmm_crc1, __m128i *xmm_crc2,
__m128i *xmm_crc3, __m128i *xmm_crc_part) {
const __m128i xmm_fold4 = _mm_set_epi32( 0x00000001, 0x54442bd4,
0x00000001, 0xc6e41596);
const __m128i xmm_mask3 = _mm_set1_epi32(0x80808080);
__m128i xmm_shl, xmm_shr, xmm_tmp1, xmm_tmp2, xmm_tmp3;
__m128i xmm_a0_0, xmm_a0_1;
__m128 ps_crc3, psa0_0, psa0_1, ps_res;
xmm_shl = _mm_load_si128((__m128i *)pshufb_shf_table + (len - 1));
xmm_shr = xmm_shl;
xmm_shr = _mm_xor_si128(xmm_shr, xmm_mask3);
xmm_a0_0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shl);
*xmm_crc0 = _mm_shuffle_epi8(*xmm_crc0, xmm_shr);
xmm_tmp1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shl);
*xmm_crc0 = _mm_or_si128(*xmm_crc0, xmm_tmp1);
*xmm_crc1 = _mm_shuffle_epi8(*xmm_crc1, xmm_shr);
xmm_tmp2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shl);
*xmm_crc1 = _mm_or_si128(*xmm_crc1, xmm_tmp2);
*xmm_crc2 = _mm_shuffle_epi8(*xmm_crc2, xmm_shr);
xmm_tmp3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shl);
*xmm_crc2 = _mm_or_si128(*xmm_crc2, xmm_tmp3);
*xmm_crc3 = _mm_shuffle_epi8(*xmm_crc3, xmm_shr);
*xmm_crc_part = _mm_shuffle_epi8(*xmm_crc_part, xmm_shl);
*xmm_crc3 = _mm_or_si128(*xmm_crc3, *xmm_crc_part);
xmm_a0_1 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x10);
xmm_a0_0 = _mm_clmulepi64_si128(xmm_a0_0, xmm_fold4, 0x01);
ps_crc3 = _mm_castsi128_ps(*xmm_crc3);
psa0_0 = _mm_castsi128_ps(xmm_a0_0);
psa0_1 = _mm_castsi128_ps(xmm_a0_1);
ps_res = _mm_xor_ps(ps_crc3, psa0_0);
ps_res = _mm_xor_ps(ps_res, psa0_1);
*xmm_crc3 = _mm_castps_si128(ps_res);
}
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s, unsigned char *dst, const unsigned char *src, long len) {
unsigned long algn_diff;
__m128i xmm_t0, xmm_t1, xmm_t2, xmm_t3;
/* CRC_LOAD */
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
__m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
__m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
__m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
__m128i xmm_crc_part;
if (len < 16) {
if (len == 0)
return;
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
goto partial;
}
algn_diff = (0 - (uintptr_t)src) & 0xF;
if (algn_diff) {
xmm_crc_part = _mm_loadu_si128((__m128i *)src);
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
dst += algn_diff;
src += algn_diff;
len -= algn_diff;
partial_fold(algn_diff, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
}
while ((len -= 64) >= 0) {
/* CRC_LOAD */
xmm_t0 = _mm_load_si128((__m128i *)src);
xmm_t1 = _mm_load_si128((__m128i *)src + 1);
xmm_t2 = _mm_load_si128((__m128i *)src + 2);
xmm_t3 = _mm_load_si128((__m128i *)src + 3);
fold_4(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
/* CRC_SAVE */
_mm_storeu_si128((__m128i *)dst, xmm_t0);
_mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
_mm_storeu_si128((__m128i *)dst + 2, xmm_t2);
_mm_storeu_si128((__m128i *)dst + 3, xmm_t3);
xmm_crc0 = _mm_xor_si128(xmm_crc0, xmm_t0);
xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t1);
xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t2);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t3);
src += 64;
dst += 64;
}
/*
* len = num bytes left - 64
*/
if (len + 16 >= 0) {
len += 16;
xmm_t0 = _mm_load_si128((__m128i *)src);
xmm_t1 = _mm_load_si128((__m128i *)src + 1);
xmm_t2 = _mm_load_si128((__m128i *)src + 2);
fold_3(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
_mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
_mm_storeu_si128((__m128i *)dst + 2, xmm_t2);
xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_t0);
xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t1);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t2);
if (len == 0)
goto done;
dst += 48;
xmm_crc_part = _mm_load_si128((__m128i *)src + 3);
} else if (len + 32 >= 0) {
len += 32;
xmm_t0 = _mm_load_si128((__m128i *)src);
xmm_t1 = _mm_load_si128((__m128i *)src + 1);
fold_2(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
_mm_storeu_si128((__m128i *)dst + 1, xmm_t1);
xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_t0);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t1);
if (len == 0)
goto done;
dst += 32;
xmm_crc_part = _mm_load_si128((__m128i *)src + 2);
} else if (len + 48 >= 0) {
len += 48;
xmm_t0 = _mm_load_si128((__m128i *)src);
fold_1(&xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3);
_mm_storeu_si128((__m128i *)dst, xmm_t0);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_t0);
if (len == 0)
goto done;
dst += 16;
xmm_crc_part = _mm_load_si128((__m128i *)src + 1);
} else {
len += 64;
if (len == 0)
goto done;
xmm_crc_part = _mm_load_si128((__m128i *)src);
}
partial:
_mm_storeu_si128((__m128i *)dst, xmm_crc_part);
partial_fold(len, &xmm_crc0, &xmm_crc1, &xmm_crc2, &xmm_crc3, &xmm_crc_part);
done:
/* CRC_SAVE */
_mm_storeu_si128((__m128i *)s->crc0 + 0, xmm_crc0);
_mm_storeu_si128((__m128i *)s->crc0 + 1, xmm_crc1);
_mm_storeu_si128((__m128i *)s->crc0 + 2, xmm_crc2);
_mm_storeu_si128((__m128i *)s->crc0 + 3, xmm_crc3);
_mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part);
}
static const unsigned ALIGNED_(16) crc_k[] = {
0xccaa009e, 0x00000000, /* rk1 */
0x751997d0, 0x00000001, /* rk2 */
0xccaa009e, 0x00000000, /* rk5 */
0x63cd6124, 0x00000001, /* rk6 */
0xf7011640, 0x00000001, /* rk7 */
0xdb710640, 0x00000001 /* rk8 */
};
static const unsigned ALIGNED_(16) crc_mask[4] = {
0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000
};
static const unsigned ALIGNED_(16) crc_mask2[4] = {
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
};
uint32_t ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s) {
const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask);
const __m128i xmm_mask2 = _mm_load_si128((__m128i *)crc_mask2);
uint32_t crc;
__m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
/* CRC_LOAD */
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
__m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
__m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
__m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
/*
* k1
*/
crc_fold = _mm_load_si128((__m128i *)crc_k);
x_tmp0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x10);
xmm_crc0 = _mm_clmulepi64_si128(xmm_crc0, crc_fold, 0x01);
xmm_crc1 = _mm_xor_si128(xmm_crc1, x_tmp0);
xmm_crc1 = _mm_xor_si128(xmm_crc1, xmm_crc0);
x_tmp1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x10);
xmm_crc1 = _mm_clmulepi64_si128(xmm_crc1, crc_fold, 0x01);
xmm_crc2 = _mm_xor_si128(xmm_crc2, x_tmp1);
xmm_crc2 = _mm_xor_si128(xmm_crc2, xmm_crc1);
x_tmp2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x10);
xmm_crc2 = _mm_clmulepi64_si128(xmm_crc2, crc_fold, 0x01);
xmm_crc3 = _mm_xor_si128(xmm_crc3, x_tmp2);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
/*
* k5
*/
crc_fold = _mm_load_si128((__m128i *)crc_k + 1);
xmm_crc0 = xmm_crc3;
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
xmm_crc0 = _mm_srli_si128(xmm_crc0, 8);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0);
xmm_crc0 = xmm_crc3;
xmm_crc3 = _mm_slli_si128(xmm_crc3, 4);
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc0);
xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask2);
/*
* k7
*/
xmm_crc1 = xmm_crc3;
xmm_crc2 = xmm_crc3;
crc_fold = _mm_load_si128((__m128i *)crc_k + 2);
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
xmm_crc3 = _mm_and_si128(xmm_crc3, xmm_mask);
xmm_crc2 = xmm_crc3;
xmm_crc3 = _mm_clmulepi64_si128(xmm_crc3, crc_fold, 0x10);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc2);
xmm_crc3 = _mm_xor_si128(xmm_crc3, xmm_crc1);
crc = _mm_extract_epi32(xmm_crc3, 2);
return ~crc;
}
#endif

View File

@ -0,0 +1,19 @@
/* crc_folding.h
*
* Compute the CRC32 using a parallelized folding approach with the PCLMULQDQ
* instruction.
*
* Copyright (C) 2013 Intel Corporation Jim Kukunas
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef CRC_FOLDING_H_
#define CRC_FOLDING_H_
#include "deflate.h"
ZLIB_INTERNAL void crc_fold_init(deflate_state *const);
ZLIB_INTERNAL uint32_t crc_fold_512to32(deflate_state *const);
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const, unsigned char *, const unsigned char *, long);
#endif

View File

@ -0,0 +1,25 @@
#ifndef X86_CTZL_H
#define X86_CTZL_H
#include <intrin.h>
#ifdef X86_CPUID
# include "x86.h"
#endif
#if defined(_MSC_VER) && !defined(__clang__)
/* This is not a general purpose replacement for __builtin_ctzl. The function expects that value is != 0
* Because of that assumption trailing_zero is not initialized and the return value of _BitScanForward is not checked
*/
static __forceinline unsigned long __builtin_ctzl(unsigned long value)
{
#ifdef X86_CPUID
if (x86_cpu_has_tzcnt)
return _tzcnt_u32(value);
#endif
unsigned long trailing_zero;
_BitScanForward(&trailing_zero, value);
return trailing_zero;
}
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,175 @@
/*
* Fill Window with SSE2-optimized hash shifting
*
* Copyright (C) 2013 Intel Corporation
* Authors:
* Arjan van de Ven <arjan@linux.intel.com>
* Jim Kukunas <james.t.kukunas@linux.intel.com>
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifdef X86_SSE2
#include "zbuild.h"
#include <immintrin.h>
#include "deflate.h"
#include "deflate_p.h"
#include "functable.h"
extern int read_buf(PREFIX3(stream) *strm, unsigned char *buf, unsigned size);
ZLIB_INTERNAL void fill_window_sse(deflate_state *s) {
const __m128i xmm_wsize = _mm_set1_epi16(s->w_size);
register unsigned n;
register Pos *p;
unsigned more; /* Amount of free space at the end of the window. */
unsigned int wsize = s->w_size;
Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead");
do {
more = (unsigned)(s->window_size -(unsigned long)s->lookahead -(unsigned long)s->strstart);
/* Deal with !@#$% 64K limit: */
if (sizeof(int) <= 2) {
if (more == 0 && s->strstart == 0 && s->lookahead == 0) {
more = wsize;
} else if (more == (unsigned)(-1)) {
/* Very unlikely, but possible on 16 bit machine if
* strstart == 0 && lookahead == 1 (input done a byte at time)
*/
more--;
}
}
/* If the window is almost full and there is insufficient lookahead,
* move the upper half to the lower one to make room in the upper half.
*/
if (s->strstart >= wsize+MAX_DIST(s)) {
memcpy(s->window, s->window+wsize, (unsigned)wsize);
s->match_start = (s->match_start >= wsize) ? s->match_start - wsize : 0;
s->strstart -= wsize; /* we now have strstart >= MAX_DIST */
s->block_start -= (long) wsize;
/* Slide the hash table (could be avoided with 32 bit values
at the expense of memory usage). We slide even when level == 0
to keep the hash table consistent if we switch back to level > 0
later. (Using level 0 permanently is not an optimal usage of
zlib, so we don't care about this pathological case.)
*/
n = s->hash_size;
p = &s->head[n];
p -= 8;
do {
__m128i value, result;
value = _mm_loadu_si128((__m128i *)p);
result = _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);
n = wsize;
p = &s->prev[n];
p -= 8;
do {
__m128i value, result;
value = _mm_loadu_si128((__m128i *)p);
result = _mm_subs_epu16(value, xmm_wsize);
_mm_storeu_si128((__m128i *)p, result);
p -= 8;
n -= 8;
} while (n > 0);
more += wsize;
}
if (s->strm->avail_in == 0) break;
/* If there was no sliding:
* strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
* more == window_size - lookahead - strstart
* => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
* => more >= window_size - 2*WSIZE + 2
* In the BIG_MEM or MMAP case (not yet supported),
* window_size == input_size + MIN_LOOKAHEAD &&
* strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD.
* Otherwise, window_size == 2*WSIZE so more >= 2.
* If there was sliding, more >= WSIZE. So in all cases, more >= 2.
*/
Assert(more >= 2, "more < 2");
n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more);
s->lookahead += n;
/* Initialize the hash value now that we have some input: */
if (s->lookahead + s->insert >= MIN_MATCH) {
unsigned int str = s->strstart - s->insert;
s->ins_h = s->window[str];
if (str >= 1)
functable.insert_string(s, str + 2 - MIN_MATCH, 1);
#if MIN_MATCH != 3
#error Call insert_string() MIN_MATCH-3 more times
while (s->insert) {
functable.insert_string(s, str, 1);
str++;
s->insert--;
if (s->lookahead + s->insert < MIN_MATCH)
break;
}
#else
unsigned int count;
if (unlikely(s->lookahead == 1)){
count = s->insert - 1;
}else{
count = s->insert;
}
functable.insert_string(s, str, count);
s->insert -= count;
#endif
}
/* If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
* but this is not important since only literal bytes will be emitted.
*/
} while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0);
/* If the WIN_INIT bytes after the end of the current data have never been
* written, then zero those bytes in order to avoid memory check reports of
* the use of uninitialized (or uninitialised as Julian writes) bytes by
* the longest match routines. Update the high water mark for the next
* time through here. WIN_INIT is set to MAX_MATCH since the longest match
* routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
*/
if (s->high_water < s->window_size) {
unsigned long curr = s->strstart + (unsigned long)(s->lookahead);
unsigned long init;
if (s->high_water < curr) {
/* Previous high water mark below current data -- zero WIN_INIT
* bytes or up to end of window, whichever is less.
*/
init = s->window_size - curr;
if (init > WIN_INIT)
init = WIN_INIT;
memset(s->window + curr, 0, (unsigned)init);
s->high_water = curr + init;
} else if (s->high_water < (unsigned long)curr + WIN_INIT) {
/* High water mark at or above current data, but below current data
* plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
* to end of window, whichever is less.
*/
init = (unsigned long)curr + WIN_INIT - s->high_water;
if (init > s->window_size - s->high_water)
init = s->window_size - s->high_water;
memset(s->window + s->high_water, 0, (unsigned)init);
s->high_water += init;
}
}
Assert((unsigned long)s->strstart <= s->window_size - MIN_LOOKAHEAD, "not enough room for search");
}
#endif

View File

@ -0,0 +1,56 @@
/* insert_string_sse -- insert_string variant using SSE4.2's CRC instructions
*
* Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
* For conditions of distribution and use, see copyright notice in zlib.h
*
*/
#include "zbuild.h"
#include "deflate.h"
/* ===========================================================================
* Insert string str in the dictionary and set match_head to the previous head
* of the hash chain (the most recent string with same hash key). Return
* the previous length of the hash chain.
* IN assertion: all calls to to INSERT_STRING are made with consecutive
* input characters and the first MIN_MATCH bytes of str are valid
* (except for the last MIN_MATCH-1 bytes of the input file).
*/
#ifdef X86_SSE4_2_CRC_HASH
ZLIB_INTERNAL Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count) {
Pos ret = 0;
unsigned int idx;
unsigned int *ip, val, h;
for (idx = 0; idx < count; idx++) {
ip = (unsigned *)&s->window[str+idx];
memcpy(&val, ip, sizeof(val));
h = 0;
if (s->level >= TRIGGER_LEVEL)
val &= 0xFFFFFF;
#ifdef _MSC_VER
h = _mm_crc32_u32(h, val);
#elif defined(X86_SSE4_2_CRC_INTRIN)
h = __builtin_ia32_crc32si(h, val);
#else
__asm__ __volatile__ (
"crc32 %1,%0\n\t"
: "+r" (h)
: "r" (val)
);
#endif
Pos head = s->head[h & s->hash_mask];
if (head != str+idx) {
s->prev[(str+idx) & s->w_mask] = head;
s->head[h & s->hash_mask] = str+idx;
if (idx == count-1)
ret = head;
} else if (idx == count - 1) {
ret = str + idx;
}
}
return ret;
}
#endif

View File

@ -0,0 +1,68 @@
/*
* x86 feature check
*
* Copyright (C) 2013 Intel Corporation. All rights reserved.
* Author:
* Jim Kukunas
*
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include "zutil.h"
#ifdef _MSC_VER
#include <intrin.h>
#else
// Newer versions of GCC and clang come with cpuid.h
#include <cpuid.h>
#endif
ZLIB_INTERNAL int x86_cpu_has_sse2;
ZLIB_INTERNAL int x86_cpu_has_sse42;
ZLIB_INTERNAL int x86_cpu_has_pclmulqdq;
ZLIB_INTERNAL int x86_cpu_has_tzcnt;
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
#ifdef _MSC_VER
unsigned int registers[4];
__cpuid(registers, info);
*eax = registers[0];
*ebx = registers[1];
*ecx = registers[2];
*edx = registers[3];
#else
unsigned int _eax;
unsigned int _ebx;
unsigned int _ecx;
unsigned int _edx;
__cpuid(info, _eax, _ebx, _ecx, _edx);
*eax = _eax;
*ebx = _ebx;
*ecx = _ecx;
*edx = _edx;
#endif
}
void ZLIB_INTERNAL x86_check_features(void) {
unsigned eax, ebx, ecx, edx;
unsigned maxbasic;
cpuid(0, &maxbasic, &ebx, &ecx, &edx);
cpuid(1 /*CPU_PROCINFO_AND_FEATUREBITS*/, &eax, &ebx, &ecx, &edx);
x86_cpu_has_sse2 = edx & 0x4000000;
x86_cpu_has_sse42 = ecx & 0x100000;
x86_cpu_has_pclmulqdq = ecx & 0x2;
if (maxbasic >= 7) {
cpuid(7, &eax, &ebx, &ecx, &edx);
// check BMI1 bit
// Reference: https://software.intel.com/sites/default/files/article/405250/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family.pdf
x86_cpu_has_tzcnt = ebx & 0x8;
} else {
x86_cpu_has_tzcnt = 0;
}
}

View File

@ -0,0 +1,16 @@
/* cpu.h -- check for CPU features
* Copyright (C) 2013 Intel Corporation Jim Kukunas
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#ifndef CPU_H_
#define CPU_H_
extern int x86_cpu_has_sse2;
extern int x86_cpu_has_sse42;
extern int x86_cpu_has_pclmulqdq;
extern int x86_cpu_has_tzcnt;
void ZLIB_INTERNAL x86_check_features(void);
#endif /* CPU_H_ */

View File

@ -401,6 +401,9 @@ sub build_linux_source {
mkdir($source_dir . "/Server/build") if (!-e $source_dir . "/Server/build");
chdir($source_dir . "/Server/build");
print `git submodule init`;
print `git submodule update`;
print "Generating CMake build files...\n";
if ($os_flavor eq "fedora_core") {
print `cmake $cmake_options -DEQEMU_BUILD_LOGIN=ON -DEQEMU_BUILD_LUA=ON -DLUA_INCLUDE_DIR=/usr/include/lua-5.1/ -G "Unix Makefiles" ..`;

View File

@ -126,8 +126,9 @@ if [[ "$OS" == "Debian" ]]; then
apt-get $apt_options install libssl-dev
# Install libsodium
wget http://ftp.us.debian.org/debian/pool/main/libs/libsodium/libsodium-dev_1.0.11-1~bpo8+1_amd64.deb -O /home/eqemu/libsodium-dev.deb
wget http://ftp.us.debian.org/debian/pool/main/libs/libsodium/libsodium18_1.0.11-1~bpo8+1_amd64.deb -O /home/eqemu/libsodium18.deb
wget http://ftp.us.debian.org/debian/pool/main/libs/libsodium/libsodium-dev_1.0.11-2_amd64.deb -O /home/eqemu/libsodium-dev.deb
wget http://ftp.us.debian.org/debian/pool/main/libs/libsodium/libsodium18_1.0.11-2_amd64.deb -O /home/eqemu/libsodium18.deb
dpkg -i /home/eqemu/libsodium*.deb
# Cleanup after ourselves
rm -f /home/eqemu/libsodium-dev.deb
@ -146,61 +147,75 @@ if [[ "$OS" == "Debian" ]]; then
elif [[ "$OS" == "red_hat" ]]; then
# Do RedHat / CentOS stuff
# Add the MariaDB repository to yum
cat <<EOF > /etc/yum.repos.d/mariadb.repo
# MariaDB 10.1 CentOS repository list - created 2016-08-20 05:42 UTC
# http://downloads.mariadb.org/mariadb/repositories/
[mariadb]
name = MariaDB
baseurl = http://yum.mariadb.org/10.1/centos7-amd64
gpgkey=https://yum.mariadb.org/RPM-GPG-KEY-MariaDB
enabled=1
gpgcheck=1
EOF
# Install prereqs
yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
yum -y install deltarpm
yum -y install open-vm-tools vim cmake boost-* zlib-devel mariadb-server mariadb-client mariadb-devel mariadb-libs mariadb-compat perl-* lua* dos2unix php-mysql proftpd libuuid-devel
yum -y groupinstall "Development Tools" "Basic Web Server" "Compatibility Libraries"
yum -y install epel-release deltarpm
yum -y update
yum -y install \
open-vm-tools \
vim \
cmake \
boost-* \
zlib-devel \
mariadb \
mariadb-server \
mariadb-devel \
mariadb-libs \
perl-DBD-MySQL \
perl-JSON \
perl-IO-stringy \
perl-devel \
perl-Time-HiRes \
lua-devel \
dos2unix \
php-mysql \
proftpd \
libuuid-devel \
libsodium \
libsodium-devel \
openssl-devel
yum -y groupinstall \
"Development Tools" \
"Basic Web Server" \
"Compatibility Libraries"
elif [[ "$OS" == "fedora_core" ]]; then
# Do Fedora stuff
dnf -y install open-vm-tools
dnf -y install vim
dnf -y install cmake
dnf -y install boost-devel
dnf -y install zlib-devel
dnf -y install mariadb-server
dnf -y install mariadb-devel
dnf -y install mariadb-libs
dnf -y install perl
dnf -y install perl-DBD-MySQL
dnf -y install perl-IO-stringy
dnf -y install perl-devel
dnf -y install lua-devel
dnf -y install lua-sql-mysql
dnf -y install dos2unix
dnf -y install php-mysql
dnf -y install php-mysqlnd
dnf -y install proftpd
dnf -y install wget
dnf -y install compat-lua-libs
dnf -y install compat-lua-devel
dnf -y install compat-lua
dnf -y install perl-Time-HiRes
dnf -y install libuuid-devel
dnf -y install libsodium
dnf -y install libsodium-devel
dnf -y groupinstall "Development Tools"
dnf -y groupinstall "Basic Web Server"
dnf -y groupinstall "C Development Tools and Libraries"
dnf -y install \
open-vm-tools \
vim \
cmake \
boost-devel \
zlib-devel \
mariadb-server \
mariadb-devel \
perl \
perl-DBD-MySQL \
perl-IO-stringy \
perl-devel \
lua-devel \
lua-sql-mysql \
dos2unix \
php-mysqlnd \
proftpd \
wget \
compat-lua-libs \
compat-lua-devel \
compat-lua \
perl-Time-HiRes \
perl-JSON \
libuuid-devel \
libsodium \
libsodium-devel \
openssl-devel
dnf -y group install "Development Tools" "Basic Web Server" "C Development Tools and Libraries"
fi
if [[ "$OS" == "fedora_core" ]] || [[ "$OS" == "red_hat" ]]; then
# Start MariaDB server and set root password
echo "Starting MariaDB server..."
systemctl enable mariadb.service
systemctl start mariadb.service
systemctl enable mariadb.service --now
sleep 5
/usr/bin/mysqladmin -u root password $eqemu_db_root_password
fi

View File

@ -393,6 +393,7 @@
9137|2018_12_12_client_faction_tables.sql|SHOW TABLES LIKE 'faction_base_data'|empty|
9138|2018_12_12_convert_to_client_functions.sql|SELECT `id` FROM `faction_list` WHERE `id` > 4999|empty|
9139|2019_03_25_optional_npc_model.sql|SHOW COLUMNS FROM `npc_types` LIKE 'model'|empty|
9140|2019_07_03_update_range.sql|SHOW COLUMNS FROM `npc_types` LIKE 'max_movement_update_range'|empty|
# Upgrade conditions:
# This won't be needed after this system is implemented, but it is used database that are not

View File

@ -82,6 +82,7 @@ perl_event_export_settings
pets
pets_equipmentset
pets_equipmentset_entries
profanity_list
proximities
races
saylink

View File

@ -1773,10 +1773,10 @@ void WorldServer::HandleMessage(uint16 opcode, const EQ::Net::Packet &p)
}
case ServerOP_ReloadRules: {
worldserver.SendEmoteMessage(
0, 0, 0, 15,
"Rules reloaded for Zone: '%s' Instance ID: %u",
zone->GetLongName(),
zone->GetInstanceID()
0, 0, 100, 15,
"Rules reloaded for Zone: '%s' Instance ID: %u",
zone->GetLongName(),
zone->GetInstanceID()
);
RuleManager::Instance()->LoadRules(&database, RuleManager::Instance()->GetActiveRuleset(), true);
break;