diff --git a/.gitignore b/.gitignore index 2f37a33f1..627d3c630 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ compile_flags.txt # CMake Files cmake-build-relwithdebinfo/* +skill-caps.diff diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index ff130adb0..c8105eb83 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -98,6 +98,7 @@ SET(common_sources json/json.hpp json/jsoncpp.cpp zone_store.cpp + memory/ksm.hpp net/console_server.cpp net/console_server_connection.cpp net/crc32.cpp diff --git a/common/eqemu_logsys.h b/common/eqemu_logsys.h index 7ec2bab3f..069773959 100644 --- a/common/eqemu_logsys.h +++ b/common/eqemu_logsys.h @@ -144,6 +144,7 @@ namespace Logs { XTargets, EvolveItem, PositionUpdate, + KSM, MaxCategoryID /* Don't Remove this */ }; @@ -246,7 +247,8 @@ namespace Logs { "Corpses", "XTargets", "EvolveItem", - "PositionUpdate" + "PositionUpdate", + "KSM" // Kernel Samepage Merging }; } diff --git a/common/eqemu_logsys_log_aliases.h b/common/eqemu_logsys_log_aliases.h index bb65cece5..4f1eadde7 100644 --- a/common/eqemu_logsys_log_aliases.h +++ b/common/eqemu_logsys_log_aliases.h @@ -861,7 +861,17 @@ #define LogPositionUpdateDetail(message, ...) do {\ if (LogSys.IsLogEnabled(Logs::Detail, Logs::PositionUpdate))\ - OutF(LogSys, Logs::Detail, Logs::PositionUpdate, __FILE__, __func__, __LINE__, message, ##__VA_ARGS__);\ + OutF(LogSys, Logs::Detail, Logs::PositionUpdate, __FILE__, __func__, __LINE__, message, ##__VA_ARGS__); \ +} while (0) + +#define LogKSM(message, ...) do {\ + if (LogSys.IsLogEnabled(Logs::General, Logs::KSM))\ + OutF(LogSys, Logs::General, Logs::KSM, __FILE__, __func__, __LINE__, message, ##__VA_ARGS__);\ +} while (0) + +#define LogKSMDetail(message, ...) do {\ + if (LogSys.IsLogEnabled(Logs::Detail, Logs::KSM))\ + OutF(LogSys, Logs::Detail, Logs::KSM, __FILE__, __func__, __LINE__, message, ##__VA_ARGS__);\ } while (0) #define Log(debug_level, log_category, message, ...) do {\ diff --git a/common/memory/ksm.hpp b/common/memory/ksm.hpp new file mode 100644 index 000000000..079b83f1a --- /dev/null +++ b/common/memory/ksm.hpp @@ -0,0 +1,220 @@ +#ifndef EQEMU_KSM_HPP +#define EQEMU_KSM_HPP + +#include "../eqemu_logsys.h" +#include +#include +#include +#ifdef _WIN32 +#include // For _aligned_malloc, _aligned_free +#include +#else +#include // For madvise +#include // For sysconf, sbrk +#endif + + +// Page-aligned allocator for std::vector +template +class PageAlignedAllocator { +public: + using value_type = T; + + PageAlignedAllocator() noexcept = default; + template + PageAlignedAllocator(const PageAlignedAllocator&) noexcept {} + + T* allocate(std::size_t n) { + void* ptr = nullptr; + size_t size = n * sizeof(T); + +#ifdef _WIN32 + // Simply allocate memory without alignment + ptr = malloc(size); + if (!ptr) throw std::bad_alloc(); +#else + size_t alignment = getPageSize(); // Get the system's page size + if (posix_memalign(&ptr, alignment, size) != 0) { + throw std::bad_alloc(); + } +#endif + return static_cast(ptr); + } + + void deallocate(T* p, std::size_t) noexcept { + free(p); + } + +private: + size_t getPageSize() const + { +#ifdef _WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + return sysInfo.dwPageSize; // Page size in bytes +#else + return static_cast(sysconf(_SC_PAGESIZE)); +#endif + }; +}; + +template +bool operator==(const PageAlignedAllocator&, const PageAlignedAllocator&) noexcept { + return true; +} + +template +bool operator!=(const PageAlignedAllocator&, const PageAlignedAllocator&) noexcept { + return false; +} + +// Kernel Samepage Merging (KSM) functionality +namespace KSM { + +#ifdef _WIN32 + // Windows-specific placeholder functions (no-op) + inline void CheckPageAlignment(void* ptr) { + } + + inline void* AllocatePageAligned(size_t size) { + return memset(malloc(size), 0, size); + } + + inline void MarkMemoryForKSM(void* start, size_t size) { + } + + inline void AlignHeapToPageBoundary() { + } + + inline void* MarkHeapStart() { + return nullptr; + } + + inline size_t MeasureHeapUsage(void* start) { + return 0; + } +#else + // Linux-specific functionality + inline void CheckPageAlignment(void* ptr) { + size_t page_size = sysconf(_SC_PAGESIZE); + if (reinterpret_cast(ptr) % page_size == 0) { + LogKSMDetail("Memory is page-aligned [{}]", ptr); + } else { + LogKSMDetail("Memory is NOT page-aligned [{}]", ptr); + } + } + + inline void* AllocatePageAligned(size_t size) { + size_t page_size = sysconf(_SC_PAGESIZE); + void* aligned_ptr = nullptr; + if (posix_memalign(&aligned_ptr, page_size, size) != 0) { + LogKSM("Failed to allocate page-aligned memory on Linux. page_size [{}] size [{}] bytes", page_size, size); + } + std::memset(aligned_ptr, 0, size); + return aligned_ptr; + } + + inline void MarkMemoryForKSM(void* start, size_t size) { + if (madvise(start, size, MADV_MERGEABLE) == 0) { + LogKSM("Marked memory for KSM | start [{}] size [{}] bytes", start, size); + } else { + perror("madvise failed"); + } + } + + inline void AlignHeapToPageBoundary() { + size_t page_size = sysconf(_SC_PAGESIZE); + if (page_size == 0) { + LogKSM("Failed to retrieve page size SC_PAGESIZE [{}]", page_size); + return; + } + + void* current_break = sbrk(0); + if (current_break == (void*)-1) { + LogKSM("Failed to retrieve the current program break"); + return; + } + + uintptr_t current_address = reinterpret_cast(current_break); + size_t misalignment = current_address % page_size; + + if (misalignment != 0) { + size_t adjustment = page_size - misalignment; + if (sbrk(adjustment) == (void*)-1) { + LogKSM("Failed to align heap to page boundary. adjustment [{}] bytes", adjustment); + return; + } + } + + LogKSMDetail("Heap aligned to next page boundary. Current break [{}]", sbrk(0)); + } + + inline void* MarkHeapStart() { + void* current_pos = sbrk(0); + AlignHeapToPageBoundary(); + return current_pos; + } + + inline size_t MeasureHeapUsage(void* start) { + void* current_break = sbrk(0); + return static_cast(current_break) - static_cast(start); + } +#endif + + + inline size_t getPageSize() + { +#ifdef _WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + return sysInfo.dwPageSize; // Page size in bytes +#else + return static_cast(sysconf(_SC_PAGESIZE)); // POSIX page size +#endif + }; + + template + inline void PageAlignVectorAligned(std::vector>& vec) { + if (vec.empty()) { + return; + } + + size_t page_size = getPageSize(); + void* start = vec.data(); + size_t size = vec.size() * sizeof(T); + + // Check if the memory is page-aligned + if (reinterpret_cast(start) % page_size != 0) { + // Allocate a new aligned vector + std::vector> aligned_vec(vec.get_allocator()); + aligned_vec.reserve(vec.capacity()); // Match capacity to avoid reallocation during copy + + // Copy elements from the original vector + aligned_vec.insert(aligned_vec.end(), vec.begin(), vec.end()); + + // Swap the aligned vector with the original vector + vec.swap(aligned_vec); + + // Clear the temporary aligned vector to free its memory + aligned_vec.clear(); + + // Verify the new alignment + start = vec.data(); + if (reinterpret_cast(start) % page_size != 0) { + throw std::runtime_error("Failed to align vector memory to page boundaries."); + } + + LogKSMDetail("Vector reallocated to ensure page alignment. start [{}] size [{}] bytes", start, size); + } else { + LogKSMDetail("Vector is already page-aligned. start [{}] size [{}] bytes", start, size); + } + +#ifndef _WIN32 + // Mark memory for KSM (only on non-Windows systems) + MarkMemoryForKSM(start, size); +#endif + } + +} + +#endif // EQEMU_KSM_HPP diff --git a/world/cli/test.cpp b/world/cli/test.cpp index 738741723..172d46d21 100644 --- a/world/cli/test.cpp +++ b/world/cli/test.cpp @@ -1,6 +1,8 @@ #include #include +#include #include "../../common/events/player_events.h" +#include "../../common/memory/ksm.hpp" void WorldserverCLI::TestCommand(int argc, char **argv, argh::parser &cmd, std::string &description) { @@ -10,5 +12,21 @@ void WorldserverCLI::TestCommand(int argc, char **argv, argh::parser &cmd, std:: return; } - + void* start_marker = KSM::MarkHeapStart(); + std::cout << "Start marker: " << start_marker << "\n"; + + std::vector vec = {}; + for (int i = 0; i < 100000; i++) { + vec.push_back("Some random string"); + } + + // Measure allocated memory size + size_t allocated_size = KSM::MeasureHeapUsage(start_marker); + // Convert to MB as a float and output with precision + double allocated_size_mb = static_cast(allocated_size) / (1024 * 1024); + std::cout << std::fixed << std::setprecision(3) + << "Allocated size: " << allocated_size_mb << " MB\n"; + + // Mark memory for KSM + KSM::MarkMemoryForKSM(start_marker, allocated_size); } diff --git a/zone/gm_commands/loc.cpp b/zone/gm_commands/loc.cpp index 62fdf4be4..4e4085add 100755 --- a/zone/gm_commands/loc.cpp +++ b/zone/gm_commands/loc.cpp @@ -9,6 +9,19 @@ void command_loc(Client *c, const Seperator *sep) auto target_position = target->GetPosition(); + // check los benchmark + BenchTimer timer; + for (int i = 0; i < 1000; i++) { + zone->zonemap->CheckLoS(c->GetPosition(), target_position); + } + c->Message( + Chat::White, + fmt::format( + "CheckLoS benchmark took [{}]", + timer.elapsed() + ).c_str() + ); + c->Message( Chat::White, fmt::format( diff --git a/zone/map.cpp b/zone/map.cpp index 96feddb9c..7e3d8822c 100644 --- a/zone/map.cpp +++ b/zone/map.cpp @@ -7,6 +7,7 @@ #include "raycast_mesh.h" #include "zone.h" #include "../common/file.h" +#include "../common/memory/ksm.hpp" #include #include @@ -953,6 +954,7 @@ bool Map::LoadV2(FILE *f) { return true; } + void Map::RotateVertex(glm::vec3 &v, float rx, float ry, float rz) { glm::vec3 nv = v; diff --git a/zone/raycast_mesh.cpp b/zone/raycast_mesh.cpp index 8873c293e..20c3f6b71 100644 --- a/zone/raycast_mesh.cpp +++ b/zone/raycast_mesh.cpp @@ -1,4 +1,6 @@ #include "raycast_mesh.h" +#include "../common/memory/ksm.hpp" +#include "../common/eqemu_logsys.h" #include #include #include @@ -9,7 +11,7 @@ // This code snippet allows you to create an axis aligned bounding volume tree for a triangle mesh so that you can do // high-speed raycasting. // -// There are much better implementations of this available on the internet. In particular I recommend that you use +// There are much better implementations of this available on the internet. In particular I recommend that you use // OPCODE written by Pierre Terdiman. // @see: http://www.codercorner.com/Opcode.htm // @@ -17,7 +19,7 @@ // // I am providing this code snippet for the use case where you *only* want to do quick and dirty optimized raycasting. // I have not done performance testing between this version and OPCODE; so I don't know how much slower it is. However, -// anytime you switch to using a spatial data structure for raycasting, you increase your performance by orders and orders +// anytime you switch to using a spatial data structure for raycasting, you increase your performance by orders and orders // of magnitude; so this implementation should work fine for simple tools and utilities. // // It also serves as a nice sample for people who are trying to learn the algorithm of how to implement AABB trees. @@ -32,14 +34,14 @@ // // The official source can be found at: http://code.google.com/p/raycastmesh/ // -// +// #pragma warning(disable:4100) namespace RAYCAST_MESH { -typedef std::vector< RmUint32 > TriVector; +typedef std::vector> TriVector; /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /** @@ -365,7 +367,7 @@ public: { RmUint32 ret = 0; - if ( p[0] < mMin[0] ) + if ( p[0] < mMin[0] ) { ret|=CC_MINX; } @@ -374,7 +376,7 @@ public: ret|=CC_MAXX; } - if ( p[1] < mMin[1] ) + if ( p[1] < mMin[1] ) { ret|=CC_MINY; } @@ -383,7 +385,7 @@ public: ret|=CC_MAXY; } - if ( p[2] < mMin[2] ) + if ( p[2] < mMin[2] ) { ret|=CC_MINZ; } @@ -514,7 +516,7 @@ public: // the width of the longest axis is less than the minimum axis size then... // we create the leaf node and copy the triangles into the leaf node triangle array. if ( count < minLeafSize || depth >= maxDepth || laxis < minAxisSize ) - { + { // Copy the triangle indices into the leaf triangles array mLeafTriangleIndex = leafTriangles.size(); // assign the array start location for these leaf triangles. leafTriangles.push_back(count); @@ -542,7 +544,7 @@ public: // and another array that includes all triangles which intersect the 'right' half of the bounding volume node. for (auto i = triangles.begin(); i != triangles.end(); ++i) { - RmUint32 tri = (*i); + RmUint32 tri = (*i); { RmUint32 i1 = indices[tri*3+0]; @@ -590,7 +592,7 @@ public: { leftBounds.clamp(b1); // we have to clamp the bounding volume so it stays inside the parent volume. mLeft = callback->getNode(); // get a new AABB node - new ( mLeft ) NodeAABB(leftBounds); // initialize it to default constructor values. + new ( mLeft ) NodeAABB(leftBounds); // initialize it to default constructor values. // Then recursively split this node. mLeft->split(leftTriangles,vcount,vertices,tcount,indices,depth+1,maxDepth,minLeafSize,minAxisSize,callback,leafTriangles); } @@ -662,7 +664,7 @@ public: RmReal nd = nearestDistance; if ( !intersectLineSegmentAABB(mBounds.mMin,mBounds.mMax,from,dir,nd,sect) ) { - return; + return; } if ( mLeafTriangleIndex != TRI_EOF ) { @@ -754,28 +756,60 @@ public: { mMaxNodeCount+=pow2Table[i]; } - mNodes = new NodeAABB[mMaxNodeCount]; + // Allocate page-aligned memory + mNodes = static_cast(KSM::AllocatePageAligned(sizeof(NodeAABB) * mMaxNodeCount)); + if (!mNodes) { + throw std::bad_alloc(); + } mNodeCount = 0; + KSM::CheckPageAlignment(mNodes); + + mVertices = static_cast(KSM::AllocatePageAligned(sizeof(RmReal) * 3 * vcount)); + if (!mVertices) { + throw std::bad_alloc(); + } + std::memcpy(mVertices, vertices, sizeof(RmReal) * 3 * vcount); mVcount = vcount; - mVertices = (RmReal *)::malloc(sizeof(RmReal)*3*vcount); - memcpy(mVertices,vertices,sizeof(RmReal)*3*vcount); + + mIndices = static_cast(KSM::AllocatePageAligned(sizeof(RmUint32) * 3 * tcount)); + if (!mIndices) { + throw std::bad_alloc(); + } + std::memcpy(mIndices, indices, sizeof(RmUint32) * 3 * tcount); mTcount = tcount; - mIndices = (RmUint32 *)::malloc(sizeof(RmUint32)*tcount*3); - memcpy(mIndices,indices,sizeof(RmUint32)*tcount*3); - mRaycastTriangles = (RmUint32 *)::malloc(tcount*sizeof(RmUint32)); - memset(mRaycastTriangles,0,tcount*sizeof(RmUint32)); + + mRaycastTriangles = static_cast(KSM::AllocatePageAligned(sizeof(RmUint32) * tcount)); + if (!mRaycastTriangles) { + throw std::bad_alloc(); + } + std::memset(mRaycastTriangles, 0, sizeof(RmUint32) * tcount); + + mFaceNormals = static_cast(KSM::AllocatePageAligned(sizeof(RmReal) * 3 * tcount)); + if (!mFaceNormals) { + throw std::bad_alloc(); + } + std::memset(mFaceNormals, 0, sizeof(RmReal) * 3 * tcount); + + // Mark memory as mergeable for KSM + KSM::MarkMemoryForKSM(mVertices, sizeof(RmReal) * 3 * vcount); + KSM::MarkMemoryForKSM(mIndices, sizeof(RmUint32) * 3 * tcount); + KSM::MarkMemoryForKSM(mRaycastTriangles, sizeof(RmUint32) * tcount); + KSM::MarkMemoryForKSM(mFaceNormals, sizeof(RmReal) * 3 * tcount); + mRoot = getNode(); mFaceNormals = NULL; new ( mRoot ) NodeAABB(mVcount,mVertices,mTcount,mIndices,maxDepth,minLeafSize,minAxisSize,this,mLeafTriangles); + + KSM::MarkMemoryForKSM(mLeafTriangles.data(), mLeafTriangles.size() * sizeof(RmUint32)); } ~MyRaycastMesh(void) { - delete []mNodes; - ::free(mVertices); - ::free(mIndices); - ::free(mFaceNormals); - ::free(mRaycastTriangles); + if (mNodes) { free(mNodes); } + if (mVertices) { free(mVertices); } + if (mIndices) { free(mIndices); } + if (mRaycastTriangles) { free(mRaycastTriangles); } + if (mFaceNormals) { free(mFaceNormals); } } virtual bool raycast(const RmReal *from,const RmReal *to,RmReal *hitLocation,RmReal *hitNormal,RmReal *hitDistance) @@ -812,7 +846,7 @@ public: return mRoot->mBounds.mMax; } - virtual NodeAABB * getNode(void) + virtual NodeAABB * getNode(void) { assert( mNodeCount < mMaxNodeCount ); NodeAABB *ret = &mNodes[mNodeCount]; @@ -820,7 +854,7 @@ public: return ret; } - virtual void getFaceNormal(RmUint32 tri,RmReal *faceNormal) + virtual void getFaceNormal(RmUint32 tri,RmReal *faceNormal) { if ( mFaceNormals == NULL ) { @@ -938,6 +972,29 @@ RaycastMesh * createRaycastMesh(RmUint32 vcount, // The number of vertices in t ) { auto m = new MyRaycastMesh(vcount, vertices, tcount, indices, maxDepth, minLeafSize, minAxisSize); + + // Calculate memory usage + size_t vertex_size = vcount * sizeof(RmReal) * 3; // Each vertex has 3 floats + size_t index_size = tcount * 3 * sizeof(RmUint32); // Each triangle has 3 indices + size_t bvh_node_size = m->mNodeCount * sizeof(NodeAABB); // BVH Node memory usage + size_t bvh_leaf_size = m->mLeafTriangles.size() * sizeof(RmUint32); // BVH leaf triangles + + size_t bvh_size = bvh_node_size + bvh_leaf_size; // Total BVH size + size_t total_size = vertex_size + index_size + bvh_size; + + KSM::CheckPageAlignment(m->mNodes); + KSM::CheckPageAlignment(m->mVertices); + + LogInfo( + "Map Raycast Memory Usage | Vertices [{:.2f}] MB Indices [{:.2f}] MB BVH Nodes [{:.2f}] MB BVH Leaves [{:.2f}] MB BVH Total [{:.2f}] MB", + vertex_size / (1024.0 * 1024.0), + index_size / (1024.0 * 1024.0), + bvh_node_size / (1024.0 * 1024.0), + bvh_leaf_size / (1024.0 * 1024.0), + bvh_size / (1024.0 * 1024.0) + ); + LogInfo("Total Raycast Memory [{:.2f}] MB", total_size / (1024.0 * 1024.0)); + return static_cast< RaycastMesh * >(m); } @@ -984,12 +1041,12 @@ MyRaycastMesh::MyRaycastMesh(std::vector& rm_buffer) return; char* buf = rm_buffer.data(); - + chunk_size = sizeof(RmUint32); memcpy(&mVcount, buf, chunk_size); buf += chunk_size; bytes_read += chunk_size; - + chunk_size = (sizeof(RmReal) * (3 * mVcount)); mVertices = (RmReal *)::malloc(chunk_size); memcpy(mVertices, buf, chunk_size); @@ -1037,7 +1094,7 @@ MyRaycastMesh::MyRaycastMesh(std::vector& rm_buffer) buf += chunk_size; bytes_read += chunk_size; } - + chunk_size = sizeof(RmUint32); memcpy(&mNodeCount, buf, chunk_size); buf += chunk_size; @@ -1071,7 +1128,7 @@ MyRaycastMesh::MyRaycastMesh(std::vector& rm_buffer) mNodes[index].mLeft = &mNodes[lNodeIndex]; buf += chunk_size; bytes_read += chunk_size; - + RmUint32 rNodeIndex; chunk_size = sizeof(RmUint32); memcpy(&rNodeIndex, buf, chunk_size); @@ -1106,7 +1163,7 @@ MyRaycastMesh::MyRaycastMesh(std::vector& rm_buffer) void MyRaycastMesh::serialize(std::vector& rm_buffer) { rm_buffer.clear(); - + size_t rm_buffer_size_ = 0; rm_buffer_size_ += sizeof(RmUint32); // mVcount