高级内存优化技术与大页内存
深入探讨大页内存、内存压缩、透明巨页等高级内存优化技术及其在实际应用中的性能提升
在内存密集型应用中,传统的4KB页面大小往往成为性能瓶颈。大页内存(Huge Pages)和透明巨页(THP)等高级内存优化技术通过增加页面大小,显著减少TLB(转换后备缓冲器)缺失和页表开销,从而大幅提升系统性能。本文将深入探讨这些技术的原理、配置方法和实际应用场景。
大页内存原理
TLB与页面大小关系
Rendering diagram...
TLB是CPU中用于缓存虚拟地址到物理地址映射的高速缓存,其容量有限。使用更大的页面可以显著提高TLB的覆盖率:
- 4KB页面:每个TLB条目覆盖4KB内存
- 2MB页面:每个TLB条目覆盖2MB内存(512倍提升)
- 1GB页面:每个TLB条目覆盖1GB内存(262144倍提升)
大页内存性能优势
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <unistd.h>
/* Benchmark sizing shared by the page-size tests below. */
enum {
    ALLOCATION_SIZE = 256 * 1024 * 1024, /* bytes mapped per test (256 MB) */
    ITERATIONS = 1000000                 /* number of timed random accesses */
};
/*
 * Print the huge-page related lines of /proc/meminfo and the first line of
 * the transparent_hugepage "enabled" sysfs file.
 *
 * Files that cannot be opened are skipped silently.
 */
void check_hugepage_support() {
    static const char *const meminfo_keys[] = {
        "HugePages_Total:",
        "HugePages_Free:",
        "Hugepagesize:",
    };
    const size_t key_count = sizeof meminfo_keys / sizeof meminfo_keys[0];
    char buf[256];

    fputs("大页内存支持检查\n", stdout);
    fputs("=============================\n", stdout);

    /* Explicit huge page pool counters. */
    FILE *meminfo = fopen("/proc/meminfo", "r");
    if (meminfo != NULL) {
        while (fgets(buf, sizeof buf, meminfo) != NULL) {
            for (size_t k = 0; k < key_count; k++) {
                if (strstr(buf, meminfo_keys[k]) != NULL) {
                    fputs(buf, stdout);
                    break; /* print each matching line once */
                }
            }
        }
        fclose(meminfo);
    }

    /* Transparent huge page mode. */
    FILE *thp = fopen("/sys/kernel/mm/transparent_hugepage/enabled", "r");
    if (thp != NULL) {
        if (fgets(buf, sizeof buf, thp) != NULL) {
            printf("\n透明巨页状态: %s", buf);
        }
        fclose(thp);
    }
}
/*
 * Benchmark random and strided access over an anonymous private mapping
 * that is meant to stay on base-size pages.
 *
 * Maps ALLOCATION_SIZE bytes, asks the kernel (best effort) not to back the
 * range with transparent huge pages, zero-fills it, then times
 *   1. ITERATIONS read-modify-write accesses at rand()-chosen indices, and
 *   2. a small fixed number of strided sweeps over the whole array.
 * Results are printed to stdout. An mmap failure is reported with perror()
 * and the function returns early.
 *
 * Needs <sys/mman.h> and <sys/time.h> (gettimeofday, struct timeval).
 */
void test_normal_pages() {
    /*
     * The sweep runs a fixed, small number of times. Repeating it ITERATIONS
     * times would mean about ITERATIONS * (num_elements / 64) accesses and
     * would not finish in a reasonable time.
     */
    const int sweep_passes = 10;
    const size_t sweep_stride = 64; /* in ints */

    printf("\n普通4KB页面性能测试\n");
    printf("=============================\n");

    void *memory = mmap(NULL, ALLOCATION_SIZE,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (memory == MAP_FAILED) {
        perror("mmap failed");
        return;
    }
#ifdef MADV_NOHUGEPAGE
    /* Without this, a system with THP set to "always" may back the baseline
     * with huge pages. Failure is harmless, so it is ignored. */
    (void)madvise(memory, ALLOCATION_SIZE, MADV_NOHUGEPAGE);
#endif
    printf("分配内存地址: %p\n", memory);
    printf("分配大小: %d MB\n", ALLOCATION_SIZE / (1024 * 1024));

    /* Touch every page so page faults are not part of the timed sections. */
    memset(memory, 0, ALLOCATION_SIZE);

    int *array = (int *)memory;
    size_t num_elements = ALLOCATION_SIZE / sizeof(int);
    struct timeval start, end;

    /* Random access pattern. */
    gettimeofday(&start, NULL);
    for (int iter = 0; iter < ITERATIONS; iter++) {
        size_t index = rand() % num_elements;
        array[index] = array[index] + 1;
    }
    gettimeofday(&end, NULL);
    double elapsed = (end.tv_sec - start.tv_sec) +
                     (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("随机访问时间: %.4f 秒\n", elapsed);
    printf("访问速度: %.2f M ops/sec\n", ITERATIONS / elapsed / 1e6);

    /* Strided sweep. */
    gettimeofday(&start, NULL);
    for (int pass = 0; pass < sweep_passes; pass++) {
        for (size_t i = 0; i < num_elements; i += sweep_stride) {
            array[i] = array[i] + 1;
        }
    }
    gettimeofday(&end, NULL);
    elapsed = (end.tv_sec - start.tv_sec) +
              (end.tv_usec - start.tv_usec) / 1000000.0;

    /* Throughput is based on the accesses actually performed. */
    size_t per_pass = (num_elements + sweep_stride - 1) / sweep_stride;
    double sweep_accesses = (double)per_pass * sweep_passes;
    printf("顺序访问时间: %.4f 秒\n", elapsed);
    printf("访问速度: %.2f M ops/sec\n", sweep_accesses / elapsed / 1e6);

    munmap(memory, ALLOCATION_SIZE);
}
/*
 * Benchmark random and strided access over a mapping backed by huge pages.
 *
 * First tries an explicit MAP_HUGETLB mapping of ALLOCATION_SIZE bytes. If
 * that fails, it falls back to a regular anonymous mapping and hints the
 * kernel with madvise(MADV_HUGEPAGE), so transparent huge pages can be used
 * even when THP is in "madvise" mode. The timing phases match the
 * regular-page test: ITERATIONS random accesses, then a small fixed number
 * of strided sweeps. Results are printed to stdout.
 *
 * Needs <sys/mman.h>, <errno.h> and <sys/time.h>.
 */
void test_huge_pages() {
    /*
     * Fixed, small number of sweeps. Repeating the sweep ITERATIONS times
     * would mean about ITERATIONS * (num_elements / 64) accesses.
     */
    const int sweep_passes = 10;
    const size_t sweep_stride = 64; /* in ints */

    printf("\n大页内存性能测试\n");
    printf("=============================\n");

    void *memory = mmap(NULL, ALLOCATION_SIZE,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
                        -1, 0);
    if (memory == MAP_FAILED) {
        printf("大页内存分配失败: %s\n", strerror(errno));
        printf("尝试使用透明巨页...\n");

        memory = mmap(NULL, ALLOCATION_SIZE,
                      PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (memory == MAP_FAILED) {
            perror("内存分配失败");
            return;
        }
#ifdef MADV_HUGEPAGE
        /* Best effort: the test still runs if the hint is rejected. */
        (void)madvise(memory, ALLOCATION_SIZE, MADV_HUGEPAGE);
#endif
        printf("使用透明巨页分配: %p\n", memory);
    } else {
        printf("显式大页内存分配成功: %p\n", memory);
    }
    printf("分配大小: %d MB\n", ALLOCATION_SIZE / (1024 * 1024));

    /* Touch every page so page faults are not part of the timed sections. */
    memset(memory, 0, ALLOCATION_SIZE);

    int *array = (int *)memory;
    size_t num_elements = ALLOCATION_SIZE / sizeof(int);
    struct timeval start, end;

    /* Random access pattern. */
    gettimeofday(&start, NULL);
    for (int iter = 0; iter < ITERATIONS; iter++) {
        size_t index = rand() % num_elements;
        array[index] = array[index] + 1;
    }
    gettimeofday(&end, NULL);
    double elapsed = (end.tv_sec - start.tv_sec) +
                     (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("随机访问时间: %.4f 秒\n", elapsed);
    printf("访问速度: %.2f M ops/sec\n", ITERATIONS / elapsed / 1e6);

    /* Strided sweep. */
    gettimeofday(&start, NULL);
    for (int pass = 0; pass < sweep_passes; pass++) {
        for (size_t i = 0; i < num_elements; i += sweep_stride) {
            array[i] = array[i] + 1;
        }
    }
    gettimeofday(&end, NULL);
    elapsed = (end.tv_sec - start.tv_sec) +
              (end.tv_usec - start.tv_usec) / 1000000.0;

    /* Throughput is based on the accesses actually performed. */
    size_t per_pass = (num_elements + sweep_stride - 1) / sweep_stride;
    double sweep_accesses = (double)per_pass * sweep_passes;
    printf("顺序访问时间: %.4f 秒\n", elapsed);
    printf("访问速度: %.2f M ops/sec\n", sweep_accesses / elapsed / 1e6);

    munmap(memory, ALLOCATION_SIZE);
}
/*
 * Simulate a record store: a heap buffer of fixed-size records, each holding
 * a short NUL-terminated string. Times a batch of point lookups at random
 * record ids and a batch of range scans over consecutive records, where each
 * "lookup" is a strlen() on the record. Timings are printed to stdout.
 */
void database_workload_simulation() {
    enum { POINT_QUERIES = 1000000, RANGE_QUERIES = 10000, RANGE_LENGTH = 1000 };
    const int num_records = 1000000;
    const int record_size = 256;
    const int total_size = num_records * record_size;
    struct timeval t0, t1;
    double seconds;

    fputs("\n数据库工作负载模拟\n", stdout);
    fputs("=============================\n", stdout);
    printf("记录数量: %d\n", num_records);
    printf("记录大小: %d 字节\n", record_size);
    printf("总内存需求: %.2f MB\n", total_size / (1024.0 * 1024.0));

    char *database = malloc(total_size);
    if (database == NULL) {
        fputs("内存分配失败\n", stdout);
        return;
    }

    /* Populate every record slot. */
    int id = 0;
    while (id < num_records) {
        snprintf(database + id * record_size, record_size, "Record_%d_Data", id);
        id++;
    }

    /* Point lookups at random ids. */
    fputs("\n模拟随机查询...\n", stdout);
    gettimeofday(&t0, NULL);
    for (int q = 0; q < POINT_QUERIES; q++) {
        const char *record = database + (rand() % num_records) * record_size;
        /* volatile keeps the compiler from dropping the strlen() call */
        volatile int len = strlen(record);
        (void)len;
    }
    gettimeofday(&t1, NULL);
    seconds = (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) / 1000000.0;
    printf("查询时间: %.4f 秒\n", seconds);
    printf("查询速度: %.2f K queries/sec\n", POINT_QUERIES / seconds / 1000);

    /* Range scans over RANGE_LENGTH consecutive records. */
    fputs("\n模拟范围查询...\n", stdout);
    gettimeofday(&t0, NULL);
    for (int q = 0; q < RANGE_QUERIES; q++) {
        int first = rand() % (num_records - RANGE_LENGTH);
        const char *record = database + first * record_size;
        for (int k = 0; k < RANGE_LENGTH; k++, record += record_size) {
            volatile int len = strlen(record);
            (void)len;
        }
    }
    gettimeofday(&t1, NULL);
    seconds = (t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) / 1000000.0;
    printf("查询时间: %.4f 秒\n", seconds);
    printf("查询速度: %.2f K queries/sec\n", RANGE_QUERIES / seconds / 1000);

    free(database);
}
/*
 * Time repeated summation over an int array at several element strides and
 * print the elapsed time and access rate for each stride.
 */
void tlb_performance_test() {
    enum { PASSES = 1000 };
    static const int stride_sizes[] = {4, 16, 64, 256, 1024, 4096};
    const int num_strides = sizeof(stride_sizes) / sizeof(stride_sizes[0]);
    const int array_size = 1024 * 1024; /* 1M ints = 4 MB */

    fputs("\nTLB性能测试\n", stdout);
    fputs("=============================\n", stdout);

    int *array = malloc(array_size * sizeof(int));
    if (array == NULL) {
        fputs("内存分配失败\n", stdout);
        return;
    }

    int fill = 0;
    while (fill < array_size) {
        array[fill] = fill;
        fill++;
    }

    fputs("测试不同步长下的TLB性能:\n", stdout);
    printf("数组大小: %d 元素 (%.2f MB)\n\n", array_size,
           array_size * sizeof(int) / (1024.0 * 1024.0));

    for (const int *sp = stride_sizes; sp < stride_sizes + num_strides; sp++) {
        const int stride = *sp;
        struct timeval t0, t1;
        volatile int sum = 0; /* volatile so the reads are not optimized away */

        gettimeofday(&t0, NULL);
        for (int pass = 0; pass < PASSES; pass++) {
            int pos = 0;
            while (pos < array_size) {
                sum += array[pos];
                pos += stride;
            }
        }
        gettimeofday(&t1, NULL);

        double seconds = (t1.tv_sec - t0.tv_sec) +
                         (t1.tv_usec - t0.tv_usec) / 1000000.0;
        printf("步长 %d: %.4f 秒 (%.2f M accesses/sec)\n",
               stride, seconds,
               (double)PASSES * array_size / stride / seconds / 1e6);
    }

    free(array);
}
/* Entry point: runs every demo of this program once, in a fixed order. */
int main() {
    /* Report what the system exposes before measuring anything. */
    check_hugepage_support();

    /* Page-size comparison: baseline first, then the huge-page variant. */
    test_normal_pages();
    test_huge_pages();

    /* Workload-style demos. */
    database_workload_simulation();
    tlb_performance_test();

    return EXIT_SUCCESS;
}
透明巨页(THP)配置
THP工作原理
Rendering diagram...
THP配置与管理
# Show the current THP mode and the current defrag policy
cat /sys/kernel/mm/transparent_hugepage/enabled
cat /sys/kernel/mm/transparent_hugepage/defrag
# The next three commands are alternatives; each one overwrites the mode.
# Enable THP for all eligible memory
echo always > /sys/kernel/mm/transparent_hugepage/enabled
# Enable THP only for ranges the application opts in with madvise()
echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
# Disable THP
echo never > /sys/kernel/mm/transparent_hugepage/enabled
# Set the THP defrag policy to "always"
echo always > /sys/kernel/mm/transparent_hugepage/defrag
# Dump the THP sysfs entries (grep -H prefixes each value with its file name)
cat /sys/kernel/mm/transparent_hugepage/*
grep -H . /sys/kernel/mm/transparent_hugepage/*
# Show (and then continuously watch, once per second) the huge-page lines
# of /proc/meminfo
cat /proc/meminfo | grep -i huge
watch -n 1 'cat /proc/meminfo | grep -i huge'
# Show huge-page related lines for one process; replace <pid> with the
# process id
cat /proc/<pid>/smaps | grep -i huge
pmap -x <pid> | grep huge
# Request THP for a specific address range.
# NOTE: the line below is a C call made from inside the application, not a
# shell command.
madvise(addr,length,MADV_HUGEPAGE);
THP性能影响分析
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
/*
 * Scan /proc/<own pid>/smaps and report whether any mapping of the current
 * process is backed by huge pages.
 *
 * Two indicators are checked:
 *   - KernelPageSize / MMUPageSize larger than the base page size
 *     (explicit huge pages of any size, not only 2048 kB);
 *   - a non-zero AnonHugePages value (anonymous memory backed by
 *     transparent huge pages, which keeps a base-size KernelPageSize).
 * Every KernelPageSize/MMUPageSize line and every non-zero AnonHugePages
 * line is echoed to stdout, followed by a one-line verdict.
 * If smaps cannot be opened the error is reported with perror().
 */
void analyze_memory_mapping() {
    char filename[256];
    snprintf(filename, sizeof(filename), "/proc/%d/smaps", getpid());
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        perror("无法打开smaps文件");
        return;
    }
    printf("当前进程内存映射分析\n");
    printf("=============================\n\n");

    /* Base page size in kB. Fall back to 4 kB if sysconf() fails. */
    long base_kb = sysconf(_SC_PAGESIZE) / 1024;
    if (base_kb <= 0) {
        base_kb = 4;
    }

    char line[1024];
    int has_huge_pages = 0;
    while (fgets(line, sizeof(line), fp)) {
        const char *colon = strchr(line, ':');
        unsigned long kb = 0;
        /* smaps value lines look like "Name:   <number> kB" */
        int parsed = (colon != NULL && sscanf(colon + 1, "%lu", &kb) == 1);

        if (strstr(line, "KernelPageSize") || strstr(line, "MMUPageSize")) {
            printf("%s", line);
            if (parsed && kb > (unsigned long)base_kb) {
                has_huge_pages = 1;
            }
        } else if (strstr(line, "AnonHugePages") && parsed && kb > 0) {
            printf("%s", line);
            has_huge_pages = 1;
        }
    }
    fclose(fp);

    if (has_huge_pages) {
        printf("\n检测到大页内存使用!\n");
    } else {
        printf("\n未检测到大页内存使用\n");
    }
}
/*
 * Map 4 MB of anonymous memory, hint the kernel with MADV_HUGEPAGE, touch
 * the range, then dump the process memory map analysis before unmapping.
 * A failed madvise() is reported but does not abort the demo.
 */
void test_madvise_hugepage() {
    const size_t region_bytes = 4 * 1024 * 1024;

    fputs("\n使用madvise控制THP\n", stdout);
    fputs("=============================\n", stdout);

    void *region = mmap(NULL, region_bytes, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (region == MAP_FAILED) {
        perror("mmap失败");
        return;
    }
    printf("分配内存: %p,大小: %zu MB\n", region, region_bytes / (1024 * 1024));

    /* Ask for huge pages on this range. */
    if (madvise(region, region_bytes, MADV_HUGEPAGE) != 0) {
        perror("madvise失败");
    } else {
        fputs("成功建议使用大页内存\n", stdout);
    }

    /* Fault the pages in before inspecting the mapping. */
    memset(region, 0, region_bytes);
    analyze_memory_mapping();

    munmap(region, region_bytes);
}
/*
 * Compare the same access loop over two 64 MB anonymous mappings: one
 * marked MADV_NOHUGEPAGE and one marked MADV_HUGEPAGE. Prints both timings
 * and, when the baseline run succeeded, the relative improvement.
 *
 * Needs <sys/mman.h> and <sys/time.h> (gettimeofday, struct timeval).
 */
void thp_performance_comparison() {
    printf("\nTHP性能对比测试\n");
    printf("=============================\n");

    const size_t size = 64 * 1024 * 1024; /* 64 MB */
    const int iterations = 10000000;
    const size_t num_elements = size / sizeof(int);
    struct timeval start, end;

    /* Declared at function scope so the second test can read it.
     * A negative value means the baseline did not run. */
    double time1 = -1.0;

    /* Test 1: THP disabled for the range. */
    printf("测试1: 不建议使用THP\n");
    void *memory1 = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (memory1 != MAP_FAILED) {
        madvise(memory1, size, MADV_NOHUGEPAGE);
        memset(memory1, 0, size);

        volatile int *array = (int *)memory1;
        gettimeofday(&start, NULL);
        for (int i = 0; i < iterations; i++) {
            array[i % num_elements]++;
        }
        gettimeofday(&end, NULL);

        time1 = (end.tv_sec - start.tv_sec) +
                (end.tv_usec - start.tv_usec) / 1000000.0;
        printf(" 执行时间: %.4f 秒\n", time1);
        munmap(memory1, size);
    } else {
        perror("mmap失败");
    }

    /* Test 2: THP requested for the range. */
    printf("测试2: 建议使用THP\n");
    void *memory2 = mmap(NULL, size, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (memory2 != MAP_FAILED) {
        madvise(memory2, size, MADV_HUGEPAGE);
        memset(memory2, 0, size);

        volatile int *array = (int *)memory2;
        gettimeofday(&start, NULL);
        for (int i = 0; i < iterations; i++) {
            array[i % num_elements]++;
        }
        gettimeofday(&end, NULL);

        double time2 = (end.tv_sec - start.tv_sec) +
                       (end.tv_usec - start.tv_usec) / 1000000.0;
        printf(" 执行时间: %.4f 秒\n", time2);
        /* Needs a valid, non-zero baseline. */
        if (time1 > 0.0) {
            printf(" 性能提升: %.2f%%\n", (time1 - time2) / time1 * 100);
        }
        munmap(memory2, size);
    } else {
        perror("mmap失败");
    }
}
/* Entry point: inspect the memory map, then run the two THP demos. */
int main() {
    /* Memory map before any test mapping exists. */
    analyze_memory_mapping();

    /* madvise() demo, then the timing comparison. */
    test_madvise_hugepage();
    thp_performance_comparison();

    return EXIT_SUCCESS;
}
内存压缩技术
zRAM压缩内存
Rendering diagram...
zRAM配置与使用
# Load the zram kernel module
modprobe zram
# Select the compression algorithm BEFORE sizing the device: writing disksize
# initializes the device, and the algorithm cannot be changed afterwards
echo lz4 > /sys/block/zram0/comp_algorithm
# Set the device size (this initializes the device)
echo 2G > /sys/block/zram0/disksize
# Alternative to the two echo lines above: let zramctl pick a free device and
# set size and algorithm in one step (it prints the device it chose)
# zramctl --find --size 2G --algorithm lz4
# Option A: use the device as swap
mkswap /dev/zram0
swapon /dev/zram0
# Option B: use the device as a regular filesystem instead. Choose A or B
# for a given device; running both would overwrite the swap signature.
# mkfs.ext4 /dev/zram0
# mount /dev/zram0 /mnt/compressed
# Show zram status
zramctl
cat /sys/block/zram0/mm_stat
cat /sys/block/zram0/io_stat
# Watch the compression statistics once per second
watch -n 1 'cat /sys/block/zram0/mm_stat'
# Resize: an initialized device rejects a new disksize, so take it out of
# use and reset it first. Reset discards all data stored on the device.
swapoff /dev/zram0
echo 1 > /sys/block/zram0/reset
echo lz4 > /sys/block/zram0/comp_algorithm
echo 4G > /sys/block/zram0/disksize
mkswap /dev/zram0
swapon /dev/zram0
# Disable zRAM (use umount instead of swapoff if Option B was chosen)
swapoff /dev/zram0
# umount /dev/zram0
echo 1 > /sys/block/zram0/reset
内存压缩性能测试
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <time.h>
/*
 * Shannon entropy of a byte buffer, in bits per byte.
 * Returns 0.0 for an empty buffer.
 */
static double byte_entropy_bits(const char *buf, size_t len) {
    size_t histogram[256] = {0};
    double entropy = 0.0;

    if (len == 0) {
        return 0.0;
    }
    for (size_t i = 0; i < len; i++) {
        histogram[(unsigned char)buf[i]]++;
    }
    for (int b = 0; b < 256; b++) {
        if (histogram[b] > 0) {
            double probability = (double)histogram[b] / len;
            entropy -= probability * log2(probability);
        }
    }
    return entropy;
}

/*
 * Fill a 512 MB heap buffer first with a repeating, highly compressible
 * pattern and then with rand() bytes. For each fill, print the byte entropy
 * and the compression ratio derived from it. Also time a sequential pass
 * over the compressible data and a batch of random reads over the random
 * data.
 *
 * Needs <math.h> (log2, link with -lm) and <sys/time.h> (gettimeofday).
 */
void memory_compression_workload() {
    static const char alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const size_t alphabet_len = sizeof alphabet - 1; /* without the NUL */
    const size_t data_size = (size_t)512 * 1024 * 1024; /* 512 MB */
    const size_t pattern_size = 1024; /* the pattern repeats every 1 KB */
    const int random_reads = 10000000;

    printf("内存压缩工作负载模拟\n");
    printf("=============================\n");

    char *data = malloc(data_size);
    if (!data) {
        printf("内存分配失败\n");
        return;
    }

    /*
     * Compressible data: a 1 KB block of cycling letters, repeated.
     * The second modulo keeps the index inside the 26-letter alphabet.
     * Indexing the literal with (i % pattern_size) alone reads past its end.
     */
    printf("创建可压缩数据模式...\n");
    for (size_t i = 0; i < data_size; i++) {
        data[i] = alphabet[(i % pattern_size) % alphabet_len];
    }

    double entropy = byte_entropy_bits(data, data_size);
    printf("数据熵值: %.4f bits/byte (低熵值表示高可压缩性)\n", entropy);
    printf("理论压缩率: %.2f%%\n", (1 - entropy / 8) * 100);

    /* Sequential pass over the compressible data. */
    printf("\n顺序访问性能测试:\n");
    struct timeval start, end;
    volatile int sum = 0; /* volatile so the reads are not optimized away */
    gettimeofday(&start, NULL);
    for (size_t i = 0; i < data_size; i++) {
        sum += data[i];
    }
    gettimeofday(&end, NULL);
    double seq_time = (end.tv_sec - start.tv_sec) +
                      (end.tv_usec - start.tv_usec) / 1000000.0;
    printf(" 顺序访问时间: %.4f 秒\n", seq_time);
    printf(" 访问速度: %.2f MB/sec\n", data_size / seq_time / (1024 * 1024));

    /* Incompressible data: pseudo-random bytes. */
    printf("\n创建不可压缩数据模式...\n");
    for (size_t i = 0; i < data_size; i++) {
        data[i] = rand() % 256;
    }

    entropy = byte_entropy_bits(data, data_size);
    printf("数据熵值: %.4f bits/byte\n", entropy);
    printf("理论压缩率: %.2f%%\n", (1 - entropy / 8) * 100);

    /* Random reads over the random data. */
    printf("\n随机访问性能测试:\n");
    gettimeofday(&start, NULL);
    for (int iter = 0; iter < random_reads; iter++) {
        size_t index = rand() % data_size;
        sum += data[index];
    }
    gettimeofday(&end, NULL);
    double rand_time = (end.tv_sec - start.tv_sec) +
                       (end.tv_usec - start.tv_usec) / 1000000.0;
    printf(" 随机访问时间: %.4f 秒\n", rand_time);
    printf(" 访问速度: %.2f M accesses/sec\n",
           (double)random_reads / rand_time / 1e6);

    free(data);
}
/*
 * Print a manual test plan for comparing zRAM with traditional swap, and
 * the shell commands to monitor it. This function measures nothing itself.
 */
void zram_performance_comparison() {
    /* Emitted verbatim, in order. None of the lines contains a format
     * specifier, so they are written with fputs(). */
    static const char *const lines[] = {
        "\nzRAM性能对比测试\n",
        "=============================\n",
        "测试场景:\n",
        "1. 创建大量重复数据(高压缩率)\n",
        "2. 触发内存压力\n",
        "3. 观察zRAM压缩效果\n",
        "4. 对比传统交换性能\n",
        "\n监控命令:\n",
        " watch -n 1 'cat /proc/meminfo | grep -E \"MemAvailable|SwapTotal|SwapFree\"'\n",
        " watch -n 1 'cat /sys/block/zram0/mm_stat'\n",
        " watch -n 1 'cat /proc/vmstat | grep -E \"pswpin|pswpout\"'\n",
    };

    for (size_t i = 0; i < sizeof lines / sizeof lines[0]; i++) {
        fputs(lines[i], stdout);
    }
}
/* Entry point: run the compression workload, then print the zRAM test plan. */
int main() {
    /* Entropy and access timing on compressible and random data. */
    memory_compression_workload();

    /* Manual zRAM comparison instructions. */
    zram_performance_comparison();

    return EXIT_SUCCESS;
}
NUMA感知的内存分配
NUMA内存访问优化
#include <stdio.h>
#include <stdlib.h>
#include <numa.h>
#include <string.h>
#include <time.h>
/*
 * Compare write access to memory allocated on the node the thread runs on
 * (node 0) with memory allocated on another node (node 1, when present).
 *
 * Prints the time and rate for each case. When both runs succeed it also
 * prints the relative slowdown of the remote case.
 *
 * Needs libnuma (<numa.h>, link with -lnuma) and <sys/time.h>.
 */
void numa_aware_allocation() {
    printf("NUMA感知的内存分配\n");
    printf("=============================\n");

    /* numa_available() returns -1 when NUMA is unavailable, so testing it
     * with "!" inverts the check. */
    if (numa_available() < 0) {
        printf("NUMA不可用\n");
        return;
    }

    int max_node = numa_max_node();
    printf("可用NUMA节点: %d\n", max_node + 1);

    const size_t size = 256 * 1024 * 1024; /* 256 MB */
    const size_t num_elements = size / sizeof(int);
    const int local_node = 0;
    struct timeval start, end;

    /* Declared at function scope so the remote test can compare with it.
     * A negative value means the local run did not happen. */
    double local_time = -1.0;

    /* Local access: run on node 0, allocate on node 0. */
    printf("\n本地内存访问测试:\n");
    numa_run_on_node(local_node);
    numa_set_preferred(local_node);
    void *local_mem = numa_alloc_onnode(size, local_node);
    if (local_mem) {
        memset(local_mem, 0, size);
        volatile int *array = (int *)local_mem;

        gettimeofday(&start, NULL);
        for (size_t i = 0; i < num_elements; i += 64) {
            array[i] = (int)i;
        }
        gettimeofday(&end, NULL);

        local_time = (end.tv_sec - start.tv_sec) +
                     (end.tv_usec - start.tv_usec) / 1000000.0;
        printf(" 本地访问时间: %.4f 秒\n", local_time);
        printf(" 访问速度: %.2f MB/sec\n", size / local_time / (1024 * 1024));
        numa_free(local_mem, size);
    } else {
        printf(" 本地内存分配失败\n");
    }

    /* Remote access: still running on node 0, memory on node 1. */
    if (max_node >= 1) {
        printf("\n远程内存访问测试:\n");
        const int remote_node = 1;
        numa_run_on_node(local_node);
        numa_set_preferred(local_node);
        void *remote_mem = numa_alloc_onnode(size, remote_node);
        if (remote_mem) {
            memset(remote_mem, 0, size);
            volatile int *array = (int *)remote_mem;

            gettimeofday(&start, NULL);
            for (size_t i = 0; i < num_elements; i += 64) {
                array[i] = (int)i;
            }
            gettimeofday(&end, NULL);

            double remote_time = (end.tv_sec - start.tv_sec) +
                                 (end.tv_usec - start.tv_usec) / 1000000.0;
            printf(" 远程访问时间: %.4f 秒\n", remote_time);
            printf(" 访问速度: %.2f MB/sec\n", size / remote_time / (1024 * 1024));
            /* Needs a valid, non-zero local baseline. */
            if (local_time > 0.0) {
                printf(" 性能差异: %.2f%%\n",
                       (remote_time - local_time) / local_time * 100);
            }
            numa_free(remote_mem, size);
        } else {
            printf(" 远程内存分配失败\n");
        }
    }
}
/*
 * Allocate 256 MB with numa_alloc_interleaved(), touch it, and time a
 * strided write pass over the buffer. Results are printed to stdout.
 *
 * Needs libnuma (<numa.h>, link with -lnuma) and <sys/time.h>.
 */
void numa_interleaved_allocation() {
    printf("\nNUMA交错分配\n");
    printf("=============================\n");

    /* numa_available() returns -1 when NUMA is unavailable, so testing it
     * with "!" inverts the check. */
    if (numa_available() < 0) {
        printf("NUMA不可用\n");
        return;
    }

    const size_t size = 256 * 1024 * 1024; /* 256 MB */
    void *interleaved_mem = numa_alloc_interleaved(size);
    if (!interleaved_mem) {
        /* Report the failure instead of returning silently. */
        printf("交错内存分配失败\n");
        return;
    }

    printf("交错分配内存: %p,大小: %zu MB\n",
           interleaved_mem, size / (1024 * 1024));

    /* Fault the pages in before timing. */
    memset(interleaved_mem, 0, size);

    struct timeval start, end;
    volatile int *array = (int *)interleaved_mem;
    size_t num_elements = size / sizeof(int);

    gettimeofday(&start, NULL);
    for (size_t i = 0; i < num_elements; i += 64) {
        array[i] = (int)i;
    }
    gettimeofday(&end, NULL);

    double elapsed = (end.tv_sec - start.tv_sec) +
                     (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("交错访问时间: %.4f 秒\n", elapsed);
    printf("访问速度: %.2f MB/sec\n", size / elapsed / (1024 * 1024));

    numa_free(interleaved_mem, size);
}
/* Entry point: node-local vs. remote test, then the interleaved test. */
int main() {
    /* Local and remote node allocation. */
    numa_aware_allocation();

    /* Allocation interleaved across nodes. */
    numa_interleaved_allocation();

    return EXIT_SUCCESS;
}
内存优化最佳实践
应用层内存优化
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Fixed-size block pool.
 *
 * All blocks live in one backing allocation. Free blocks are tracked in an
 * array used as a stack: pool_alloc() pops the top entry, pool_free() pushes
 * one back.
 */
typedef struct {
    void **free_blocks;   /* stack of pointers to the currently free blocks */
    size_t block_size;    /* size of one block in bytes */
    size_t total_blocks;  /* capacity of the pool, in blocks */
    size_t free_count;    /* number of valid entries in free_blocks */
    void *memory_pool;    /* backing storage for all blocks */
} memory_pool;

/*
 * Create a pool of num_blocks blocks of block_size bytes each.
 *
 * Returns the new pool, or NULL when either argument is zero, when the
 * requested sizes would overflow size_t, or when an allocation fails.
 * The caller releases the pool with destroy_memory_pool().
 */
memory_pool* create_memory_pool(size_t block_size,size_t num_blocks) {
    const size_t size_max = (size_t)-1;

    /* Reject empty pools and byte counts that would wrap around. */
    if (block_size == 0 || num_blocks == 0) {
        return NULL;
    }
    if (num_blocks > size_max / block_size ||
        num_blocks > size_max / sizeof(void *)) {
        return NULL;
    }

    memory_pool *pool = malloc(sizeof *pool);
    if (!pool) {
        return NULL;
    }
    pool->block_size = block_size;
    pool->total_blocks = num_blocks;
    pool->free_count = num_blocks;

    /* Backing storage. */
    pool->memory_pool = malloc(block_size * num_blocks);
    if (!pool->memory_pool) {
        free(pool);
        return NULL;
    }

    /* Free stack; initially it holds every block. */
    pool->free_blocks = malloc(num_blocks * sizeof *pool->free_blocks);
    if (!pool->free_blocks) {
        free(pool->memory_pool);
        free(pool);
        return NULL;
    }
    for (size_t i = 0; i < num_blocks; i++) {
        pool->free_blocks[i] = (char *)pool->memory_pool + i * block_size;
    }

    printf("内存池创建成功: 块大小=%zu,总块数=%zu\n",
           block_size, num_blocks);
    return pool;
}

/*
 * Take one block from the pool.
 * Returns NULL when pool is NULL or no free block is left.
 */
void* pool_alloc(memory_pool *pool) {
    if (pool == NULL || pool->free_count == 0) {
        return NULL; /* pool exhausted */
    }
    return pool->free_blocks[--pool->free_count];
}

/*
 * Return a block to the pool.
 *
 * NULL arguments are ignored, and so is a push onto an already full free
 * stack. The block is not checked for membership or double free; the caller
 * must pass a pointer obtained from pool_alloc() on the same pool, once.
 */
void pool_free(memory_pool *pool,void *block) {
    if (pool == NULL || block == NULL) {
        return;
    }
    if (pool->free_count < pool->total_blocks) {
        pool->free_blocks[pool->free_count++] = block;
    }
}

/*
 * Release the pool and all of its blocks. A NULL pool is a no-op.
 * Every block pointer handed out by the pool becomes invalid.
 */
void destroy_memory_pool(memory_pool *pool) {
    if (pool == NULL) {
        return;
    }
    free(pool->free_blocks);
    free(pool->memory_pool);
    free(pool);
}
/*
 * Time a batch of fixed-size allocations and frees done with malloc()/free()
 * and the same batch done through a memory pool. Prints both timings and the
 * speed-up of the pool over malloc.
 */
void memory_optimization_example() {
    enum { ALLOC_COUNT = 10000, ALLOC_BYTES = 256 };
    struct timeval t0, t1;

    fputs("内存优化示例\n", stdout);
    fputs("=============================\n", stdout);

    /* Baseline: the general-purpose allocator. */
    fputs("传统malloc方式:\n", stdout);
    gettimeofday(&t0, NULL);
    void *heap_ptrs[ALLOC_COUNT];
    for (int n = 0; n < ALLOC_COUNT; n++) {
        heap_ptrs[n] = malloc(ALLOC_BYTES);
    }
    gettimeofday(&t1, NULL);
    double malloc_time = (t1.tv_sec - t0.tv_sec) +
                         (t1.tv_usec - t0.tv_usec) / 1000000.0;
    printf(" 分配时间: %.4f 秒\n", malloc_time);

    gettimeofday(&t0, NULL);
    for (int n = 0; n < ALLOC_COUNT; n++) {
        free(heap_ptrs[n]);
    }
    gettimeofday(&t1, NULL);
    double free_time = (t1.tv_sec - t0.tv_sec) +
                       (t1.tv_usec - t0.tv_usec) / 1000000.0;
    printf(" 释放时间: %.4f 秒\n", free_time);

    /* Same batch through the pool. */
    fputs("\n内存池方式:\n", stdout);
    memory_pool *pool = create_memory_pool(ALLOC_BYTES, ALLOC_COUNT);
    if (!pool) {
        return; /* nothing to compare against */
    }

    gettimeofday(&t0, NULL);
    void *pool_ptrs[ALLOC_COUNT];
    for (int n = 0; n < ALLOC_COUNT; n++) {
        pool_ptrs[n] = pool_alloc(pool);
    }
    gettimeofday(&t1, NULL);
    double pool_alloc_time = (t1.tv_sec - t0.tv_sec) +
                             (t1.tv_usec - t0.tv_usec) / 1000000.0;
    printf(" 分配时间: %.4f 秒 (加速比: %.2fx)\n",
           pool_alloc_time, malloc_time / pool_alloc_time);

    gettimeofday(&t0, NULL);
    for (int n = 0; n < ALLOC_COUNT; n++) {
        pool_free(pool, pool_ptrs[n]);
    }
    gettimeofday(&t1, NULL);
    double pool_free_time = (t1.tv_sec - t0.tv_sec) +
                            (t1.tv_usec - t0.tv_usec) / 1000000.0;
    printf(" 释放时间: %.4f 秒 (加速比: %.2fx)\n",
           pool_free_time, free_time / pool_free_time);

    destroy_memory_pool(pool);
}
/* Entry point: run the allocator comparison, then print a list of tips. */
int main() {
    /* Printed verbatim, in order. None of the lines contains a format
     * specifier, so they are written with fputs(). */
    static const char *const advice[] = {
        "\n内存优化建议:\n",
        "=============================\n",
        "1. 使用内存池减少分配开销\n",
        "2. 避免频繁的小内存分配\n",
        "3. 重用内存对象\n",
        "4. 使用适当的内存对齐\n",
        "5. 考虑NUMA架构的内存分配\n",
        "6. 监控内存使用模式\n",
        "7. 及时释放不再使用的内存\n",
    };

    memory_optimization_example();

    for (size_t i = 0; i < sizeof advice / sizeof advice[0]; i++) {
        fputs(advice[i], stdout);
    }
    return 0;
}
通过合理应用大页内存、透明巨页、内存压缩等高级技术,可以显著提升内存密集型应用的性能,特别是在数据库、虚拟化、大数据处理等场景中,性能提升可达20-50%。需要注意的是,实际提升幅度取决于工作负载的访问模式、工作集大小以及硬件平台,应以目标环境中的实测结果为准。