高级内存优化技术与大页内存

深入探讨大页内存、内存压缩、透明巨页等高级内存优化技术及其在实际应用中的性能提升

在内存密集型应用中,传统的4KB页面大小往往成为性能瓶颈。大页内存(Huge Pages)和透明巨页(THP)等高级内存优化技术通过增加页面大小,显著减少TLB(转换后备缓冲器)缺失和页表开销,从而大幅提升系统性能。本文将深入探讨这些技术的原理、配置方法和实际应用场景。

大页内存原理

TLB与页面大小关系

Rendering diagram...

TLB是CPU中用于缓存虚拟地址到物理地址映射的高速缓存,其容量有限。使用更大的页面可以显著提高TLB的覆盖率:

  • 4KB页面:每个TLB条目覆盖4KB内存
  • 2MB页面:每个TLB条目覆盖2MB内存(512倍提升)
  • 1GB页面:每个TLB条目覆盖1GB内存(262144倍提升)

大页内存性能优势

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <time.h>
#include <errno.h>

#define ALLOCATION_SIZE (256 * 1024 * 1024)  // 256MB
#define ITERATIONS 1000000

// 检查大页内存支持
void check_hugepage_support() {
    printf("大页内存支持检查\n");
    printf("=============================\n");
    
    // 检查大页内存是否可用
    FILE *fp = fopen("/proc/meminfo","r");
    if (fp) {
     char line[256];
     while (fgets(line,sizeof(line),fp)) {
         if (strstr(line,"HugePages_Total:") || 
             strstr(line,"HugePages_Free:") ||
             strstr(line,"Hugepagesize:")) {
             printf("%s",line);
         }
     }
     fclose(fp);
    }
    
    // 检查透明巨页状态
    fp = fopen("/sys/kernel/mm/transparent_hugepage/enabled","r");
    if (fp) {
     char line[256];
     if (fgets(line,sizeof(line),fp)) {
         printf("\n透明巨页状态: %s",line);
     }
     fclose(fp);
    }
}

// 使用普通4KB页面进行内存访问测试
void test_normal_pages() {
    printf("\n普通4KB页面性能测试\n");
    printf("=============================\n");
    
    // 使用mmap分配普通内存
    void *memory = mmap(NULL,ALLOCATION_SIZE,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS,-1,0);
    
    if (memory == MAP_FAILED) {
     perror("mmap failed");
     return;
    }
    
    printf("分配内存地址: %p\n",memory);
    printf("分配大小: %d MB\n",ALLOCATION_SIZE / (1024 * 1024));
    
    // 初始化内存
    memset(memory,0,ALLOCATION_SIZE);
    
    // 性能测试:随机访问模式
    struct timeval start,end;
    gettimeofday(&start,NULL);
    
    int *array = (int *)memory;
    size_t num_elements = ALLOCATION_SIZE / sizeof(int);
    
    for (int iter = 0; iter < ITERATIONS; iter++) {
     // 随机访问模式
     size_t index = rand() % num_elements;
     array[index] = array[index] + 1;
    }
    
    gettimeofday(&end,NULL);
    double elapsed = (end.tv_sec - start.tv_sec) + 
                 (end.tv_usec - start.tv_usec) / 1000000.0;
    
    printf("随机访问时间: %.4f 秒\n",elapsed);
    printf("访问速度: %.2f M ops/sec\n",ITERATIONS / elapsed / 1e6);
    
    // 顺序访问测试
    gettimeofday(&start,NULL);
    
    for (int iter = 0; iter < ITERATIONS; iter++) {
     for (size_t i = 0; i < num_elements; i += 64) {
         array[i] = array[i] + 1;
     }
    }
    
    gettimeofday(&end,NULL);
    elapsed = (end.tv_sec - start.tv_sec) + 
          (end.tv_usec - start.tv_usec) / 1000000.0;
    
    printf("顺序访问时间: %.4f 秒\n",elapsed);
    printf("访问速度: %.2f M ops/sec\n",ITERATIONS / elapsed / 1e6);
    
    munmap(memory,ALLOCATION_SIZE);
}

// 使用大页内存进行测试
void test_huge_pages() {
    printf("\n大页内存性能测试\n");
    printf("=============================\n");
    
    // 尝试使用大页内存
    void *memory = mmap(NULL,ALLOCATION_SIZE,
                    PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
                    -1,0);
    
    if (memory == MAP_FAILED) {
     printf("大页内存分配失败: %s\n",strerror(errno));
     printf("尝试使用透明巨页...\n");
     
     // 回退到普通分配,依赖透明巨页
     memory = mmap(NULL,ALLOCATION_SIZE,
                  PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_ANONYMOUS,-1,0);
     
     if (memory == MAP_FAILED) {
         perror("内存分配失败");
         return;
     }
     
     printf("使用透明巨页分配: %p\n",memory);
    } else {
     printf("显式大页内存分配成功: %p\n",memory);
    }
    
    printf("分配大小: %d MB\n",ALLOCATION_SIZE / (1024 * 1024));
    
    // 初始化内存
    memset(memory,0,ALLOCATION_SIZE);
    
    // 性能测试
    struct timeval start,end;
    int *array = (int *)memory;
    size_t num_elements = ALLOCATION_SIZE / sizeof(int);
    
    // 随机访问测试
    gettimeofday(&start,NULL);
    
    for (int iter = 0; iter < ITERATIONS; iter++) {
     size_t index = rand() % num_elements;
     array[index] = array[index] + 1;
    }
    
    gettimeofday(&end,NULL);
    double elapsed = (end.tv_sec - start.tv_sec) + 
                 (end.tv_usec - start.tv_usec) / 1000000.0;
    
    printf("随机访问时间: %.4f 秒\n",elapsed);
    printf("访问速度: %.2f M ops/sec\n",ITERATIONS / elapsed / 1e6);
    
    // 顺序访问测试
    gettimeofday(&start,NULL);
    
    for (int iter = 0; iter < ITERATIONS; iter++) {
     for (size_t i = 0; i < num_elements; i += 64) {
         array[i] = array[i] + 1;
     }
    }
    
    gettimeofday(&end,NULL);
    elapsed = (end.tv_sec - start.tv_sec) + 
          (end.tv_usec - start.tv_usec) / 1000000.0;
    
    printf("顺序访问时间: %.4f 秒\n",elapsed);
    printf("访问速度: %.2f M ops/sec\n",ITERATIONS / elapsed / 1e6);
    
    munmap(memory,ALLOCATION_SIZE);
}

// 数据库工作负载模拟
void database_workload_simulation() {
    printf("\n数据库工作负载模拟\n");
    printf("=============================\n");
    
    const int num_records = 1000000;
    const int record_size = 256;
    const int total_size = num_records * record_size;
    
    printf("记录数量: %d\n",num_records);
    printf("记录大小: %d 字节\n",record_size);
    printf("总内存需求: %.2f MB\n",total_size / (1024.0 * 1024.0));
    
    // 分配内存
    char *database = malloc(total_size);
    if (!database) {
     printf("内存分配失败\n");
     return;
    }
    
    // 初始化数据库记录
    for (int i = 0; i < num_records; i++) {
     char *record = database + i * record_size;
     snprintf(record,record_size,"Record_%d_Data",i);
    }
    
    // 模拟数据库查询工作负载
    struct timeval start,end;
    
    printf("\n模拟随机查询...\n");
    gettimeofday(&start,NULL);
    
    for (int i = 0; i < 1000000; i++) {
     int record_id = rand() % num_records;
     char *record = database + record_id * record_size;
     // 模拟记录处理
     volatile int len = strlen(record);
     (void)len;  // 防止编译器优化
    }
    
    gettimeofday(&end,NULL);
    double elapsed = (end.tv_sec - start.tv_sec) + 
                 (end.tv_usec - start.tv_usec) / 1000000.0;
    
    printf("查询时间: %.4f 秒\n",elapsed);
    printf("查询速度: %.2f K queries/sec\n",1000000 / elapsed / 1000);
    
    // 模拟范围查询
    printf("\n模拟范围查询...\n");
    gettimeofday(&start,NULL);
    
    for (int i = 0; i < 10000; i++) {
     int start_id = rand() % (num_records - 1000);
     int end_id = start_id + 1000;
     
     for (int j = start_id; j < end_id; j++) {
         char *record = database + j * record_size;
         volatile int len = strlen(record);
         (void)len;
     }
    }
    
    gettimeofday(&end,NULL);
    elapsed = (end.tv_sec - start.tv_sec) + 
          (end.tv_usec - start.tv_usec) / 1000000.0;
    
    printf("查询时间: %.4f 秒\n",elapsed);
    printf("查询速度: %.2f K queries/sec\n",10000 / elapsed / 1000);
    
    free(database);
}

// TLB性能测试
void tlb_performance_test() {
    printf("\nTLB性能测试\n");
    printf("=============================\n");
    
    const int array_size = 1024 * 1024;  // 4MB数组
    const int stride_sizes[] = {4,16,64,256,1024,4096};
    const int num_strides = sizeof(stride_sizes) / sizeof(stride_sizes[0]);
    
    int *array = malloc(array_size * sizeof(int));
    if (!array) {
     printf("内存分配失败\n");
     return;
    }
    
    // 初始化数组
    for (int i = 0; i < array_size; i++) {
     array[i] = i;
    }
    
    printf("测试不同步长下的TLB性能:\n");
    printf("数组大小: %d 元素 (%.2f MB)\n\n",array_size,
        array_size * sizeof(int) / (1024.0 * 1024.0));
    
    for (int s = 0; s < num_strides; s++) {
     int stride = stride_sizes[s];
     
     struct timeval start,end;
     volatile int sum = 0;
     
     gettimeofday(&start,NULL);
     
     // 使用不同步长访问数组
     for (int iter = 0; iter < 1000; iter++) {
         for (int i = 0; i < array_size; i += stride) {
             sum += array[i];
         }
     }
     
     gettimeofday(&end,NULL);
     double elapsed = (end.tv_sec - start.tv_sec) + 
                     (end.tv_usec - start.tv_usec) / 1000000.0;
     
     printf("步长 %d: %.4f 秒 (%.2f M accesses/sec)\n",
            stride,elapsed,1000.0 * array_size / stride / elapsed / 1e6);
    }
    
    free(array);
}

int main() {
    check_hugepage_support();
    test_normal_pages();
    test_huge_pages();
    database_workload_simulation();
    tlb_performance_test();
    
    return 0;
}

透明巨页(THP)配置

THP工作原理

Rendering diagram...

THP配置与管理

# 查看THP状态
cat /sys/kernel/mm/transparent_hugepage/enabled
cat /sys/kernel/mm/transparent_hugepage/defrag

# 启用THP
echo always > /sys/kernel/mm/transparent_hugepage/enabled

# 设置THP为建议模式
echo madvise > /sys/kernel/mm/transparent_hugepage/enabled

# 禁用THP
echo never > /sys/kernel/mm/transparent_hugepage/enabled

# 启用THP碎片整理
echo always > /sys/kernel/mm/transparent_hugepage/defrag

# 查看THP统计信息
cat /sys/kernel/mm/transparent_hugepage/*
grep -H . /sys/kernel/mm/transparent_hugepage/*

# 监控THP使用情况
cat /proc/meminfo | grep -i huge
watch -n 1 'cat /proc/meminfo | grep -i huge'

# 查看进程的THP使用情况
cat /proc/<pid>/smaps | grep -i huge
pmap -x <pid> | grep huge

# 强制THP分配
# 应用程序中使用madvise
madvise(addr,length,MADV_HUGEPAGE);

THP性能影响分析

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>

// 分析进程的内存映射
void analyze_memory_mapping() {
    char filename[256];
    snprintf(filename,sizeof(filename),"/proc/%d/smaps",getpid());
    
    FILE *fp = fopen(filename,"r");
    if (!fp) {
     perror("无法打开smaps文件");
     return;
    }
    
    printf("当前进程内存映射分析\n");
    printf("=============================\n\n");
    
    char line[1024];
    int has_huge_pages = 0;
    
    while (fgets(line,sizeof(line),fp)) {
     if (strstr(line,"KernelPageSize") || strstr(line,"MMUPageSize")) {
         printf("%s",line);
         if (strstr(line,"2048 kB")) {
             has_huge_pages = 1;
         }
     }
    }
    
    fclose(fp);
    
    if (has_huge_pages) {
     printf("\n检测到大页内存使用!\n");
    } else {
     printf("\n未检测到大页内存使用\n");
    }
}

// 使用madvise控制THP
void test_madvise_hugepage() {
    printf("\n使用madvise控制THP\n");
    printf("=============================\n");
    
    size_t size = 4 * 1024 * 1024;  // 4MB
    
    // 分配内存
    void *memory = mmap(NULL,size,PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS,-1,0);
    
    if (memory == MAP_FAILED) {
     perror("mmap失败");
     return;
    }
    
    printf("分配内存: %p,大小: %zu MB\n",memory,size / (1024 * 1024));
    
    // 建议使用大页
    if (madvise(memory,size,MADV_HUGEPAGE) == 0) {
     printf("成功建议使用大页内存\n");
    } else {
     perror("madvise失败");
    }
    
    // 初始化内存
    memset(memory,0,size);
    
    // 检查是否真的使用了大页
    analyze_memory_mapping();
    
    munmap(memory,size);
}

// THP性能对比测试
void thp_performance_comparison() {
    printf("\nTHP性能对比测试\n");
    printf("=============================\n");
    
    const size_t size = 64 * 1024 * 1024;  // 64MB
    const int iterations = 10000000;
    
    // 测试1: 不建议使用THP
    printf("测试1: 不建议使用THP\n");
    void *memory1 = mmap(NULL,size,PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS,-1,0);
    
    if (memory1 != MAP_FAILED) {
     madvise(memory1,size,MADV_NOHUGEPAGE);
     memset(memory1,0,size);
     
     struct timeval start,end;
     volatile int *array = (int *)memory1;
     size_t num_elements = size / sizeof(int);
     
     gettimeofday(&start,NULL);
     for (int i = 0; i < iterations; i++) {
         array[i % num_elements]++;
     }
     gettimeofday(&end,NULL);
     
     double time1 = (end.tv_sec - start.tv_sec) + 
                   (end.tv_usec - start.tv_usec) / 1000000.0;
     printf("  执行时间: %.4f 秒\n",time1);
     
     munmap(memory1,size);
    }
    
    // 测试2: 建议使用THP
    printf("测试2: 建议使用THP\n");
    void *memory2 = mmap(NULL,size,PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS,-1,0);
    
    if (memory2 != MAP_FAILED) {
     madvise(memory2,size,MADV_HUGEPAGE);
     memset(memory2,0,size);
     
     struct timeval start,end;
     volatile int *array = (int *)memory2;
     size_t num_elements = size / sizeof(int);
     
     gettimeofday(&start,NULL);
     for (int i = 0; i < iterations; i++) {
         array[i % num_elements]++;
     }
     gettimeofday(&end,NULL);
     
     double time2 = (end.tv_sec - start.tv_sec) + 
                   (end.tv_usec - start.tv_usec) / 1000000.0;
     printf("  执行时间: %.4f 秒\n",time2);
     printf("  性能提升: %.2f%%\n",(time1 - time2) / time1 * 100);
     
     munmap(memory2,size);
    }
}

int main() {
    analyze_memory_mapping();
    test_madvise_hugepage();
    thp_performance_comparison();
    
    return 0;
}

内存压缩技术

zRAM压缩内存

Rendering diagram...

zRAM配置与使用

# 加载zRAM模块
modprobe zram

# 创建zRAM设备
zramctl --find --size 2G
# 或
echo 2G > /sys/block/zram0/disksize

# 设置压缩算法
echo lz4 > /sys/block/zram0/comp_algorithm

# 创建文件系统
mkswap /dev/zram0
mkfs.ext4 /dev/zram0

# 启用交换
swapon /dev/zram0

# 挂载为普通文件系统
mount /dev/zram0 /mnt/compressed

# 查看zRAM状态
zramctl
cat /sys/block/zram0/mm_stat
cat /sys/block/zram0/io_stat

# 监控压缩效果
watch -n 1 'cat /sys/block/zram0/mm_stat'

# 调整zRAM大小
echo 4G > /sys/block/zram0/disksize

# 禁用zRAM
swapoff /dev/zram0
umount /dev/zram0
echo 1 > /sys/block/zram0/reset

内存压缩性能测试

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#include <time.h>

// 模拟内存压缩工作负载
void memory_compression_workload() {
    printf("内存压缩工作负载模拟\n");
    printf("=============================\n");
    
    const size_t data_size = 512 * 1024 * 1024;  // 512MB
    const int pattern_size = 1024;  // 1KB模式
    
    // 分配内存
    char *data = malloc(data_size);
    if (!data) {
     printf("内存分配失败\n");
     return;
    }
    
    // 创建可压缩数据模式(重复模式)
    printf("创建可压缩数据模式...\n");
    for (size_t i = 0; i < data_size; i++) {
     data[i] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % pattern_size];
    }
    
    // 计算数据熵值(可压缩性)
    int histogram[256] = {0};
    for (size_t i = 0; i < data_size; i++) {
     histogram[(unsigned char)data[i]]++;
    }
    
    double entropy = 0.0;
    for (int i = 0; i < 256; i++) {
     if (histogram[i] > 0) {
         double probability = (double)histogram[i] / data_size;
         entropy -= probability * log2(probability);
     }
    }
    
    printf("数据熵值: %.4f bits/byte (低熵值表示高可压缩性)\n",entropy);
    printf("理论压缩率: %.2f%%\n",(1 - entropy / 8) * 100);
    
    // 性能测试:顺序访问
    printf("\n顺序访问性能测试:\n");
    struct timeval start,end;
    
    gettimeofday(&start,NULL);
    volatile int sum = 0;
    for (size_t i = 0; i < data_size; i++) {
     sum += data[i];
    }
    gettimeofday(&end,NULL);
    
    double seq_time = (end.tv_sec - start.tv_sec) + 
                  (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("  顺序访问时间: %.4f 秒\n",seq_time);
    printf("  访问速度: %.2f MB/sec\n",data_size / seq_time / (1024 * 1024));
    
    // 创建不可压缩数据(随机数据)
    printf("\n创建不可压缩数据模式...\n");
    for (size_t i = 0; i < data_size; i++) {
     data[i] = rand() % 256;
    }
    
    // 计算随机数据熵值
    memset(histogram,0,sizeof(histogram));
    for (size_t i = 0; i < data_size; i++) {
     histogram[(unsigned char)data[i]]++;
    }
    
    entropy = 0.0;
    for (int i = 0; i < 256; i++) {
     if (histogram[i] > 0) {
         double probability = (double)histogram[i] / data_size;
         entropy -= probability * log2(probability);
     }
    }
    
    printf("数据熵值: %.4f bits/byte\n",entropy);
    printf("理论压缩率: %.2f%%\n",(1 - entropy / 8) * 100);
    
    // 性能测试:随机访问
    printf("\n随机访问性能测试:\n");
    gettimeofday(&start,NULL);
    
    for (int iter = 0; iter < 10000000; iter++) {
     size_t index = rand() % data_size;
     sum += data[index];
    }
    
    gettimeofday(&end,NULL);
    
    double rand_time = (end.tv_sec - start.tv_sec) + 
                   (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("  随机访问时间: %.4f 秒\n",rand_time);
    printf("  访问速度: %.2f M accesses/sec\n",10000000.0 / rand_time / 1e6);
    
    free(data);
}

// zRAM性能对比
void zram_performance_comparison() {
    printf("\nzRAM性能对比测试\n");
    printf("=============================\n");
    
    printf("测试场景:\n");
    printf("1. 创建大量重复数据(高压缩率)\n");
    printf("2. 触发内存压力\n");
    printf("3. 观察zRAM压缩效果\n");
    printf("4. 对比传统交换性能\n");
    
    printf("\n监控命令:\n");
    printf("  watch -n 1 'cat /proc/meminfo | grep -E \"MemAvailable|SwapTotal|SwapFree\"'\n");
    printf("  watch -n 1 'cat /sys/block/zram0/mm_stat'\n");
    printf("  watch -n 1 'cat /proc/vmstat | grep -E \"pswpin|pswpout\"'\n");
}

int main() {
    memory_compression_workload();
    zram_performance_comparison();
    
    return 0;
}

NUMA感知的内存分配

NUMA内存访问优化

#include <stdio.h>
#include <stdlib.h>
#include <numa.h>
#include <string.h>
#include <time.h>

// NUMA感知的内存分配测试
void numa_aware_allocation() {
    printf("NUMA感知的内存分配\n");
    printf("=============================\n");
    
    if (!numa_available()) {
     printf("NUMA不可用\n");
     return;
    }
    
    int max_node = numa_max_node();
    printf("可用NUMA节点: %d\n",max_node + 1);
    
    // 测试本地内存访问
    printf("\n本地内存访问测试:\n");
    int local_node = 0;
    numa_run_on_node(local_node);
    numa_set_preferred(local_node);
    
    size_t size = 256 * 1024 * 1024;  // 256MB
    void *local_mem = numa_alloc_onnode(size,local_node);
    
    if (local_mem) {
     memset(local_mem,0,size);
     
     struct timeval start,end;
     volatile int *array = (int *)local_mem;
     size_t num_elements = size / sizeof(int);
     
     gettimeofday(&start,NULL);
     for (size_t i = 0; i < num_elements; i += 64) {
         array[i] = i;
     }
     gettimeofday(&end,NULL);
     
     double local_time = (end.tv_sec - start.tv_sec) + 
                        (end.tv_usec - start.tv_usec) / 1000000.0;
     printf("  本地访问时间: %.4f 秒\n",local_time);
     printf("  访问速度: %.2f MB/sec\n",size / local_time / (1024 * 1024));
     
     numa_free(local_mem,size);
    }
    
    // 测试远程内存访问
    if (max_node >= 1) {
     printf("\n远程内存访问测试:\n");
     int remote_node = 1;
     numa_run_on_node(local_node);  // 在节点0上运行
     numa_set_preferred(local_node);
     
     void *remote_mem = numa_alloc_onnode(size,remote_node);
     
     if (remote_mem) {
         memset(remote_mem,0,size);
         
         struct timeval start,end;
         volatile int *array = (int *)remote_mem;
         size_t num_elements = size / sizeof(int);
         
         gettimeofday(&start,NULL);
         for (size_t i = 0; i < num_elements; i += 64) {
             array[i] = i;
         }
         gettimeofday(&end,NULL);
         
         double remote_time = (end.tv_sec - start.tv_sec) + 
                             (end.tv_usec - start.tv_usec) / 1000000.0;
         printf("  远程访问时间: %.4f 秒\n",remote_time);
         printf("  访问速度: %.2f MB/sec\n",size / remote_time / (1024 * 1024));
         printf("  性能差异: %.2f%%\n",(remote_time - local_time) / local_time * 100);
         
         numa_free(remote_mem,size);
     }
    }
}

// NUMA interleaved分配
void numa_interleaved_allocation() {
    printf("\nNUMA交错分配\n");
    printf("=============================\n");
    
    if (!numa_available()) {
     printf("NUMA不可用\n");
     return;
    }
    
    size_t size = 256 * 1024 * 1024;  // 256MB
    
    // 使用交错分配策略
    void *interleaved_mem = numa_alloc_interleaved(size);
    
    if (interleaved_mem) {
     printf("交错分配内存: %p,大小: %zu MB\n",
            interleaved_mem,size / (1024 * 1024));
     
     // 初始化内存
     memset(interleaved_mem,0,size);
     
     // 性能测试
     struct timeval start,end;
     volatile int *array = (int *)interleaved_mem;
     size_t num_elements = size / sizeof(int);
     
     gettimeofday(&start,NULL);
     for (size_t i = 0; i < num_elements; i += 64) {
         array[i] = i;
     }
     gettimeofday(&end,NULL);
     
     double elapsed = (end.tv_sec - start.tv_sec) + 
                     (end.tv_usec - start.tv_usec) / 1000000.0;
     printf("交错访问时间: %.4f 秒\n",elapsed);
     printf("访问速度: %.2f MB/sec\n",size / elapsed / (1024 * 1024));
     
     numa_free(interleaved_mem,size);
    }
}

int main() {
    numa_aware_allocation();
    numa_interleaved_allocation();
    
    return 0;
}

内存优化最佳实践

应用层内存优化

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// 内存池实现
typedef struct {
    void **free_blocks;
    size_t block_size;
    size_t total_blocks;
    size_t free_count;
    void *memory_pool;
} memory_pool;

// 创建内存池
memory_pool* create_memory_pool(size_t block_size,size_t num_blocks) {
    memory_pool *pool = malloc(sizeof(memory_pool));
    if (!pool) return NULL;
    
    pool->block_size = block_size;
    pool->total_blocks = num_blocks;
    pool->free_count = num_blocks;
    
    // 分配内存池
    pool->memory_pool = malloc(block_size * num_blocks);
    if (!pool->memory_pool) {
     free(pool);
     return NULL;
    }
    
    // 初始化空闲块链表
    pool->free_blocks = malloc(num_blocks * sizeof(void *));
    if (!pool->free_blocks) {
     free(pool->memory_pool);
     free(pool);
     return NULL;
    }
    
    for (size_t i = 0; i < num_blocks; i++) {
     pool->free_blocks[i] = (char *)pool->memory_pool + i * block_size;
    }
    
    printf("内存池创建成功: 块大小=%zu,总块数=%zu\n",
        block_size,num_blocks);
    
    return pool;
}

// 从内存池分配
void* pool_alloc(memory_pool *pool) {
    if (pool->free_count == 0) {
     return NULL;  // 内存池已满
    }
    
    return pool->free_blocks[--pool->free_count];
}

// 释放到内存池
void pool_free(memory_pool *pool,void *block) {
    if (pool->free_count < pool->total_blocks) {
     pool->free_blocks[pool->free_count++] = block;
    }
}

// 销毁内存池
void destroy_memory_pool(memory_pool *pool) {
    free(pool->free_blocks);
    free(pool->memory_pool);
    free(pool);
}

// 内存优化示例
void memory_optimization_example() {
    printf("内存优化示例\n");
    printf("=============================\n");
    
    const int num_allocations = 10000;
    const int allocation_size = 256;
    
    // 传统malloc方式
    printf("传统malloc方式:\n");
    struct timeval start,end;
    
    gettimeofday(&start,NULL);
    void *traditional_ptrs[num_allocations];
    for (int i = 0; i < num_allocations; i++) {
     traditional_ptrs[i] = malloc(allocation_size);
    }
    gettimeofday(&end,NULL);
    
    double malloc_time = (end.tv_sec - start.tv_sec) + 
                     (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("  分配时间: %.4f 秒\n",malloc_time);
    
    gettimeofday(&start,NULL);
    for (int i = 0; i < num_allocations; i++) {
     free(traditional_ptrs[i]);
    }
    gettimeofday(&end,NULL);
    
    double free_time = (end.tv_sec - start.tv_sec) + 
                   (end.tv_usec - start.tv_usec) / 1000000.0;
    printf("  释放时间: %.4f 秒\n",free_time);
    
    // 内存池方式
    printf("\n内存池方式:\n");
    memory_pool *pool = create_memory_pool(allocation_size,num_allocations);
    
    if (pool) {
     gettimeofday(&start,NULL);
     void *pool_ptrs[num_allocations];
     for (int i = 0; i < num_allocations; i++) {
         pool_ptrs[i] = pool_alloc(pool);
     }
     gettimeofday(&end,NULL);
     
     double pool_alloc_time = (end.tv_sec - start.tv_sec) + 
                             (end.tv_usec - start.tv_usec) / 1000000.0;
     printf("  分配时间: %.4f 秒 (加速比: %.2fx)\n",
            pool_alloc_time,malloc_time / pool_alloc_time);
     
     gettimeofday(&start,NULL);
     for (int i = 0; i < num_allocations; i++) {
         pool_free(pool,pool_ptrs[i]);
     }
     gettimeofday(&end,NULL);
     
     double pool_free_time = (end.tv_sec - start.tv_sec) + 
                            (end.tv_usec - start.tv_usec) / 1000000.0;
     printf("  释放时间: %.4f 秒 (加速比: %.2fx)\n",
            pool_free_time,free_time / pool_free_time);
     
     destroy_memory_pool(pool);
    }
}

int main() {
    memory_optimization_example();
    
    printf("\n内存优化建议:\n");
    printf("=============================\n");
    printf("1. 使用内存池减少分配开销\n");
    printf("2. 避免频繁的小内存分配\n");
    printf("3. 重用内存对象\n");
    printf("4. 使用适当的内存对齐\n");
    printf("5. 考虑NUMA架构的内存分配\n");
    printf("6. 监控内存使用模式\n");
    printf("7. 及时释放不再使用的内存\n");
    
    return 0;
}

通过合理应用大页内存、透明巨页、内存压缩等高级技术,可以显著提升内存密集型应用的性能,特别是在数据库、虚拟化、大数据处理等场景中,性能提升可达20-50%。