Lookup Caching by .rodata Section String Inference

A rather hacky, but working, way of accelerating string-key lookups.

– Created: July 28, 2024 UTC

– Tags: Programming, Optimization, C, Linux


While working on our immediate no-state engine, the need for texture lookup optimization arose. The API is designed in such a way that every single pushed triangle requires resolving a texture by its path.

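My insane mind then came up with the following optimization: detect whether the given path pointer lies in .rodata and, if so, just look the texture up by a hash of the pointer instead of a hash of the whole variable-length string. A pointer into .rodata refers to a string constant whose address and contents stay the same for the lifetime of the process, so the pointer itself can serve as the key. Constant time and all that.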

For that I ended up writing a limited ELF parsing routine that relies on /proc/self/exe. Address space layout randomization made this tricky until I realized that getauxval(AT_ENTRY) - ehdr.e_entry gives the base address at which the executable is loaded.

Once the section bounds are known, it’s as simple as checking vm_start <= ptr && ptr < vm_end.

Code

/* code is fully self-contained, feel free to use it :) */

#include <fcntl.h>
#include <unistd.h>
#include <sys/auxv.h>
#include <elf.h>
#include <linux/limits.h>

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read exactly `len` bytes from `fd`, starting at absolute file offset `off`,
 * into `dst`. Short reads and EINTR are retried. Returns false on any I/O
 * error or when the file ends before `len` bytes were read.
 */
static bool elf_read_at(int fd, uint64_t off, void *dst, size_t len)
{
    unsigned char *out = dst;

    /* an offset too large for off_t turns negative and is rejected by lseek */
    if (lseek(fd, (off_t)off, SEEK_SET) == (off_t)-1)
        return false;

    while (len > 0) {
        ssize_t n = read(fd, out, len);
        if (n < 0) {
            if (errno == EINTR)
                continue;
            return false;
        }
        if (n == 0)
            return false; /* file ends before the requested range */
        out += n;
        len -= (size_t)n;
    }
    return true;
}

/*
 * Find the in-memory bounds of the section called `name` in the running
 * executable.
 *
 * The section header table is read from /proc/self/exe (64-bit ELF only).
 * Link-time addresses are converted to run-time addresses by adding
 * getauxval(AT_ENTRY) - e_entry, which is the load bias of the executable
 * (zero when it is not position-independent).
 *
 * name      section name to look for, e.g. ".rodata"
 * vm_start  receives the address of the first byte of the section
 * vm_end    receives the address one past the last byte of the section
 *
 * Returns true and fills both outputs when an allocated (SHF_ALLOC) section
 * with that name is found. Returns false and leaves the outputs untouched on
 * any failure: NULL argument, file cannot be opened or read, not a 64-bit
 * ELF, no usable section header table, out of memory, section not found.
 */
bool infer_elf_section_bounds(const char *const restrict name,
                              const char **restrict vm_start,
                              const char **restrict vm_end)
{
    bool result = false;
    char *sh = NULL;

    if (name == NULL || vm_start == NULL || vm_end == NULL)
        return false;

    /* open the magic link directly: no path buffer to overflow or truncate,
       and it keeps working if the binary was renamed or unlinked */
    int elf = open("/proc/self/exe", O_RDONLY);
    if (elf == -1)
        return false;

    /* elf header */
    Elf64_Ehdr ehdr;
    if (!elf_read_at(elf, 0, &ehdr, sizeof ehdr))
        goto CLEANUP;
    if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
            ehdr.e_ident[EI_CLASS] != ELFCLASS64)
        goto CLEANUP;
    if (ehdr.e_shoff == 0 || ehdr.e_shentsize < sizeof (Elf64_Shdr))
        goto CLEANUP; /* no section header table, or entries too small */

    /* section count and string table index; when they don't fit into the
       elf header the real values are stored in section header 0 */
    uint64_t shnum = ehdr.e_shnum;
    uint64_t shstrndx = ehdr.e_shstrndx;
    if (shnum == 0 || shstrndx == SHN_XINDEX) {
        Elf64_Shdr sh0;
        if (!elf_read_at(elf, ehdr.e_shoff, &sh0, sizeof sh0))
            goto CLEANUP;
        if (shnum == 0)
            shnum = sh0.sh_size;
        if (shstrndx == SHN_XINDEX)
            shstrndx = sh0.sh_link;
    }
    if (shstrndx == SHN_UNDEF || shstrndx >= shnum)
        goto CLEANUP;

    /* section header string table */
    Elf64_Shdr shstrdr;
    if (!elf_read_at(elf, ehdr.e_shoff + shstrndx * ehdr.e_shentsize,
                     &shstrdr, sizeof shstrdr))
        goto CLEANUP;
    if (shstrdr.sh_size == 0 || shstrdr.sh_size > SIZE_MAX - 1)
        goto CLEANUP;
    const size_t sh_len = (size_t)shstrdr.sh_size;
    sh = malloc(sh_len + 1);
    if (sh == NULL)
        goto CLEANUP;
    if (!elf_read_at(elf, shstrdr.sh_offset, sh, sh_len))
        goto CLEANUP;
    sh[sh_len] = '\0'; /* strcmp() below can never run off the table */

    /* load bias: run-time entry point minus link-time entry point */
    const unsigned long entry = getauxval(AT_ENTRY);
    if (entry == 0)
        goto CLEANUP; /* auxiliary vector has no AT_ENTRY */
    const uintptr_t base = (uintptr_t)entry - (uintptr_t)ehdr.e_entry;

    /* walk sections searching for needed name */
    for (uint64_t s = 0; s < shnum; ++s) {
        Elf64_Shdr shdr;
        if (!elf_read_at(elf, ehdr.e_shoff + s * ehdr.e_shentsize,
                         &shdr, sizeof shdr))
            break;

        if (shdr.sh_name >= sh_len)
            continue; /* name offset outside of the string table */
        if (strcmp(&sh[shdr.sh_name], name) != 0)
            continue;
        if (!(shdr.sh_flags & SHF_ALLOC))
            continue; /* not part of the process image, has no address */

        const uintptr_t start = base + (uintptr_t)shdr.sh_addr;
        *vm_start = (const char *)start;
        *vm_end   = (const char *)(start + (uintptr_t)shdr.sh_size);
        result = true;
        break;
    }

CLEANUP:
    free(sh);
    close(elf);
    return result;
}