From a2191ec4ffa87d8a33d437c491eb03ba2bdeaf7f Mon Sep 17 00:00:00 2001 From: Konstantin Baladurin Date: Thu, 9 Jul 2015 16:00:59 +0300 Subject: [PATCH] [IMPROVE] add tool for ELF parsing Change-Id: I39d0f954b0d471fe0d559d99a870f2625bc72a1d Signed-off-by: Konstantin Baladurin --- Makefile | 20 ++- elf_parsing/parse_elf.c | 407 ++++++++++++++++++++++++++++++++++++++++++++++ elf_parsing/parse_elf.h | 37 +++++ packaging/swap-probe.spec | 12 ++ scripts/gen_headers.py | 11 +- 5 files changed, 480 insertions(+), 7 deletions(-) create mode 100644 elf_parsing/parse_elf.c create mode 100644 elf_parsing/parse_elf.h diff --git a/Makefile b/Makefile index 661f4b2..8bc4759 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,5 @@ INSTALLDIR = usr/lib +BIN_INSTALLDIR = /usr/local/bin HEADER_INSTALLDIR = /usr/local/include/ ## Since include directives do not impose additional dependencies, we can make @@ -114,16 +115,19 @@ TIZEN_SRCS = $(COMMON_SRCS) $(CAPI_SRCS)\ ASM_SRC = ./helper/da_call_original.S +PARSE_ELF_SRC = ./elf_parsing/parse_elf.c + ## Totally brain-dead. ## FIXME: Rewrite this normally with eval. 
ASM_OBJ = $(patsubst %.S,%.o, $(ASM_SRC)) CAPI_OBJS = $(patsubst %.c,%.o, $(CAPI_SRCS)) $(ASM_OBJ) TIZEN_OBJS = $(patsubst %.cpp,%.o, $(patsubst %.c,%.o, $(TIZEN_SRCS))) $(ASM_OBJ) DUMMY_OBJS = $(patsubst %.c,%.o, $(DUMMY_SRCS)) - +PARSE_ELF_OBJ = $(patsubst %.c,%.o, $(PARSE_ELF_SRC)) TIZEN_TARGET = da_probe_tizen.so DUMMY_TARGET = libdaprobe.so +PARSE_ELF_TARGET = parse_elf CPPFLAGS = $(INCLUDE_CPPFLAGS) -D_GNU_SOURCE -DSELF_LIB_NAME="\"/$(INSTALLDIR)/$(TIZEN_TARGET)\"" CFLAGS = $(WARN_CFLAGS) -fPIC @@ -132,13 +136,18 @@ CXXFLAGS = $(WARN_CFLAGS) -fPIC TIZEN_CPPFLAGS = -DTIZENAPP $(SWAP_PROBE_DEFS) TIZEN_LDFLAGS = -lstdc++ -all: capi tizen dummy + +all: capi tizen dummy elfparser tizen: headers $(TIZEN_TARGET) dummy: headers $(DUMMY_TARGET) +elfparser: $(PARSE_ELF_OBJ) $(PARSE_ELF_TARGET) $(ASM_OBJ): $(ASM_SRC) $(CC) $(ASMFLAG) -c $^ -o $@ +$(PARSE_ELF_OBJ): $(PARSE_ELF_SRC) + $(CC) -c $^ -o $@ + API_NAME_LIST = scripts/api_names_all.txt GENERATED_HEADERS = include/api_id_mapping.h include/x_define_api_id_list.h SOURCE_HEADERS = include/api_ld_mapping.h @@ -174,9 +183,12 @@ $(TIZEN_TARGET): $(TIZEN_OBJS) $(DUMMY_TARGET): $(DUMMY_OBJS) $(CC) $(LDFLAGS) $^ -o $@ +$(PARSE_ELF_TARGET): $(PARSE_ELF_OBJ) + $(CC) $^ -o $@ + ldheader: $(SOURCE_HEADERS) -install: install_da install_ld +install: install_da install_ld install_elf install_da: all [ -d "$(DESTDIR)/$(INSTALLDIR)" ] || mkdir -p $(DESTDIR)/$(INSTALLDIR) @@ -190,6 +202,8 @@ install_ld: ldheader # var_addr install -m 644 include/x_define_api_id_list.h $(DESTDIR)/$(HEADER_INSTALLDIR)/x_define_api_id_list.h install -m 644 include/app_protocol.h $(DESTDIR)/$(HEADER_INSTALLDIR)/app_protocol.h +install_elf: elfparser + install -m 755 $(PARSE_ELF_TARGET) $(DESTDIR)/$(BIN_INSTALLDIR)/parse_elf clean: diff --git a/elf_parsing/parse_elf.c b/elf_parsing/parse_elf.c new file mode 100644 index 0000000..3ae334f --- /dev/null +++ b/elf_parsing/parse_elf.c @@ -0,0 +1,407 @@ +#include +#include +#include +#include +#include 
+#include +#include +#include + +#include "parse_elf.h" + + +int print_error(const char* msg) +{ + return fprintf(stderr, "Error: %s\n", msg); +} + +size_t fsize(int fd) +{ + struct stat buf; + if (fstat(fd, &buf) != 0) { + print_error("cannot get file size"); + return 0; + } + return buf.st_size; +} + +void *mmap_file(const char *filepath, size_t *len) +{ + int fd = open(filepath, O_RDONLY); + if (fd < 0) + return NULL; + *len = fsize(fd); + void *mem = mmap(NULL, *len, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + return mem == MAP_FAILED ? NULL : mem; +} + +int check_elf(const void *elf) +{ + const Elf_Ehdr *elf_header = elf; + int ret = 0; + + if ((elf_header->e_ident[EI_MAG0] != ELFMAG0) || + (elf_header->e_ident[EI_MAG1] != ELFMAG1) || + (elf_header->e_ident[EI_MAG2] != ELFMAG2) || + (elf_header->e_ident[EI_MAG3] != ELFMAG3)) { + print_error("File is not ELF object file"); + ret = -1; + goto exit; + } + + if (elf_header->e_ident[EI_CLASS] != ELFCLASS) { + print_error("Invalid ELF class"); + ret = -2; + goto exit; + } + +exit: + return ret; +} + +const Elf_Shdr *get_section_by_index(const void *elf, unsigned int index) +{ + const Elf_Ehdr *elf_header = elf; + const Elf_Shdr *section_table = elf + elf_header->e_shoff; + if (index > elf_header->e_shnum) + return NULL; + + return section_table + index; +} + +const Elf_Shdr *get_section_by_name(const void *elf, const char* name) +{ + const Elf_Ehdr *elf_header = elf; + const Elf_Shdr *section_table = elf + elf_header->e_shoff; + const Elf_Shdr *string_entry = section_table + elf_header->e_shstrndx; + const char *string_section = elf + string_entry->sh_offset; + int i; + + for (i = 0; i != elf_header->e_shnum; ++i) { + const Elf_Shdr *entry = section_table + i; + if (!strcmp(name, string_section + entry->sh_name)) { + return entry; + } + } + return NULL; +} + +const Elf_Shdr *get_section_by_type(const void *elf, Elf_Word sh_type) +{ + const Elf_Ehdr *elf_header = elf; + const Elf_Shdr *section_table = elf + 
elf_header->e_shoff; + int i; + + for (i = 0; i != elf_header->e_shnum; ++i) { + const Elf_Shdr *entry = section_table + i; + if (entry->sh_type == sh_type) + return entry; + } + return NULL; +} + +struct sym_table_entry *get_sym_addr(const void *elf, const char* table_name, const char* sym_names[], size_t *n, bool is_only_func) +{ + const Elf_Shdr *shdr; + const Elf_Shdr *str_section; + const Elf_Sym *table; + const char *name; + int entries_num; + int i, j; + struct sym_table_entry *res; + regex_t regex; + + shdr = get_section_by_name(elf, table_name); + if (!shdr || shdr->sh_type == SHT_NOBITS) { + res = NULL; + goto exit; + } + + entries_num = shdr->sh_size / sizeof(Elf_Sym); + if (!entries_num) { + res = NULL; + goto exit; + } + + str_section = get_section_by_index(elf, ((Elf_Ehdr *)elf)->e_shstrndx); + if (!str_section) { + res = NULL; + goto exit; + } + + table = elf + shdr->sh_offset; + + str_section = get_section_by_index(elf, shdr->sh_link); + if (!str_section) { + res = NULL; + goto exit; + } + + if (sym_names) + res = (struct sym_table_entry *)calloc(*n, sizeof(struct sym_table_entry)); + else { + *n = entries_num; + res = (struct sym_table_entry *)calloc(entries_num, sizeof(struct sym_table_entry)); + } + + if (!res) + goto exit; + + if (sym_names) { + for (j = 0; j < *n; j++) { + if (regcomp(&regex, sym_names[j], REG_EXTENDED)) + continue; + + for (i = 0; i < entries_num; i++) { + name = elf + str_section->sh_offset + table[i].st_name; + + if (is_only_func && ELF_ST_TYPE(table[i].st_info) != STT_FUNC) + continue; + + if (!res[j].entry && !regexec(&regex, name, 0, NULL, 0)) { + res[j].name = name; + res[j].entry = &table[i]; + } + } + + regfree(&regex); + } + } else { + for (i = 0; i < entries_num; i++) { + name = elf + str_section->sh_offset + table[i].st_name; + res[i].name = name; + res[i].entry = &table[i]; + } + } + +exit: + return res; +} + +int get_got_plt_addrs(const void *elf, const char* entry_names[], Elf_Addr addrs[], size_t n) +{ + const Elf_Shdr
*shdr; + const Elf_Sym *sym_table; + const char* name; + int plt_table_size; + int i, j; + int ret = 0; + regex_t regex; + + shdr = get_section_by_name(elf, ".rel.plt"); + if (!shdr || shdr->sh_type == SHT_NOBITS) { + ret = -1; + goto exit; + } + + plt_table_size = shdr->sh_size / sizeof(Elf_Rel); + const Elf_Rel *plt_table = elf + shdr->sh_offset; + + const Elf_Shdr* tmp_shdr = get_section_by_index(elf, shdr->sh_link); + sym_table = elf + tmp_shdr->sh_offset; + tmp_shdr = get_section_by_index(elf, tmp_shdr->sh_link); + if (!tmp_shdr) { + ret = -1; + goto exit; + } + + for (j = 0; j < n; j++) { + if (regcomp(&regex, entry_names[j], REG_EXTENDED)) + continue; + + for (i = 0; i < plt_table_size; i++) { + name = elf + tmp_shdr->sh_offset + sym_table[ELF_R_SYM(plt_table[i].r_info)].st_name; + + if (!regexec(&regex, name, 0, NULL, 0)) { + addrs[j] = plt_table[i].r_offset; + } + } + + regfree(&regex); + } + +exit: + return ret; +} + +int get_plt_addrs(const void *elf, const char* func_names[], Elf_Addr addrs[], size_t n) +{ + int ret = get_got_plt_addrs(elf, func_names, addrs, n); + if (ret) + goto exit; + + const Elf_Shdr *got_plt_shdr = get_section_by_name(elf, ".got.plt"); + if (!got_plt_shdr || got_plt_shdr->sh_type == SHT_NOBITS) { + ret = -1; + goto exit; + } + + int base = got_plt_shdr->sh_addr - got_plt_shdr->sh_offset; + + int j; + for (j = 0; j < n; j++) { + if (addrs[j]) + addrs[j] = *(Elf_Addr *)(elf + addrs[j] - base) - 6; + } + + ret = 0; + +exit: + return ret; +} + +const char *get_interp(const void *elf) +{ + const Elf_Shdr *shdr = get_section_by_name(elf, ".interp"); + if (shdr && shdr->sh_type != SHT_NOBITS) + return (const char *)(elf + shdr->sh_offset); + else return NULL; +} + +void add_res_sym(struct sym_table_entry *dst, struct sym_table_entry *src, size_t len) +{ + int i; + + if (!dst || !src) + return; + + for (i = 0; i < len; i++) { + if (!dst[i].entry && src[i].entry) + dst[i] = src[i]; + } +} + +void print_sym_table(struct sym_table_entry *entry,
size_t len) +{ + int i; + + if (!entry) + return; + + for (i = 0; i < len; i++) { + if (entry[i].name && *entry[i].name) + printf("%08x %s\n", entry[i].entry->st_value, entry[i].name); + } +} + +void print_sym_addrs(struct sym_table_entry *entry, size_t len) +{ + int i; + int addr; + + if (!entry) + return; + + for (i = 0; i < len; i++) { + if (!entry[i].entry) + addr = 0; + else + addr = entry[i].entry->st_value; + + printf("%08x\n", addr); + } +} + +void print_usage(const char* pr_path) +{ + fprintf(stderr, + "usage: %s path_to_elf %s\n", + pr_path, + "[ -i | -sa | -s sym_name1 [sym_name2 ...] | -r rel_name1 [rel_name2 ...]]" + ); +} + +int main(int argc, char **argv) +{ + const char *filename = argv[1]; + size_t elf_len; + void *elf; + struct sym_table_entry *res, *tmp; + Elf_Addr *addrs; + size_t entries_num = 0; + int i; + int ret = 0; + + if (argc >= 3) { + elf = mmap_file(filename, &elf_len); + + if (!elf) { + print_error("mmap file error"); + ret = -1; + goto fail_exit; + } + + if (check_elf(elf)) { + ret = -1; + goto fail_exit; + } + + if (argc == 3) { + if (!strcmp(argv[2], "-i")) { + const char* interp = get_interp(elf); + if (interp) + printf("%s\n", interp); + } else if (!strcmp(argv[2], "-sa")) { + res = get_sym_addr(elf, ".symtab", NULL, &entries_num, false); + print_sym_table(res, entries_num); + free(res); + + res = get_sym_addr(elf, ".dynsym", NULL, &entries_num, false); + print_sym_table(res, entries_num); + free(res); + } else { + ret = -1; + goto print_usage_exit; + } + + } else if (argc > 3) { + const char **names = (const char **)&argv[3]; + entries_num = argc - 3; + + if (!strcmp(argv[2], "-s") || !strcmp(argv[2], "-sf")) { + bool is_only_func = argv[2][2] == 'f'; + + res = get_sym_addr(elf, ".symtab", names, &entries_num, is_only_func); + tmp = get_sym_addr(elf, ".dynsym", names, &entries_num, is_only_func); + + if (res) { + add_res_sym(res, tmp, entries_num); + print_sym_addrs(res, entries_num); + } else + print_sym_addrs(tmp, 
entries_num); + + free(res); + free(tmp); + } else if (!strcmp(argv[2], "-r")) { + addrs = (Elf_Addr *)calloc(entries_num, sizeof(Elf_Addr)); + if (addrs) { + if (!get_plt_addrs(elf, names, addrs, entries_num)) { + for (i = 0; i < entries_num; i++) + printf("%08x\n", addrs[i]); + } else + ret = -1; + free(addrs); + } else + ret = -1; + } else { + ret = -1; + goto print_usage_exit; + } + } else { + ret = -1; + goto print_usage_exit; + } + } else { + ret = -1; + goto print_usage_exit; + } + + return ret; + +print_usage_exit: + print_usage(argv[0]); + +fail_exit: + return ret; +} diff --git a/elf_parsing/parse_elf.h b/elf_parsing/parse_elf.h new file mode 100644 index 0000000..5aeffb9 --- /dev/null +++ b/elf_parsing/parse_elf.h @@ -0,0 +1,37 @@ +#ifndef _PARSE_ELF_ +#define _PARSE_ELF_ + +#include <elf.h> + +#define SIZEOF_VOID_P 4 +#if SIZEOF_VOID_P == 8 +typedef Elf64_Ehdr Elf_Ehdr; +typedef Elf64_Shdr Elf_Shdr; +typedef Elf64_Sym Elf_Sym; +typedef Elf64_Addr Elf_Addr; +typedef Elf64_Word Elf_Word; +typedef Elf64_Rel Elf_Rel; +#define ELFCLASS ELFCLASS64 +#define ELF_R_SYM(x) ELF64_R_SYM(x) +#define ELF_ST_TYPE(x) ELF64_ST_TYPE(x) +#elif SIZEOF_VOID_P == 4 +typedef Elf32_Ehdr Elf_Ehdr; +typedef Elf32_Shdr Elf_Shdr; +typedef Elf32_Sym Elf_Sym; +typedef Elf32_Addr Elf_Addr; +typedef Elf32_Word Elf_Word; +typedef Elf32_Rel Elf_Rel; +#define ELFCLASS ELFCLASS32 +#define ELF_R_SYM(x) ELF32_R_SYM(x) +#define ELF_ST_TYPE(x) ELF32_ST_TYPE(x) +#else +#error "Unknown void* size" +#endif + + +struct sym_table_entry { + const char* name; + const Elf_Sym *entry; +}; + +#endif /* _PARSE_ELF_ */ diff --git a/packaging/swap-probe.spec b/packaging/swap-probe.spec index 3e872b8..bf3358d 100644 --- a/packaging/swap-probe.spec +++ b/packaging/swap-probe.spec @@ -43,6 +43,14 @@ Summary: SWAP probe library SWAP probe is a part of data collection back-end for DA. This library will be installed in target.
+%package -n swap-probe-elf +Provides: swap-probe-elf +Summary: Swap elf parsing tool + +%description -n swap-probe-elf +SWAP probe is a part of data collection back-end for DA. +This tool will be installed in target + %prep %setup -q -n %{name}_%{version} @@ -59,6 +67,7 @@ cp LICENSE.MIT %{buildroot}/usr/share/license/%{name} cat LICENSE.LGPLv2.1+ >> %{buildroot}/usr/share/license/%{name} mkdir -p %{buildroot}/usr/local/include +mkdir -p %{buildroot}/usr/local/bin %make_install @@ -75,5 +84,8 @@ mkdir -p %{buildroot}/usr/local/include /usr/local/include/x_define_api_id_list.h /usr/local/include/app_protocol.h +%files -n swap-probe-elf +/usr/local/bin/parse_elf + %changelog diff --git a/scripts/gen_headers.py b/scripts/gen_headers.py index 3a15d75..0a13f49 100755 --- a/scripts/gen_headers.py +++ b/scripts/gen_headers.py @@ -120,15 +120,18 @@ def parse_apis(func_list_file): def __lib_syms(libname): probe_data = {} - p = subprocess.Popen(["readelf -sW \"" + libname + "\""], shell=True, stdout=subprocess.PIPE) + p = subprocess.Popen(["./parse_elf \"" + libname + "\" -sa"], shell=True, stdout=subprocess.PIPE) read_probe = p.communicate() for line in read_probe: if line is None: continue - tokens = re.findall("\d+:\s+([a-f0-9]+)\s+\d+\s+\w+\s+\w+\s+\w+\s+\w+\s+(.*)", line) + + tokens = line.split('\n') for t in tokens: - if all(c in string.hexdigits for c in t[0]) and (int(t[0], 16) != 0): - probe_data[t[1]] = t[0] + parts = t.split(' ') + if len(parts) == 2 and int(parts[0], 16) != 0: + probe_data[parts[1]] = parts[0] + return probe_data -- 2.7.4