From b91a2d84094bc1fa609cdc7d9515f595d182b554 Mon Sep 17 00:00:00 2001 From: Baptiste DURAND Date: Mon, 8 Jul 2013 20:28:20 +0200 Subject: [PATCH] Fix memory corruption. Apply patch from upstream project website: http://code.google.com/p/libiri/issues/detail?id=1 This permits to fix TZPC-3558 : Some tests Application segfault due to a memory corruption --- libiri/p_libiri.h | 2 ++ libiri/parse.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 70 insertions(+), 12 deletions(-) diff --git a/libiri/p_libiri.h b/libiri/p_libiri.h index 077f27b..c428892 100644 --- a/libiri/p_libiri.h +++ b/libiri/p_libiri.h @@ -33,6 +33,8 @@ #ifndef P_LIBIRI_H_ # define P_LIBIRI_H_ 1 +#define LIBIRI_INTERNAL + #include #include #include diff --git a/libiri/parse.c b/libiri/parse.c index 645d106..6f4e556 100644 --- a/libiri/parse.c +++ b/libiri/parse.c @@ -59,6 +59,7 @@ iri__hexnibble(char c) { return c - 'a' + 10; } + return 0; } static inline const char * @@ -109,27 +110,82 @@ iri__allocbuf(const char *src, size_t *len) { size_t sc; const char *p, *c; - - /* Calculate the size of the buffer required to hold a decoded version of - * src, including enough breathing space for null bytes. - */ - /* XXX: This is way too much; we need to actually count it */ - *len = (strlen(src) * 4) + 16; - /* Determine how much space we need for the scheme list */ +/* + Internal format of IRI structure is very hard to understand at first. + The buffer is used to store character strings with every parsed part of + IRI, like host, user, auth, path etc. Start of every character string is + ALIGNED to ALIGNMENT value and finished with NULL byte. + Above that, the buffer is used to keep variable size array of parsed + scheme parts. It consist of the array of addresses pointing to starts + of scheme parts which are kept as all other characters strings, so are + aligned to ALIGMENT and ended with NULL byte. + This function calculates approximation of buffer size to store all the + data of parser IRI. + + Fully filled buffer with scheme parts looks as follows: + 0. start of the buffer + 1. aligned start of the scheme part with added NULL byte + 2. aligned start of the user part with added NULL byte + 3. aligned start of the password part with added NULL byte + 4. aligned start of the array of size schemes_number+1 of pointers that point + to consecutive scheme part character strings (last one is NULL) + schemes_number is a number of scheme tokens delimited with + sign in + scheme part + 5. schems_number of characters strings of scheme parts each of which + aligned and finished with NULL byte. + 6. aligned start of the host part with added NULL byte + 7. aligned start of the path part with added NULL byte + 8. aligned start of the query part with added NULL byte + 9. aligned start of the anchor part with added NULL byte + + There can be indentified 4 kinds of characters in IRI: + - characters which are copied one to one (i.e. letters) + - characters which are removed (special characters like comma in scheme) + - characters which are replaced with other characers where buffer grows + this only happens with scheme part + - characters which are replaced with other characers where buffer decreases + + Alighning a pointer in worst case will advance a buffer pointers + ALIGNMENT-1 bytes + + Knowing all that we can count an approximation of buffer size which can + be trusted that whole parsed IRI content will fit in. +*/ + +/* first approximation - all characers will have to be stored in buffer */ + *len = strlen(src); + +/* second approximation - IRI has all possible parts which have to be + * aligned to ALIGNMENT and have NULL byte an the end. There are 7 different + * parts like that */ + *len += 7 * (ALIGNMENT-1 + 1); + +/* third approximation - we have to make a room for scheme parts array. + * Because the array has an aligned array of n + 1 pointers and n + * characters strings aligned and NULL byte terminated. + */ if(NULL != (c = strchr(src, ':'))) { sc = 1; for(p = src; p < c; p++) { - if(*p == '+') + if(*p == '+') { sc++; } } - /* Ensure we can align each element on an 8-byte boundary */ - *len = (src - c) + 1 + sc + ((sc + 1) * (sizeof(char *) + 7)); - *len += (7 * 11); - } + /* fourth approximation - all characters of scheme part will be stored + * in scheme parts tokens */ + *len += (c - src); + + /* fifth approximation - Ensure we can align each element on an + * ALIGNMENT byte boundary and append NULL byte */ + *len += sc * (ALIGNMENT-1 + 1); + + /* sixth approximation - Ensure we have a room for aligned array + * indexes */ + *len += ALIGNMENT-1 + (sc + 1) * (sizeof(char*)/sizeof(char)); + } return (char *) calloc(1, *len); } -- 2.7.4