#include "p_libiri.h"
+/* Convert one ASCII hexadecimal digit to its numeric value.
+ *
+ * c: the character to convert ('0'-'9', 'A'-'F' or 'a'-'f').
+ *
+ * Returns the digit's value (0..15), or -1 when c is not a hexadecimal
+ * digit. The explicit -1 ensures every path returns a value; callers
+ * that combine the result with shifts should validate the input first
+ * (e.g. with isxdigit()) or check for a negative result.
+ */
+static inline int
+iri__hexnibble(char c)
+{
+	if(c >= '0' && c <= '9')
+	{
+		return c - '0';
+	}
+	if(c >= 'A' && c <= 'F')
+	{
+		return c - 'A' + 10;
+	}
+	if(c >= 'a' && c <= 'f')
+	{
+		return c - 'a' + 10;
+	}
+	/* Not a hex digit: report it rather than returning an indeterminate value */
+	return -1;
+}
+
static inline const char *
-iri__copychar_decode(char **dest, const char *src)
+iri__copychar(char **dest, const char *src) /* copies one byte verbatim: no %XX or '+' decoding is applied */
{
**dest = *src;
+ (*dest)++; /* advance the caller's output cursor past the byte just written */
+ src++;
+ return src; /* input position immediately after the copied byte */
+}
+
+/* Copy one logical character from src to *dest, decoding a %XX escape.
+ *
+ * dest:          in/out cursor into the output buffer; exactly one byte is
+ *                written and the cursor is advanced by one.
+ * src:           input positioned at the character to copy.
+ * convert_space: when exactly 1, a '+' in the input is written as ' '.
+ *
+ * A '%' followed by two hexadecimal digits is written as the single byte
+ * they encode and all three input characters are consumed. A '%' that is
+ * not followed by two hexadecimal digits is copied literally. Any other
+ * character is copied unchanged.
+ *
+ * Returns the input position following the consumed character(s).
+ */
+static inline const char *
+iri__copychar_decode(char **dest, const char *src, int convert_space)
+{
+	unsigned char *p = (unsigned char *) (*dest);
+
+	if(1 == convert_space && '+' == *src)
+	{
+		**dest = ' ';
+	}
+	else if('%' == *src)
+	{
+		/* The <ctype.h> classifiers need a value representable as
+		 * unsigned char (or EOF); a plain char holding a byte >= 0x80
+		 * may be negative, so cast before classifying. The || also
+		 * short-circuits: src[2] is only read when src[1] is a hex
+		 * digit, never past a terminating NUL.
+		 */
+		if(0 == isxdigit((unsigned char) src[1]) || 0 == isxdigit((unsigned char) src[2]))
+		{
+			/* TODO: Deal with %u<nnnn> non-standard encoding - be liberal in
+			 * what you accept, etc.
+			 */
+			**dest = '%';
+		}
+		else
+		{
+			/* Store through the unsigned view so decoded bytes >= 0x80
+			 * are written without a signed conversion.
+			 */
+			*p = (unsigned char) ((iri__hexnibble(src[1]) << 4) | iri__hexnibble(src[2]));
+			/* Skip the two hex digits; the shared increment below
+			 * accounts for the '%' itself.
+			 */
+			src += 2;
+		}
+	}
+	else
+	{
+		**dest = *src;
+	}
src++;
(*dest)++;
return src;
p->scheme = bufp;
while(*src && *src != ':')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->scheme = bufp;
while(*src && *src != ':')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
}
while(*src && *src != ':' && *src != '@')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->auth = bufp;
while(*src && *src != ':' && *src != '@')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->auth = bufp;
while(*src && *src != '@')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->user = bufp;
while(*src != '@')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->host = bufp;
while(*src && *src != ':' && *src != '/' && *src != '?' && *src != '#')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->path = bufp;
while(*src && *src != '?' && *src != '#')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
src++;
while(*src && *src != '#')
{
- src = iri__copychar_decode(&bufp, src);
+ /* Don't actually decode the query itself, otherwise it
+ * can't be reliably split */
+ src = iri__copychar(&bufp, src);
}
*bufp = 0;
bufp++;
p->anchor = bufp;
while(*src)
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;
p->path = bufp;
while(*src && *src != '?' && *src != '#')
{
- src = iri__copychar_decode(&bufp, src);
+ src = iri__copychar_decode(&bufp, src, 0);
}
*bufp = 0;
bufp++;