From: jbj Date: Mon, 14 Apr 2003 19:24:29 +0000 (+0000) Subject: Clone for file-4.02 merge. X-Git-Tag: rpm-4.4-release~620 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b3d8f3155ed6fda819fd1d8df881bcb0a88f6717;p=platform%2Fupstream%2Frpm.git Clone for file-4.02 merge. CVS patchset: 6739 CVS date: 2003/04/14 19:24:29 --- diff --git a/file/src/apprentice.c b/file/src/apprentice.c new file mode 100644 index 0000000..9c36dac --- /dev/null +++ b/file/src/apprentice.c @@ -0,0 +1,1118 @@ +/* + * apprentice - make one pass through /etc/magic, learning its secrets. + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, + * credits must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. Since few users + * ever read sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. + */ + +#include "system.h" +#include "file.h" +#include "debug.h" + +FILE_RCSID("@(#)Id: apprentice.c,v 1.49 2002/07/03 19:00:41 christos Exp ") + +/*@access fmagic @*/ + +#define EATAB {while (isascii((unsigned char) *l) && \ + isspace((unsigned char) *l)) ++l;} +#define LOWCASE(l) (isupper((unsigned char) (l)) ? 
\ + tolower((unsigned char) (l)) : (l)) +/* + * Work around a bug in headers on Digital Unix. + * At least confirmed for: OSF1 V4.0 878 + */ +#if defined(__osf__) && defined(__DECC) +#ifdef MAP_FAILED +#undef MAP_FAILED +#endif +#endif + +#ifndef MAP_FAILED +#define MAP_FAILED (void *) -1 +#endif + +#ifndef MAP_FILE +#define MAP_FILE 0 +#endif + +/*@unchecked@*/ +#ifdef __EMX__ + static char PATHSEP=';'; +#else + static char PATHSEP=':'; +#endif + +/*@unchecked@*/ +static int maxmagic = 0; + +#ifndef MAGIC +# define MAGIC "/etc/magic" +#endif + +/*@unchecked@*/ /*@observer@*/ +const char *default_magicfile = MAGIC; + +/* + * extend the sign bit if the comparison is to be signed + */ +uint32_t +signextend(struct magic *m, uint32_t v) +{ + if (!(m->flag & UNSIGNED)) + switch(m->type) { + /* + * Do not remove the casts below. They are + * vital. When later compared with the data, + * the sign extension must have happened. + */ + case BYTE: + v = (char) v; + break; + case SHORT: + case BESHORT: + case LESHORT: + v = (short) v; + break; + case DATE: + case BEDATE: + case LEDATE: + case LDATE: + case BELDATE: + case LELDATE: + case LONG: + case BELONG: + case LELONG: + v = (int32_t) v; + break; + case STRING: + case PSTRING: + break; + case REGEX: + break; + default: + magwarn("can't happen: m->type=%d\n", m->type); + return -1; + } + return v; +} + +/* + * eatsize(): Eat the size spec from a number [eg. 10UL] + */ +/*@-bounds@*/ +static void +eatsize(/*@out@*/ char **p) + /*@modifies *p @*/ +{ + char *l = *p; + + if (LOWCASE(*l) == 'u') + l++; + + switch (LOWCASE(*l)) { + case 'l': /* long */ + case 's': /* short */ + case 'h': /* short */ + case 'b': /* char/byte */ + case 'c': /* char/byte */ + l++; + /*@fallthrough@*/ + default: + break; + } + + *p = l; +} +/*@=bounds@*/ + +/* Single hex char to int; -1 if not a hex char. 
*/ +static int +hextoint(int c) + /*@*/ +{ + if (!isascii((unsigned char) c)) + return -1; + if (isdigit((unsigned char) c)) + return c - '0'; + if ((c >= 'a')&&(c <= 'f')) + return c + 10 - 'a'; + if (( c>= 'A')&&(c <= 'F')) + return c + 10 - 'A'; + return -1; +} + +/* + * Convert a string containing C character escapes. Stop at an unescaped + * space or tab. + * Copy the converted version to "p", returning its length in *slen. + * Return updated scan pointer as function result. + */ +/*@-shiftimplementation@*/ +/*@-bounds@*/ +static char * +getstr(/*@returned@*/ char *s, char *p, int plen, /*@out@*/ int *slen) + /*@globals fileSystem @*/ + /*@modifies *p, *slen, fileSystem @*/ +{ + char *origs = s, *origp = p; + char *pmax = p + plen - 1; + int c; + int val; + + while ((c = *s++) != '\0') { + if (isspace((unsigned char) c)) + break; + if (p >= pmax) { + fprintf(stderr, "String too long: %s\n", origs); + break; + } + if(c == '\\') { + switch(c = *s++) { + + case '\0': + goto out; + + default: + *p++ = (char) c; + /*@switchbreak@*/ break; + + case 'n': + *p++ = '\n'; + /*@switchbreak@*/ break; + + case 'r': + *p++ = '\r'; + /*@switchbreak@*/ break; + + case 'b': + *p++ = '\b'; + /*@switchbreak@*/ break; + + case 't': + *p++ = '\t'; + /*@switchbreak@*/ break; + + case 'f': + *p++ = '\f'; + /*@switchbreak@*/ break; + + case 'v': + *p++ = '\v'; + /*@switchbreak@*/ break; + + /* \ and up to 3 octal digits */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + val = c - '0'; + c = *s++; /* try for 2 */ + if(c >= '0' && c <= '7') { + val = (val<<3) | (c - '0'); + c = *s++; /* try for 3 */ + if(c >= '0' && c <= '7') + val = (val<<3) | (c-'0'); + else + --s; + } + else + --s; + *p++ = (char)val; + /*@switchbreak@*/ break; + + /* \x and up to 2 hex digits */ + case 'x': + val = 'x'; /* Default if no digits */ + c = hextoint(*s++); /* Get next char */ + if (c >= 0) { + val = c; + c = hextoint(*s++); + if (c >= 0) + val = (val << 
4) + c; + else + --s; + } else + --s; + *p++ = (char)val; + /*@switchbreak@*/ break; + } + } else + *p++ = (char)c; + } +out: + *p = '\0'; + *slen = p - origp; + return s; +} +/*@=bounds@*/ +/*@=shiftimplementation@*/ + +/* + * Read a numeric value from a pointer, into the value union of a magic + * pointer, according to the magic type. Update the string pointer to point + * just after the number read. Return 0 for success, non-zero for failure. + */ +/*@-bounds@*/ +static int +getvalue(struct magic *m, /*@out@*/ char **p) + /*@globals fileSystem @*/ + /*@modifies m, *p, fileSystem @*/ +{ + int slen; + + if (m->type == STRING || m->type == PSTRING || m->type == REGEX) { + *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen); + m->vallen = slen; + } else + if (m->reln != 'x') { + m->value.l = signextend(m, strtoul(*p, p, 0)); + eatsize(p); + } + return 0; +} +/*@=bounds@*/ + +/* + * parse one line from magic file, put into magic[index++] if valid + */ +/*@-bounds@*/ +static int +parse(/*@out@*/ struct magic **magicp, /*@out@*/ uint32_t *nmagicp, + char *l, int action) + /*@globals maxmagic, fileSystem @*/ + /*@modifies *magicp, *nmagicp, maxmagic, fileSystem @*/ +{ + int i = 0; + struct magic *m; + char *t; + +#define ALLOC_INCR 200 + if (*nmagicp + 1 >= maxmagic){ + maxmagic += ALLOC_INCR; +/*@-unqualifiedtrans @*/ + *magicp = xrealloc(*magicp, sizeof(**magicp) * maxmagic); +/*@=unqualifiedtrans @*/ + m = &(*magicp)[*nmagicp]; + memset(m, 0, sizeof(**magicp) * ALLOC_INCR); + } else + m = &(*magicp)[*nmagicp]; + m->flag = 0; + m->cont_level = 0; + + while (*l == '>') { + ++l; /* step over */ + m->cont_level++; + } + + if (m->cont_level != 0 && *l == '(') { + ++l; /* step over */ + m->flag |= INDIR; + } + if (m->cont_level != 0 && *l == '&') { + ++l; /* step over */ + m->flag |= OFFADD; + } + + /* get offset, then skip over it */ + m->offset = (int) strtoul(l,&t,0); + if (l == t) + magwarn("offset %s invalid", l); + l = t; + + if (m->flag & INDIR) { + m->in_type = 
LONG; + m->in_offset = 0; + /* + * read [.lbs][+-]nnnnn) + */ + if (*l == '.') { + l++; + switch (*l) { + case 'l': + m->in_type = LELONG; + break; + case 'L': + m->in_type = BELONG; + break; + case 'h': + case 's': + m->in_type = LESHORT; + break; + case 'H': + case 'S': + m->in_type = BESHORT; + break; + case 'c': + case 'b': + case 'C': + case 'B': + m->in_type = BYTE; + break; + default: + magwarn("indirect offset type %c invalid", *l); + break; + } + l++; + } + if (*l == '~') { + m->in_op = OPINVERSE; + l++; + } + switch (*l) { + case '&': + m->in_op |= OPAND; + l++; + break; + case '|': + m->in_op |= OPOR; + l++; + break; + case '^': + m->in_op |= OPXOR; + l++; + break; + case '+': + m->in_op |= OPADD; + l++; + break; + case '-': + m->in_op |= OPMINUS; + l++; + break; + case '*': + m->in_op |= OPMULTIPLY; + l++; + break; + case '/': + m->in_op |= OPDIVIDE; + l++; + break; + case '%': + m->in_op |= OPMODULO; + l++; + break; + } + if (isdigit((unsigned char)*l)) + m->in_offset = strtoul(l, &t, 0); + else + t = l; + if (*t++ != ')') + magwarn("missing ')' in indirect offset"); + l = t; + } + + + while (isascii((unsigned char)*l) && isdigit((unsigned char)*l)) + ++l; + EATAB; + +#define NBYTE 4 +#define NSHORT 5 +#define NLONG 4 +#define NSTRING 6 +#define NDATE 4 +#define NBESHORT 7 +#define NBELONG 6 +#define NBEDATE 6 +#define NLESHORT 7 +#define NLELONG 6 +#define NLEDATE 6 +#define NPSTRING 7 +#define NLDATE 5 +#define NBELDATE 7 +#define NLELDATE 7 +#define NREGEX 5 + + if (*l == 'u') { + ++l; + m->flag |= UNSIGNED; + } + + /* get type, skip it */ + if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */ + m->type = BYTE; + l += NBYTE; + } else if (strncmp(l, "byte", NBYTE)==0) { + m->type = BYTE; + l += NBYTE; + } else if (strncmp(l, "short", NSHORT)==0) { + m->type = SHORT; + l += NSHORT; + } else if (strncmp(l, "long", NLONG)==0) { + m->type = LONG; + l += NLONG; + } else if (strncmp(l, "string", NSTRING)==0) { + m->type = STRING; + l += NSTRING; + } else 
if (strncmp(l, "date", NDATE)==0) { + m->type = DATE; + l += NDATE; + } else if (strncmp(l, "beshort", NBESHORT)==0) { + m->type = BESHORT; + l += NBESHORT; + } else if (strncmp(l, "belong", NBELONG)==0) { + m->type = BELONG; + l += NBELONG; + } else if (strncmp(l, "bedate", NBEDATE)==0) { + m->type = BEDATE; + l += NBEDATE; + } else if (strncmp(l, "leshort", NLESHORT)==0) { + m->type = LESHORT; + l += NLESHORT; + } else if (strncmp(l, "lelong", NLELONG)==0) { + m->type = LELONG; + l += NLELONG; + } else if (strncmp(l, "ledate", NLEDATE)==0) { + m->type = LEDATE; + l += NLEDATE; + } else if (strncmp(l, "pstring", NPSTRING)==0) { + m->type = PSTRING; + l += NPSTRING; + } else if (strncmp(l, "ldate", NLDATE)==0) { + m->type = LDATE; + l += NLDATE; + } else if (strncmp(l, "beldate", NBELDATE)==0) { + m->type = BELDATE; + l += NBELDATE; + } else if (strncmp(l, "leldate", NLELDATE)==0) { + m->type = LELDATE; + l += NLELDATE; + } else if (strncmp(l, "regex", NREGEX)==0) { + m->type = REGEX; + l += sizeof("regex"); + } else { + magwarn("type %s invalid", l); + return -1; + } + /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */ + /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? 
*/ + if (*l == '~') { + if (STRING != m->type && PSTRING != m->type) + m->mask_op = OPINVERSE; + ++l; + } + switch (*l) { + case '&': + m->mask_op |= OPAND; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '|': + m->mask_op |= OPOR; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '^': + m->mask_op |= OPXOR; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '+': + m->mask_op |= OPADD; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '-': + m->mask_op |= OPMINUS; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '*': + m->mask_op |= OPMULTIPLY; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '%': + m->mask_op |= OPMODULO; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + break; + case '/': + if (STRING != m->type && PSTRING != m->type) { + m->mask_op |= OPDIVIDE; + ++l; + m->mask = signextend(m, strtoul(l, &l, 0)); + eatsize(&l); + } else { + m->mask = 0L; + while (!isspace(*++l)) { + switch (*l) { + case CHAR_IGNORE_LOWERCASE: + m->mask |= STRING_IGNORE_LOWERCASE; + /*@switchbreak@*/ break; + case CHAR_COMPACT_BLANK: + m->mask |= STRING_COMPACT_BLANK; + /*@switchbreak@*/ break; + case CHAR_COMPACT_OPTIONAL_BLANK: + m->mask |= + STRING_COMPACT_OPTIONAL_BLANK; + /*@switchbreak@*/ break; + default: + magwarn("string extension %c invalid", + *l); + return -1; + } + } + } + break; + } + /* We used to set mask to all 1's here, instead let's just not do anything + if mask = 0 (unless you have a better idea) */ + EATAB; + + switch (*l) { + case '>': + case '<': + /* Old-style anding: "0 byte &0x80 dynamically linked" */ + case '&': + case '^': + case '=': + m->reln = *l; + ++l; + if (*l == '=') { + /* HP compat: ignore &= etc. 
*/ + ++l; + } + break; + case '!': + if (m->type != STRING && m->type != PSTRING) { + m->reln = *l; + ++l; + break; + } + /*@fallthrough@*/ + default: + if (*l == 'x' && isascii((unsigned char)l[1]) && + isspace((unsigned char)l[1])) { + m->reln = *l; + ++l; + goto GetDesc; /* Bill The Cat */ + } + m->reln = '='; + break; + } + EATAB; + + if (getvalue(m, &l)) + return -1; + /* + * TODO finish this macro and start using it! + * #define offsetcheck {if (offset > HOWMANY-1) + * magwarn("offset too big"); } + */ + + /* + * now get last part - the description + */ +GetDesc: + EATAB; + if (l[0] == '\b') { + ++l; + m->nospflag = 1; + } else if ((l[0] == '\\') && (l[1] == 'b')) { + ++l; + ++l; + m->nospflag = 1; + } else + m->nospflag = 0; + while ((m->desc[i++] = *l++) != '\0' && i= 040 && c <= 0176) /* TODO isprint && !iscntrl */ + (void) fputc(c, fp); + else { + (void) fputc('\\', fp); + switch (c) { + + case '\n': + (void) fputc('n', fp); + /*@switchbreak@*/ break; + + case '\r': + (void) fputc('r', fp); + /*@switchbreak@*/ break; + + case '\b': + (void) fputc('b', fp); + /*@switchbreak@*/ break; + + case '\t': + (void) fputc('t', fp); + /*@switchbreak@*/ break; + + case '\f': + (void) fputc('f', fp); + /*@switchbreak@*/ break; + + case '\v': + (void) fputc('v', fp); + /*@switchbreak@*/ break; + + default: + (void) fprintf(fp, "%.3o", c & 0377); + /*@switchbreak@*/ break; + } + } + } +} + +/* + * swap a short + */ +/*@-bounds@*/ +static uint16_t +swap2(uint16_t sv) + /*@*/ +{ + uint16_t rv; + uint8_t *s = (uint8_t *) &sv; + uint8_t *d = (uint8_t *) &rv; + d[0] = s[1]; + d[1] = s[0]; + return rv; +} +/*@=bounds@*/ + +/* + * swap an int + */ +/*@-bounds@*/ +static uint32_t +swap4(uint32_t sv) + /*@*/ +{ + uint32_t rv; + uint8_t *s = (uint8_t *) &sv; + uint8_t *d = (uint8_t *) &rv; + d[0] = s[3]; + d[1] = s[2]; + d[2] = s[1]; + d[3] = s[0]; + return rv; +} +/*@=bounds@*/ + +/* + * byteswap a single magic entry + */ +static +void bs1(struct magic *m) + /*@modifies m @*/ +{ 
+ m->cont_level = swap2(m->cont_level); + m->offset = swap4(m->offset); + m->in_offset = swap4(m->in_offset); + if (m->type != STRING) + m->value.l = swap4(m->value.l); + m->mask = swap4(m->mask); +} + +/* + * Byteswap an mmap'ed file if needed + */ +/*@-bounds@*/ +static void +byteswap(/*@null@*/ struct magic *m, uint32_t nmagic) + /*@modifies m @*/ +{ + uint32_t i; + if (m != NULL) + for (i = 0; i < nmagic; i++) + bs1(&m[i]); +} +/*@=bounds@*/ + +/* + * make a dbname + */ +static char * +mkdbname(const char *fn) + /*@*/ +{ + static const char ext[] = ".mgc"; + /*@only@*/ + static char *buf = NULL; + + buf = xrealloc(buf, strlen(fn) + sizeof(ext) + 1); + (void)strcpy(buf, fn); + (void)strcat(buf, ext); + return buf; +} + +/* + * parse from a file + * const char *fn: name of magic file + */ +/*@-bounds@*/ +static int +apprentice_file(fmagic fm, /*@out@*/ struct magic **magicp, + /*@out@*/ uint32_t *nmagicp, const char *fn, int action) + /*@globals maxmagic, fileSystem @*/ + /*@modifies fm, *magicp, *nmagicp, maxmagic, fileSystem @*/ +{ + /*@observer@*/ + static const char hdr[] = + "cont\toffset\ttype\topcode\tmask\tvalue\tdesc"; + FILE *f; + char line[BUFSIZ+1]; + int errs = 0; + + f = fopen(fn, "r"); + if (f == NULL) { + if (errno != ENOENT) + (void) fprintf(stderr, + "%s: can't read magic file %s (%s)\n", + __progname, fn, strerror(errno)); + return -1; + } + + maxmagic = MAXMAGIS; + *magicp = (struct magic *) xcalloc(sizeof(**magicp), maxmagic); + + /* parse it */ + if (action == CHECK) /* print silly verbose header for USG compat. 
*/ + (void) printf("%s\n", hdr); + + for (fm->lineno = 1; fgets(line, BUFSIZ, f) != NULL; fm->lineno++) { + if (line[0]=='#') /* comment, do not parse */ + continue; + if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */ + continue; + line[strlen(line)-1] = '\0'; /* delete newline */ + if (parse(magicp, nmagicp, line, action) != 0) + errs = 1; + } + + (void) fclose(f); + if (errs) { + free(*magicp); + *magicp = NULL; + *nmagicp = 0; + } + return errs; +} +/*@=bounds@*/ + +/* + * handle an mmaped file. + */ +/*@-bounds@*/ +static int +apprentice_compile(/*@unused@*/ const fmagic fm, + /*@out@*/ struct magic **magicp, /*@out@*/ uint32_t *nmagicp, + const char *fn, /*@unused@*/ int action) + /*@globals fileSystem, internalState @*/ + /*@modifies fileSystem, internalState @*/ +{ + int fd; + char *dbname = mkdbname(fn); + /*@observer@*/ + static const uint32_t ar[] = { + MAGICNO, VERSIONNO + }; + + if (dbname == NULL) + return -1; + + if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) { + (void)fprintf(stderr, "%s: Cannot open `%s' (%s)\n", + __progname, dbname, strerror(errno)); + return -1; + } + + if (write(fd, ar, sizeof(ar)) != sizeof(ar)) { + (void)fprintf(stderr, "%s: error writing `%s' (%s)\n", + __progname, dbname, strerror(errno)); + return -1; + } + + if (lseek(fd, sizeof(**magicp), SEEK_SET) != sizeof(**magicp)) { + (void)fprintf(stderr, "%s: error seeking `%s' (%s)\n", + __progname, dbname, strerror(errno)); + return -1; + } + + if (write(fd, *magicp, sizeof(**magicp) * *nmagicp) + != sizeof(**magicp) * *nmagicp) { + (void)fprintf(stderr, "%s: error writing `%s' (%s)\n", + __progname, dbname, strerror(errno)); + return -1; + } + + (void)close(fd); + return 0; +} +/*@=bounds@*/ + +/* + * handle a compiled file. 
+ */ +/*@-bounds@*/ +static int +apprentice_map(/*@unused@*/ const fmagic fm, + /*@out@*/ struct magic **magicp, /*@out@*/ uint32_t *nmagicp, + const char *fn, /*@unused@*/ int action) + /*@globals fileSystem, internalState @*/ + /*@modifies *magicp, *nmagicp, fileSystem, internalState @*/ +{ + int fd; + struct stat st; + uint32_t *ptr; + uint32_t version; + int needsbyteswap; + char *dbname = mkdbname(fn); + void *mm = NULL; + + if (dbname == NULL) + return -1; + + if ((fd = open(dbname, O_RDONLY)) == -1) + return -1; + + if (fstat(fd, &st) == -1) { + (void)fprintf(stderr, "%s: Cannot stat `%s' (%s)\n", + __progname, dbname, strerror(errno)); + goto errxit; + } + +#ifdef HAVE_MMAP + if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) { + (void)fprintf(stderr, "%s: Cannot map `%s' (%s)\n", + __progname, dbname, strerror(errno)); + goto errxit; + } +#else + mm = xmalloc((size_t)st.st_size); + if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) { + (void) fprintf(stderr, "%s: Read failed (%s).\n", __progname, + strerror(errno)); + goto errxit; + } +#endif + *magicp = mm; + (void)close(fd); + fd = -1; + ptr = (uint32_t *) *magicp; + if (ptr == NULL) /* XXX can't happen */ + goto errxit; + if (*ptr != MAGICNO) { + if (swap4(*ptr) != MAGICNO) { + (void)fprintf(stderr, "%s: Bad magic in `%s'\n", + __progname, dbname); + goto errxit; + } + needsbyteswap = 1; + } else + needsbyteswap = 0; + if (needsbyteswap) + version = swap4(ptr[1]); + else + version = ptr[1]; + if (version != VERSIONNO) { + (void)fprintf(stderr, + "%s: version mismatch (%d != %d) in `%s'\n", + __progname, version, VERSIONNO, dbname); + goto errxit; + } + *nmagicp = (st.st_size / sizeof(**magicp)) - 1; + (*magicp)++; + if (needsbyteswap) + byteswap(*magicp, *nmagicp); + return 0; + +errxit: + if (fd != -1) + (void)close(fd); +/*@-branchstate@*/ + if (mm != NULL) { +#ifdef HAVE_MMAP + (void)munmap(mm, (size_t)st.st_size); +#else + 
free(mm); +#endif + } else { + *magicp = NULL; + *nmagicp = 0; + } +/*@=branchstate@*/ + return -1; +} +/*@=bounds@*/ + +/* + * Handle one file. + */ +static int +apprentice_1(fmagic fm, const char *fn, int action) + /*@globals fileSystem, internalState @*/ + /*@modifies fm, fileSystem, internalState @*/ +{ +/*@-shadow@*/ + struct magic *magic = NULL; + uint32_t nmagic = 0; +/*@=shadow@*/ + struct mlist *ml; + int rv = -1; + + if (action == COMPILE) { + rv = apprentice_file(fm, &magic, &nmagic, fn, action); + if (rv) + return rv; + return apprentice_compile(fm, &magic, &nmagic, fn, action); + } +#ifndef COMPILE_ONLY + if ((rv = apprentice_map(fm, &magic, &nmagic, fn, action)) != 0) + (void)fprintf(stderr, "%s: Using regular magic file `%s'\n", + __progname, fn); + + if (rv != 0) + rv = apprentice_file(fm, &magic, &nmagic, fn, action); + + if (rv != 0) + return rv; + + if (magic == NULL || nmagic == 0) + return rv; + + ml = xmalloc(sizeof(*ml)); + + ml->magic = magic; + ml->nmagic = nmagic; + + fm->mlist->prev->next = ml; + ml->prev = fm->mlist->prev; +/*@-immediatetrans@*/ + ml->next = fm->mlist; +/*@=immediatetrans@*/ +/*@-kepttrans@*/ + fm->mlist->prev = ml; +/*@=kepttrans@*/ + +/*@-compdef -compmempass @*/ + return rv; +/*@=compdef =compmempass @*/ +#endif /* COMPILE_ONLY */ +} + +/* const char *fn: list of magic files */ +int +fmagicSetup(fmagic fm, const char *fn, int action) +{ + char *p, *mfn; + int file_err, errs = -1; + + if (fm->mlist == NULL) { + static struct mlist mlist; +/*@-immediatetrans@*/ + mlist.next = &mlist; + mlist.prev = &mlist; + fm->mlist = &mlist; +/*@=immediatetrans@*/ + } + + mfn = xstrdup(fn); + fn = mfn; + +/*@-branchstate@*/ + while (fn != NULL) { + p = strchr(fn, PATHSEP); + if (p != NULL) + *p++ = '\0'; + file_err = apprentice_1(fm, fn, action); + if (file_err > errs) + errs = file_err; + fn = p; + } +/*@=branchstate@*/ + if (errs == -1) + (void) fprintf(stderr, "%s: couldn't find any magic files!\n", + __progname); + if (action == 
CHECK && errs) + exit(EXIT_FAILURE); + + free(mfn); +/*@-compdef -compmempass@*/ + return errs; +/*@=compdef =compmempass@*/ +} + +#ifdef COMPILE_ONLY +int +main(int argc, char *argv[]) + /*@*/ +{ + static struct fmagic_s myfmagic; + fmagic fm = &myfmagic; + int ret; + + setprogname(argv[0]); /* Retrofit glibc __progname */ + + if (argc != 2) { + (void)fprintf(stderr, "usage: %s file\n", __progname); + exit(1); + } + fm->magicfile = argv[1]; + + exit(apprentice(fm, fm->magicfile, COMPILE)); +} +#endif /* COMPILE_ONLY */ diff --git a/file/src/ascmagic.c b/file/src/ascmagic.c new file mode 100644 index 0000000..37ef00b --- /dev/null +++ b/file/src/ascmagic.c @@ -0,0 +1,773 @@ +/* + * ASCII magic -- file types that we know based on keywords + * that can appear anywhere in the file. + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * Extensively modified by Eric Fischer in July, 2000, + * to handle character codes other than ASCII on a unified basis. + * + * Joerg Wunsch wrote the original support for 8-bit + * international characters, now subsumed into this file. + */ + +/* + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, + * credits must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. 
Since few users + * ever read sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. + */ + +#include "system.h" +#include "file.h" +#include "names.h" +#include "tar.h" +#include "debug.h" + +FILE_RCSID("@(#)Id: ascmagic.c,v 1.32 2002/07/03 18:26:37 christos Exp ") + +/*@access fmagic @*/ + +/* + * Stolen (by the author!) from the public domain tar program: + * Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu). + */ +#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') ) + +/* + * Quick and dirty octal conversion. + * + * Result is -1 if the field is invalid (all blank, or nonoctal). + */ +/*@-bounds@*/ +static int +from_oct(int digs, char *where) + /*@*/ +{ + int value; + + while (isspace((unsigned char)*where)) { /* Skip spaces */ + where++; + if (--digs <= 0) + return -1; /* All blank field */ + } + value = 0; +/*@-shiftimplementation@*/ + while (digs > 0 && isodigit(*where)) { /* Scan til nonoctal */ + value = (value << 3) | (*where++ - '0'); + --digs; + } +/*@=shiftimplementation@*/ + + if (digs > 0 && *where && !isspace((unsigned char)*where)) + return -1; /* Ended on non-space/nul */ + + return value; +} +/*@=bounds@*/ + +/* + * Return + * 0 if the checksum is bad (i.e., probably not a tar archive), + * 1 for old UNIX tar file, + * 2 for Unix Std (POSIX) tar file. + */ +static int +is_tar(const fmagic fm) + /*@*/ +{ + int nb = fm->nb; + union record *header = (union record *)fm->buf; + int i; + int sum, recsum; + char *p; + + if (nb < sizeof(*header)) + return 0; + + recsum = from_oct(8, header->header.chksum); + + sum = 0; + p = header->charptr; +/*@-sizeoftype@*/ + for (i = sizeof(union record); --i >= 0;) +/*@=sizeoftype@*/ + { + /* + * We can't use unsigned char here because of old compilers, + * e.g. V7. + */ + sum += 0xFF & *p++; + } + + /* Adjust checksum to count the "chksum" field as blanks. 
*/ + for (i = sizeof(header->header.chksum); --i >= 0;) + sum -= 0xFF & header->header.chksum[i]; + sum += ' ' * sizeof header->header.chksum; + + if (sum != recsum) + return 0; /* Not a tar archive */ + + if (!strcmp(header->header.magic, TARMAGIC)) + return 2; /* Unix Standard tar archive */ + + return 1; /* Old fashioned tar archive */ +} +typedef unsigned long unichar; + +#define MAXLINELEN 300 /* longest sane line length */ +#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ + || (x) == 0x85 || (x) == '\f') + +/* + * This table reflects a particular philosophy about what constitutes + * "text," and there is room for disagreement about it. + * + * Version 3.31 of the file command considered a file to be ASCII if + * each of its characters was approved by either the isascii() or + * isalpha() function. On most systems, this would mean that any + * file consisting only of characters in the range 0x00 ... 0x7F + * would be called ASCII text, but many systems might reasonably + * consider some characters outside this range to be alphabetic, + * so the file command would call such characters ASCII. It might + * have been more accurate to call this "considered textual on the + * local system" than "ASCII." + * + * It considered a file to be "International language text" if each + * of its characters was either an ASCII printing character (according + * to the real ASCII standard, not the above test), a character in + * the range 0x80 ... 0xFF, or one of the following control characters: + * backspace, tab, line feed, vertical tab, form feed, carriage return, + * escape. No attempt was made to determine the language in which files + * of this type were written. 
+ * + * + * The table below considers a file to be ASCII if all of its characters + * are either ASCII printing characters (again, according to the X3.4 + * standard, not isascii()) or any of the following controls: bell, + * backspace, tab, line feed, form feed, carriage return, esc, nextline. + * + * I include bell because some programs (particularly shell scripts) + * use it literally, even though it is rare in normal text. I exclude + * vertical tab because it never seems to be used in real text. I also + * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85), + * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline + * character to. It might be more appropriate to include it in the 8859 + * set instead of the ASCII set, but it's got to be included in *something* + * we recognize or EBCDIC files aren't going to be considered textual. + * Some old Unix source files use SO/SI (^N/^O) to shift between Greek + * and Latin characters, so these should possibly be allowed. But they + * make a real mess on VT100-style displays if they're not paired properly, + * so we are probably better off not calling them text. + * + * A file is considered to be ISO-8859 text if its characters are all + * either ASCII, according to the above definition, or printing characters + * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF. + * + * Finally, a file is considered to be international text from some other + * character code if its characters are all either ISO-8859 (according to + * the above definition) or characters in the range 0x80 ... 0x9F, which + * ISO-8859 considers to be control characters but the IBM PC and Macintosh + * consider to be printing characters. 
+ */ + +#define F 0 /* character never appears in text */ +#define T 1 /* character appears in plain ASCII text */ +#define I 2 /* character appears in ISO-8859 text */ +#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ + +/*@unchecked@*/ /*@observer@*/ +static char text_chars[256] = { + /* BEL BS HT LF FF CR */ + F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ + /* ESC */ + F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ + T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ + /* NEL */ + X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ + X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ + I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ +}; + +/*@-bounds@*/ +static int +looks_ascii(const unsigned char *buf, int nb, + /*@out@*/ unichar *ubuf, /*@out@*/ int *ulen) + /*@modifies *ubuf, *ulen @*/ +{ + int i; + + *ulen = 0; + + for (i = 0; i < nb; i++) { + int t = text_chars[buf[i]]; + + if (t != T) + return 0; + + ubuf[(*ulen)++] = buf[i]; + } + + return 1; +} +/*@=bounds@*/ + +/*@-bounds@*/ +static int +looks_latin1(const unsigned char *buf, int nb, + /*@out@*/ unichar *ubuf, /*@out@*/ int *ulen) + /*@modifies *ubuf, *ulen @*/ +{ + int i; + + *ulen = 0; + + for (i = 0; i < nb; i++) { + int t = text_chars[buf[i]]; + + if (t != T && t != I) + return 0; + + ubuf[(*ulen)++] = buf[i]; + } + + 
return 1;
+}
+/*@=bounds@*/
+
+/*
+ * looks_extended - accept buf[0 .. nb-1] only if every byte's class in
+ * text_chars[] is T, I or X.
+ *
+ * Each accepted byte is copied unchanged into ubuf[] and counted in *ulen.
+ * Returns 1 when all nb bytes pass, 0 at the first byte that does not.
+ */
+/*@-bounds@*/
+static int
+looks_extended(const unsigned char *buf, int nb,
+	/*@out@*/ unichar *ubuf, /*@out@*/ int *ulen)
+	/*@modifies *ubuf, *ulen @*/
+{
+	int i;
+
+	*ulen = 0;
+
+	for (i = 0; i < nb; i++) {
+		int t = text_chars[buf[i]];
+
+		if (t != T && t != I && t != X)
+			return 0;
+
+		ubuf[(*ulen)++] = buf[i];
+	}
+
+	return 1;
+}
+/*@=bounds@*/
+
+/*
+ * looks_utf8 - decode buf[0 .. nb-1] as UTF-8 into ubuf[], one unichar per
+ * character, leaving the character count in *ulen.
+ *
+ * Returns 0 on a malformed sequence, or on a 7-bit byte whose class in
+ * text_chars[] is not T.  Otherwise returns 1 only if at least one
+ * multi-byte character was decoded.  A multi-byte sequence that is cut off
+ * by the end of the buffer is dropped without being treated as an error.
+ */
+/*@-bounds@*/
+static int
+looks_utf8(const unsigned char *buf, int nb,
+	/*@out@*/ unichar *ubuf, /*@out@*/ int *ulen)
+	/*@modifies *ubuf, *ulen @*/
+{
+	int i, n;
+	unichar c;
+	int gotone = 0;
+
+	*ulen = 0;
+
+	for (i = 0; i < nb; i++) {
+		if ((buf[i] & 0x80) == 0) {	   /* 0xxxxxxx is plain ASCII */
+			/*
+			 * Even if the whole file is valid UTF-8 sequences,
+			 * still reject it if it uses weird control characters.
+			 */
+
+			if (text_chars[buf[i]] != T)
+				return 0;
+
+			ubuf[(*ulen)++] = buf[i];
+		} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
+			return 0;
+		} else {			   /* 11xxxxxx begins UTF-8 */
+			int following;
+
+			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
+				c = buf[i] & 0x1f;
+				following = 1;
+			} else if ((buf[i] & 0x10) == 0) {	/* 1110xxxx */
+				c = buf[i] & 0x0f;
+				following = 2;
+			} else if ((buf[i] & 0x08) == 0) {	/* 11110xxx */
+				c = buf[i] & 0x07;
+				following = 3;
+			} else if ((buf[i] & 0x04) == 0) {	/* 111110xx */
+				c = buf[i] & 0x03;
+				following = 4;
+			} else if ((buf[i] & 0x02) == 0) {	/* 1111110x */
+				c = buf[i] & 0x01;
+				following = 5;
+			} else
+				return 0;
+
+			for (n = 0; n < following; n++) {
+				i++;
+				if (i >= nb)
+					goto done;
+
+				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
+					return 0;
+
+				c = (c << 6) + (buf[i] & 0x3f);
+			}
+
+			ubuf[(*ulen)++] = c;
+			gotone = 1;
+		}
+	}
+done:
+	return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
+}
+/*@=bounds@*/
+
+/*
+ * looks_unicode - recognise 16-bit Unicode text that starts with a byte
+ * order mark.
+ *
+ * The 16-bit units following the mark are stored in ubuf[] and counted in
+ * *ulen; a trailing odd byte is ignored.
+ *
+ * Returns 0 if the buffer does not start with ff fe or fe ff, if any unit
+ * equals 0xfffe, or if a unit below 128 has a text_chars[] class other
+ * than T.  Otherwise returns 1 for little-endian data (ff fe) and 2 for
+ * big-endian data (fe ff); fmagicA() tests for 1 to choose between its
+ * "Little-endian" and "Big-endian" descriptions.
+ */
+/*@-bounds@*/
+static int
+looks_unicode(const unsigned char *buf, int nb,
+	/*@out@*/ unichar *ubuf, /*@out@*/ int *ulen)
+	/*@modifies *ubuf, *ulen @*/
+{
+	int bigend;
+	int i;
+
+	if (nb < 2)
+		return 0;
+
+	if (buf[0] == 0xff && buf[1] == 0xfe)
+		bigend = 0;
+	else if (buf[0] == 0xfe && buf[1] == 0xff)
+		bigend = 1;
+	else
+		return 0;
+
+	*ulen = 0;
+
+	for (i = 2; i + 1 < nb; i += 2) {
+		/* XXX fix to properly handle chars > 65536 */
+
+		if (bigend)
+			ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
+		else
+			ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
+
+		if (ubuf[*ulen - 1] == 0xfffe)
+			return 0;
+		if (ubuf[*ulen - 1] < 128 && text_chars[ubuf[*ulen - 1]] != T)
+			return 0;
+	}
+
+	/* Encode the byte order in the result so the caller can report it. */
+	return 1 + bigend;
+}
+/*@=bounds@*/
+
+#undef F
+#undef T
+#undef I
+#undef X
+
+/*
+ * This table maps each EBCDIC character to an (8-bit extended) ASCII
+ * character, as specified in the rationale for the dd(1) command in
+ * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
+ *
+ * Unfortunately it does not seem to correspond exactly to any of the
+ * five variants of EBCDIC documented in IBM's _Enterprise Systems
+ * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
+ * Edition, July, 1999, pp. I-1 - I-4.
+ *
+ * Fortunately, though, all versions of EBCDIC, including this one, agree
+ * on most of the printing characters that also appear in (7-bit) ASCII.
+ * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
+ *
+ * Fortunately too, there is general agreement that codes 0x00 through
+ * 0x3F represent control characters, 0x41 a nonbreaking space, and the
+ * remainder printing characters.
+ *
+ * This is sufficient to allow us to identify EBCDIC text and to distinguish
+ * between old-style and internationalized examples of text.
+ */ + +/*@unchecked@*/ /*@observer@*/ +static unsigned char ebcdic_to_ascii[] = { + 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, +128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, +144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, +' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|', +'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~', +'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?', +186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"', +195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201, +202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208, +209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215, +216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231, +'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237, +'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243, +'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249, +'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255 +}; + +/* + * The following EBCDIC-to-ASCII table may relate more closely to reality, + * or at least to modern reality. It comes from + * + * http://ftp.s390.ibm.com/products/oe/bpxqp9.html + * + * and maps the characters of EBCDIC code page 1047 (the code used for + * Unix-derived software on IBM's 390 systems) to the corresponding + * characters from ISO 8859-1. + * + * If this table is used instead of the above one, some of the special + * cases for the NEL character can be taken out of the code. 
+ */ + +/*@unchecked@*/ /*@unused@*/ /*@observer@*/ +static unsigned char ebcdic_1047_to_8859[] = { +0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, +0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, +0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, +0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, +0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, +0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, +0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, +0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, +0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, +0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, +0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, +0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, +0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, +0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, +0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, +0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F +}; + +/* + * Copy buf[0 ... nb-1] into out[], translating EBCDIC to ASCII. 
+ */ +/*@-bounds@*/ +static void +from_ebcdic(const unsigned char *buf, int nb, /*@out@*/ unsigned char *otp) + /*@modifies *otp @*/ +{ + int i; + + for (i = 0; i < nb; i++) { + otp[i] = ebcdic_to_ascii[buf[i]]; + } +} +/*@=bounds@*/ + +/*@-bounds@*/ +static int +fmagicAMatch(const unsigned char *s, const unichar *us, int ulen) + /*@*/ +{ + size_t i; + + for (i = 0; i < ulen; i++) { + if (s[i] != us[i]) + return 0; + } + + if (s[i]) + return 0; + else + return 1; +} +/*@=bounds@*/ + +/* int nb: size actually read */ +/*@-bounds@*/ +int +fmagicA(fmagic fm) +{ + unsigned char * buf = fm->buf; + int nb = fm->nb; + + char nbuf[HOWMANY+1]; /* one extra for terminating '\0' */ + unichar ubuf[HOWMANY+1]; /* one extra for terminating '\0' */ + int ulen; + struct names *p; + int i; + + char *code = NULL; + char *code_mime = NULL; + char *type = NULL; + char *subtype = NULL; + char *subtype_mime = NULL; + + int has_escapes = 0; + int has_backspace = 0; + + int n_crlf = 0; + int n_lf = 0; + int n_cr = 0; + int n_nel = 0; + + int last_line_end = -1; + int has_long_lines = 0; + + /* + * Do the tar test first, because if the first file in the tar + * archive starts with a dot, we can confuse it with an nroff file. + */ + switch (is_tar(fm)) { + case 1: + fmagicPrintf(fm, ((fm->flags & FMAGIC_FLAGS_MIME) + ? "application/x-tar" : "tar archive")); + return 1; + case 2: + fmagicPrintf(fm, ((fm->flags & FMAGIC_FLAGS_MIME) + ? "application/x-tar, POSIX" : "POSIX tar archive")); + return 1; + } + + /* + * Undo the NUL-termination kindly provided by fmagicProcess() + * but leave at least one byte to look at + */ + + while (nb > 1 && buf[nb - 1] == '\0') + nb--; + + /* + * Then try to determine whether it's any character code we can + * identify. Each of these tests, if it succeeds, will leave + * the text converted into one-unichar-per-character Unicode in + * ubuf, and the number of characters converted in ulen. 
+ */ + if (looks_ascii(buf, nb, ubuf, &ulen)) { + code = "ASCII"; + code_mime = "us-ascii"; + type = "text"; + } else if (looks_utf8(buf, nb, ubuf, &ulen)) { + code = "UTF-8 Unicode"; + code_mime = "utf-8"; + type = "text"; + } else if ((i = looks_unicode(buf, nb, ubuf, &ulen))) { + if (i == 1) + code = "Little-endian UTF-16 Unicode"; + else + code = "Big-endian UTF-16 Unicode"; + + type = "character data"; + code_mime = "utf-16"; /* is this defined? */ + } else if (looks_latin1(buf, nb, ubuf, &ulen)) { + code = "ISO-8859"; + type = "text"; + code_mime = "iso-8859-1"; + } else if (looks_extended(buf, nb, ubuf, &ulen)) { + code = "Non-ISO extended-ASCII"; + type = "text"; + code_mime = "unknown"; + } else { + from_ebcdic(buf, nb, nbuf); + + if (looks_ascii(nbuf, nb, ubuf, &ulen)) { + code = "EBCDIC"; + type = "character data"; + code_mime = "ebcdic"; + } else if (looks_latin1(nbuf, nb, ubuf, &ulen)) { + code = "International EBCDIC"; + type = "character data"; + code_mime = "ebcdic"; + } else { + return 0; /* doesn't look like text at all */ + } + } + + /* + * for troff, look for . + letter + letter or .\"; + * this must be done to disambiguate tar archives' ./file + * and other trash from real troff input. + * + * I believe Plan 9 troff allows non-ASCII characters in the names + * of macros, so this test might possibly fail on such a file. + */ + if (*ubuf == '.') { + unichar *tp = ubuf + 1; + + while (ISSPC(*tp)) + ++tp; /* skip leading whitespace */ + if ((tp[0] == '\\' && tp[1] == '\"') || + (isascii(tp[0]) && isalnum(tp[0]) && + isascii(tp[1]) && isalnum(tp[1]) && + ISSPC(tp[2]))) { + subtype_mime = "text/troff"; + subtype = "troff or preprocessor input"; + goto subtype_identified; + } + } + + if ((*buf == 'c' || *buf == 'C') && ISSPC(buf[1])) { + subtype_mime = "text/fortran"; + subtype = "fortran program"; + goto subtype_identified; + } + + /* look for tokens from names.h - this is expensive! 
*/
+
+	i = 0;
+	while (i < ulen) {
+		int end;
+
+		/*
+		 * skip past any leading space
+		 */
+		while (i < ulen && ISSPC(ubuf[i]))
+			i++;
+		if (i >= ulen)
+			break;
+
+		/*
+		 * find the next whitespace
+		 *
+		 * Only ubuf[0 .. ulen-1] holds converted characters, and
+		 * ulen is smaller than nb whenever the input was multi-byte
+		 * (UTF-8) or 16-bit data, so the scan is bounded by ulen
+		 * rather than by the raw byte count nb.
+		 */
+		for (end = i + 1; end < ulen; end++)
+			if (ISSPC(ubuf[end]))
+				/*@innerbreak@*/ break;
+
+		/*
+		 * compare the word thus isolated against the token list
+		 */
+/*@-sizeoftype@*/
+		for (p = names; p < names + NNAMES; p++)
+/*@=sizeoftype@*/
+		{
+			if (p->name == NULL)
+				/*@innerbreak@*/ break;
+			if (fmagicAMatch(p->name, ubuf + i, end - i)) {
+				subtype = types[p->type].human;
+				subtype_mime = types[p->type].mime;
+				goto subtype_identified;
+			}
+		}
+
+		i = end;
+	}
+
+subtype_identified:
+
+	/*
+	 * Now try to discover other details about the file.
+	 */
+	for (i = 0; i < ulen; i++) {
+		/* no line end seen within the last MAXLINELEN characters */
+		if (i > last_line_end + MAXLINELEN)
+			has_long_lines = 1;
+
+		if (ubuf[i] == '\033')
+			has_escapes = 1;
+		if (ubuf[i] == '\b')
+			has_backspace = 1;
+
+		/* CR LF is counted once, at the CR; the LF test below skips it */
+		if (ubuf[i] == '\r' && (i + 1 < ulen && ubuf[i + 1] == '\n')) {
+			n_crlf++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == '\r' && (i + 1 >= ulen || ubuf[i + 1] != '\n')) {
+			n_cr++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == '\n' && (i - 1 < 0 || ubuf[i - 1] != '\r')) {
+			n_lf++;
+			last_line_end = i;
+		}
+		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
+			n_nel++;
+			last_line_end = i;
+		}
+	}
+
+	if ((fm->flags & FMAGIC_FLAGS_MIME)) {
+		if (subtype_mime != NULL)
+			fmagicPrintf(fm, subtype_mime);
+		else
+			fmagicPrintf(fm, "text/plain");
+
+		if (code_mime != NULL) {
+			fmagicPrintf(fm, "; charset=");
+			fmagicPrintf(fm, code_mime);
+		}
+	} else {
+		fmagicPrintf(fm, code);
+
+		if (subtype != NULL) {
+			fmagicPrintf(fm, " ");
+			fmagicPrintf(fm, subtype);
+		}
+		fmagicPrintf(fm, " ");
+		fmagicPrintf(fm, type);
+
+		if (has_long_lines)
+			fmagicPrintf(fm, ", with very long lines");
+
+		/*
+		 * Only report line terminators if we find one other than LF,
+		 * or if we find none at all.
+ */ + if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) || + (n_crlf != 0 || n_cr != 0 || n_nel != 0)) { + fmagicPrintf(fm, ", with"); + + if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) + fmagicPrintf(fm, " no"); + else { + if (n_crlf) { + fmagicPrintf(fm, " CRLF"); + if (n_cr || n_lf || n_nel) + fmagicPrintf(fm, ","); + } + if (n_cr) { + fmagicPrintf(fm, " CR"); + if (n_lf || n_nel) + fmagicPrintf(fm, ","); + } + if (n_lf) { + fmagicPrintf(fm, " LF"); + if (n_nel) + fmagicPrintf(fm, ","); + } + if (n_nel) + fmagicPrintf(fm, " NEL"); + } + + fmagicPrintf(fm, " line terminators"); + } + + if (has_escapes) + fmagicPrintf(fm, ", with escape sequences"); + if (has_backspace) + fmagicPrintf(fm, ", with overstriking"); + } + + return 1; +} +/*@=bounds@*/ diff --git a/file/src/compress.c b/file/src/compress.c new file mode 100644 index 0000000..6fd8de3 --- /dev/null +++ b/file/src/compress.c @@ -0,0 +1,345 @@ + +#include "system.h" +#include "file.h" +#include "debug.h" + +FILE_RCSID("@(#)Id: compress.c,v 1.25 2002/07/03 18:26:37 christos Exp ") + +/*@access fmagic @*/ + +/*@-nullassign@*/ +/*@unchecked@*/ +static struct { +/*@observer@*/ + const char *magic; + int maglen; +/*@observer@*/ + const char *const argv[3]; + int silent; +} compr[] = { + { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */ + /* Uncompress can get stuck; so use gzip first if we have it + * Idea from Damien Clark, thanks! 
*/ + { "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */ + { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */ + { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */ + { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */ + /* the standard pack utilities do not accept standard input */ + { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */ + { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */ +}; +/*@=nullassign@*/ + +/*@unchecked@*/ +static int ncompr = sizeof(compr) / sizeof(compr[0]); + +/* + * `safe' write for sockets and pipes. + */ +static int +swrite(int fd, const void *buf, size_t n) + /*@*/ +{ + int rv; + size_t rn = n; + + do { + switch (rv = write(fd, buf, n)) { + case -1: + if (errno == EINTR) + continue; + return -1; + default: + n -= rv; + buf = ((const char *)buf) + rv; + /*@switchbreak@*/ break; + } + } while (n > 0); + return rn; +} + + +/* + * `safe' read for sockets and pipes. + */ +static int +sread(int fd, /*@out@*/ void *buf, size_t n) + /*@modifies *buf @*/ +{ + int rv; + size_t rn = n; + + do { + switch (rv = read(fd, buf, n)) { + case -1: + if (errno == EINTR) + continue; + return -1; + case 0: + return rn - n; + default: + n -= rv; + buf = ((char *)buf) + rv; + /*@switchbreak@*/ break; + } + } while (n > 0); + return rn; +} + +int +pipe2file(int fd, void *startbuf, size_t nbytes) +{ + char buf[4096]; + int r, tfd; + + (void)strcpy(buf, "/tmp/file.XXXXXX"); +#ifndef HAVE_MKSTEMP + { + char *ptr = mktemp(buf); + tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); + r = errno; + (void)unlink(ptr); + errno = r; + } +#else + tfd = mkstemp(buf); + r = errno; + (void)unlink(buf); + errno = r; +#endif + if (tfd == -1) { + error(EXIT_FAILURE, 0, "Can't create temporary file for pipe copy (%s)\n", + strerror(errno)); + /*@notreached@*/ + } + + if (swrite(tfd, startbuf, nbytes) != nbytes) + r = 1; + else { + while ((r = sread(fd, buf, sizeof(buf))) > 0) + if (swrite(tfd, buf, r) != r) + 
+				break;
+	}
+
+	switch (r) {
+	case -1:
+		error(EXIT_FAILURE, 0, "Error copying from pipe to temp file (%s)\n",
+			strerror(errno));
+		/*@notreached@*/break;
+	case 0:
+		break;
+	default:
+		error(EXIT_FAILURE, 0, "Error while writing to temp file (%s)\n",
+			strerror(errno));
+		/*@notreached@*/
+	}
+
+	/*
+	 * We duplicate the file descriptor, because fclose on a
+	 * tmpfile will delete the file, but any open descriptors
+	 * can still access the phantom inode.
+	 */
+	if ((fd = dup2(tfd, fd)) == -1) {
+		error(EXIT_FAILURE, 0, "Couldn't dup destcriptor for temp file(%s)\n",
+			strerror(errno));
+		/*@notreached@*/
+	}
+	(void)close(tfd);
+	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
+		error(EXIT_FAILURE, 0, "Couldn't seek on temp file (%s)\n", strerror(errno));
+		/*@notreached@*/
+	}
+	return fd;
+}
+
+#ifdef HAVE_LIBZ
+
+#define FHCRC		(1 << 1)
+#define FEXTRA		(1 << 2)
+#define FNAME		(1 << 3)
+#define FCOMMENT	(1 << 4)
+
+/*
+ * uncompressgzipped - inflate the start of a gzip member held in memory.
+ *
+ * old[0 .. n-1] is the buffered start of the file.  The fixed 10-byte
+ * header and whichever optional fields the flag byte announces (extra
+ * field, file name, comment, header CRC) are skipped, then up to HOWMANY
+ * bytes are inflated into a newly allocated, NUL-terminated buffer that
+ * is handed back through *newch; the caller frees it.
+ *
+ * Returns the number of bytes in *newch, counting the terminating NUL, or
+ * 0 on failure.  On failure *newch is NULL and nothing is left allocated.
+ * A header that runs past the end of the buffer counts as a failure, so
+ * the optional fields are never read beyond old[n-1].
+ */
+/*@-bounds@*/
+static int
+uncompressgzipped(const unsigned char *old,
+		/*@out@*/ unsigned char **newch, int n)
+	/*@globals fileSystem @*/
+	/*@modifies *newch, fileSystem @*/
+{
+	unsigned char flg;
+	int data_start = 10;	/* length of the fixed part of the header */
+	z_stream z;
+	int rc;
+
+	*newch = NULL;
+
+	/* The flag byte can only be trusted if the fixed header is complete. */
+	if (n <= data_start)
+		return 0;
+	flg = old[3];
+
+	if (flg & FEXTRA) {
+		/* two length bytes, low byte first, then that many bytes */
+		if (data_start + 2 > n)
+			return 0;
+		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
+	}
+	if (flg & FNAME) {
+		while (data_start < n && old[data_start])
+			data_start++;
+		data_start++;
+	}
+	if (flg & FCOMMENT) {
+		while (data_start < n && old[data_start])
+			data_start++;
+		data_start++;
+	}
+	if (flg & FHCRC)
+		data_start += 2;
+
+	/* The header used up (or overran) the buffer: nothing to inflate. */
+	if (data_start >= n)
+		return 0;
+
+	*newch = (unsigned char *) xmalloc(HOWMANY + 1);
+
+	z.next_in = (Bytef *)(old + data_start);
+	z.avail_in = n - data_start;
+	z.next_out = *newch;
+	z.avail_out = HOWMANY;
+	z.zalloc = NULL;
+	z.zfree = NULL;
+	z.opaque = NULL;
+	z.msg = NULL;	/* so the failure paths can always test it */
+
+/*@-sizeoftype@*/
+/*@-type@*/
+	rc = inflateInit2(&z, -15);
+/*@=type@*/
+/*@=sizeoftype@*/
+	if (rc != Z_OK) {
+		fprintf(stderr,"%s: zlib: %s\n", __progname,
+			(z.msg != NULL ? z.msg : ""));
+		free(*newch);
+		*newch = NULL;
+		return 0;
+	}
+
+/*@-type@*/
+	rc = inflate(&z, Z_SYNC_FLUSH);
+/*@=type@*/
+	if (rc != Z_OK && rc != Z_STREAM_END) {
+		fprintf(stderr,"%s: zlib: %s\n", __progname,
+			(z.msg != NULL ? z.msg : ""));
+		/* release the inflate state and the output buffer */
+/*@-type@*/
+		(void) inflateEnd(&z);
+/*@=type@*/
+		free(*newch);
+		*newch = NULL;
+		return 0;
+	}
+
+	n = z.total_out;
+/*@-type@*/
+	(void) inflateEnd(&z);
+/*@=type@*/
+
+	/* let's keep the nul-terminate tradition */
+	(*newch)[n++] = '\0';
+
+	return n;
+}
+/*@=bounds@*/
+#endif
+
+/*@-bounds@*/
+static int
+uncompressbuf(int method, const unsigned char *old,
+	/*@out@*/ unsigned char **newch, int n)
+	/*@globals fileSystem, internalState @*/
+	/*@modifies *newch, fileSystem, internalState @*/
+{
+	int fdin[2], fdout[2];
+	pid_t pid;
+
+#ifdef HAVE_LIBZ
+	if (method == 2)
+		return uncompressgzipped(old,newch,n);
+#endif
+
+	if (pipe(fdin) == -1 || pipe(fdout) == -1) {
+		error(EXIT_FAILURE, 0, "cannot create pipe (%s).\n", strerror(errno));
+		/*@notreached@*/
+	}
+
+	switch ((pid = fork())) {
+	case 0:	/* child */
+		(void) close(0);
+		(void) dup(fdin[0]);
+		(void) close(fdin[0]);
+		(void) close(fdin[1]);
+
+		(void) close(1);
+		(void) dup(fdout[1]);
+		(void) close(fdout[0]);
+		(void) close(fdout[1]);
+		if (compr[method].silent)
+			(void) close(2);
+
+		(void) execvp(compr[method].argv[0],
+			(char *const *)compr[method].argv);
+		exit(EXIT_FAILURE);
+		/*@notreached@*/ break;
+	case -1:
+		error(EXIT_FAILURE, 0, "could not fork (%s).\n", strerror(errno));
+		/*@notreached@*/break;
+
+	default: /* parent */
+		(void) close(fdin[0]);
+		(void) close(fdout[1]);
+
+		n--; /* The buffer is NUL terminated, and we don't need that. */
+		if (swrite(fdin[1], old, n) != n) {
+			n = 0;
+			goto errxit;
+		}
+		(void) close(fdin[1]);
+		fdin[1] = -1;
+		*newch = (unsigned char *) xmalloc(HOWMANY + 1);
+		if ((n = sread(fdout[0], *newch, HOWMANY)) <= 0) {
+			free(*newch);
+			n = 0;
+			goto errxit;
+		}
+		/* NUL terminate, as every buffer is handled here.
*/ + (*newch)[n++] = '\0'; +errxit: + if (fdin[1] != -1) + (void) close(fdin[1]); + (void) close(fdout[0]); + pid = waitpid(pid, NULL, 0); + return n; + } + /*@notreached@*/ + return 0; +} +/*@=bounds@*/ + +/* + * compress routines: + * fmagicZ() - returns 0 if not recognized, uncompresses and prints + * information if recognized + */ +int +fmagicZ(fmagic fm) +{ + unsigned char * buf = fm->buf; + int nb = fm->nb; + unsigned char * newbuf; + int newsize; + int i; + + for (i = 0; i < ncompr; i++) { +/*@-boundsread@*/ + if (nb < compr[i].maglen) + continue; +/*@=boundsread@*/ + if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 && + (newsize = uncompressbuf(i, buf, &newbuf, nb)) != 0) { + fm->buf = newbuf; + fm->nb = newsize; + (void) fmagicF(fm, 1); + fm->buf = buf; + fm->nb = nb; +/*@-kepttrans@*/ + free(newbuf); +/*@=kepttrans@*/ + printf(" ("); + (void) fmagicF(fm, 0); + printf(")"); + return 1; + } + } + + if (i == ncompr) + return 0; + + return 1; +} diff --git a/file/src/file.c b/file/src/file.c new file mode 100644 index 0000000..ee69d21 --- /dev/null +++ b/file/src/file.c @@ -0,0 +1,352 @@ +/* + * file - find type of a file or files - main program. + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, + * credits must appear in the documentation. + * + * 3. 
Altered versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.  Since few users
+ *    ever read sources, credits must appear in the documentation.
+ *
+ * 4. This notice may not be removed or altered.
+ */
+
+#include "system.h"
+#include "file.h"
+#include "patchlevel.h"
+#include "debug.h"
+
+FILE_RCSID("@(#)Id: file.c,v 1.66 2002/07/03 19:00:41 christos Exp ")
+
+/*@access fmagic @*/
+
+#ifdef S_IFLNK
+# define USAGE "Usage: %s [-bciknsvzL] [-f namefile] [-m magicfiles] file...\n"
+#else
+# define USAGE "Usage: %s [-bciknsvz] [-f namefile] [-m magicfiles] file...\n"
+#endif
+
+#ifdef __EMX__
+static char *apptypeName = NULL;
+int os2_apptype (const char *fn, char *buf, int nb);
+#endif /* __EMX__ */
+
+#ifndef MAXPATHLEN
+#define	MAXPATHLEN	512
+#endif
+
+			/* Global command-line options */
+/*@unchecked@*/
+static	int	nobuffer = 0;	/* Do not buffer stdout */
+
+/*
+ * unwrap_chomp -- remove one trailing newline from s, if there is one,
+ * and return the length of what is left.
+ *
+ * fgets() leaves no newline on a final line that lacks one or on a line
+ * longer than the buffer, and the string can be empty, so the last
+ * character must be checked before it is overwritten.
+ */
+static size_t
+unwrap_chomp(char *s)
+	/*@modifies *s @*/
+{
+	size_t len = strlen(s);
+
+	if (len > 0 && s[len - 1] == '\n')
+		s[--len] = '\0';
+	return len;
+}
+
+/*
+ * unwrap -- read a file of filenames, do each one.
+ *
+ * fn names the list file; "-" means standard input.  A named file is read
+ * twice: a first pass finds the length of the longest name, which is
+ * handed to fmagicProcess() as wid, and a second pass processes each name
+ * and prints the resulting fm->obuf.  Standard input is read once only,
+ * with wid fixed at 1.  Exits through error() if fn cannot be opened.
+ */
+/*@-bounds@*/
+static void
+unwrap(fmagic fm, char *fn)
+	/*@globals fileSystem, internalState @*/
+	/*@modifies fm, fileSystem, internalState @*/
+{
+	char buf[MAXPATHLEN];
+	FILE *f;
+	int wid = 0, cwid;
+	int xx;
+
+	if (strcmp("-", fn) == 0) {
+		f = stdin;
+		wid = 1;
+	} else {
+		if ((f = fopen(fn, "r")) == NULL) {
+			error(EXIT_FAILURE, 0, "Cannot open `%s' (%s).\n", fn, strerror(errno));
+			/*@notreached@*/
+		}
+
+		while (fgets(buf, sizeof(buf), f) != NULL) {
+			/* width of the name itself, without the line end */
+			cwid = (int) unwrap_chomp(buf);
+			if (cwid > wid)
+				wid = cwid;
+		}
+
+		rewind(f);
+	}
+
+/*@-nullpass@*/ /* LCL: buf is null??? */
+	while (fgets(buf, sizeof(buf), f) != NULL)
+/*@=nullpass@*/
+	{
+		(void) unwrap_chomp(buf);
+		/* start each name with an empty output buffer */
+		fm->obp = fm->obuf;
+		*fm->obp = '\0';
+		fm->nob = sizeof(fm->obuf);
+		xx = fmagicProcess(fm, buf, wid);
+		fprintf(stdout, "%s\n", fm->obuf);
+		if (nobuffer)
+			(void) fflush(stdout);
+	}
+
+	(void) fclose(f);
+}
+/*@=bounds@*/
+
+/*@exits@*/
+static void
+usage(void)
+	/*@globals fileSystem @*/
+	/*@modifies fileSystem @*/
+{
+	(void)fprintf(stderr, USAGE, __progname);
+	(void)fprintf(stderr, "Usage: %s -C [-m magic]\n", __progname);
+#ifdef HAVE_GETOPT_H
+	(void)fputs("Try `file --help' for more information.\n", stderr);
+#endif
+	exit(EXIT_FAILURE);
+}
+
+#ifdef HAVE_GETOPT_H
+/*@exits@*/
+static void
+help(void)
+	/*@globals fileSystem @*/
+	/*@modifies fileSystem @*/
+{
+	(void) puts(
+"Usage: file [OPTION]... [FILE]...\n"
+"Determine file type of FILEs.\n"
+"\n"
+" -m, --magic-file LIST use LIST as a colon-separated list of magic\n"
+" number files\n"
+" -z, --uncompress try to look inside compressed files\n"
+" -b, --brief do not prepend filenames to output lines\n"
+" -c, --checking-printout print the parsed form of the magic file, use in\n"
+" conjunction with -m to debug a new magic file\n"
+" before installing it\n"
+" -f, --files-from FILE read the filenames to be examined from FILE\n"
+" -i, --mime output mime type strings\n"
+" -k, --keep-going don't stop at the first match\n"
+" -L, --dereference causes symlinks to be followed\n"
+" -n, --no-buffer do not buffer output\n"
+" -s, --special-files treat special (block/char devices) files as\n"
+" ordinary ones\n"
+" --help display this help and exit\n"
+" --version output version information and exit\n"
+);
+	exit(0);
+}
+#endif
+
+/*
+ * main - parse arguments and handle options
+ */
+/*@-bounds@*/
+int
+main(int argc, char **argv)
+	/*@globals global_fmagic, nobuffer,
+		default_magicfile, optind,
+		fileSystem, internalState @*/
+	/*@modifies global_fmagic, nobuffer,
+		default_magicfile, optind,
+
fileSystem, internalState @*/ +{ + int xx; + int c; + int action = 0, didsomefiles = 0, errflg = 0, ret = 0, app = 0; + char *mime, *home, *usermagic; + fmagic fm = global_fmagic; + struct stat sb; +#define OPTSTRING "bcdf:ikm:nsvzCL" +#ifdef HAVE_GETOPT_H + int longindex = 0; +/*@-nullassign -readonlytrans@*/ + static struct option long_options[] = + { + {"version", 0, 0, 'v'}, + {"help", 0, 0, 0}, + {"brief", 0, 0, 'b'}, + {"checking-printout", 0, 0, 'c'}, + {"debug", 0, 0, 'd'}, + {"files-from", 1, 0, 'f'}, + {"mime", 0, 0, 'i'}, + {"keep-going", 0, 0, 'k'}, +#ifdef S_IFLNK + {"dereference", 0, 0, 'L'}, +#endif + {"magic-file", 1, 0, 'm'}, + {"uncompress", 0, 0, 'z'}, + {"no-buffer", 0, 0, 'n'}, + {"special-files", 0, 0, 's'}, + {"compile", 0, 0, 'C'}, + {0, 0, 0, 0}, + }; +/*@=nullassign =readonlytrans@*/ +#endif + +#if HAVE_MCHECK_H && HAVE_MTRACE + /*@-noeffect@*/ + mtrace(); /* Trace malloc only if MALLOC_TRACE=mtrace-output-file. */ + /*@=noeffect@*/ +#endif + +#ifdef LC_CTYPE + setlocale(LC_CTYPE, ""); /* makes islower etc work for other langs */ +#endif + +#ifdef __EMX__ + /* sh-like wildcard expansion! Shouldn't hurt at least ... 
*/ + _wildcard(&argc, &argv); +#endif + +/*@-assignexpose@*/ + fm->magicfile = default_magicfile; +/*@=assignexpose@*/ + if ((usermagic = getenv("MAGIC")) != NULL) + fm->magicfile = usermagic; + else { + if ((home = getenv("HOME")) != NULL) { + size_t nb = strlen(home) + 8; + usermagic = xmalloc(nb); + (void)strcpy(usermagic, home); + (void)strcat(usermagic, "/.magic"); + if (stat(usermagic, &sb)<0) + free(usermagic); + else + fm->magicfile = usermagic; + } + } + +#ifndef HAVE_GETOPT_H + while ((c = getopt(argc, argv, OPTSTRING)) != -1) +#else + while ((c = getopt_long(argc, argv, OPTSTRING, long_options, + &longindex)) != -1) +#endif + { + switch (c) { +#ifdef HAVE_GETOPT_H + case 0 : + if (longindex == 1) + help(); + /*@switchbreak@*/ break; +#endif + case 'b': + fm->flags |= FMAGIC_FLAGS_BRIEF; + /*@switchbreak@*/ break; + case 'c': + action = CHECK; + /*@switchbreak@*/ break; + case 'C': + action = COMPILE; + /*@switchbreak@*/ break; + case 'd': + fm->flags |= FMAGIC_FLAGS_DEBUG; + /*@switchbreak@*/ break; + case 'f': + if (!app) { + ret = fmagicSetup(fm, fm->magicfile, action); + if (action) + exit(ret); + app = 1; + } + unwrap(fm, optarg); + ++didsomefiles; + /*@switchbreak@*/ break; + case 'i': + fm->flags |= FMAGIC_FLAGS_MIME; + mime = malloc(strlen(fm->magicfile) + sizeof(".mime")); + if (mime != NULL) { + (void)strcpy(mime, fm->magicfile); + (void)strcat(mime, ".mime"); + } + fm->magicfile = mime; + /*@switchbreak@*/ break; + case 'k': + fm->flags |= FMAGIC_FLAGS_CONTINUE; + /*@switchbreak@*/ break; + case 'm': +/*@-assignexpose@*/ + fm->magicfile = optarg; +/*@=assignexpose@*/ + /*@switchbreak@*/ break; + case 'n': + ++nobuffer; + /*@switchbreak@*/ break; + case 's': + fm->flags |= FMAGIC_FLAGS_SPECIAL; + /*@switchbreak@*/ break; + case 'v': + (void) fprintf(stdout, "%s-%d.%d\n", __progname, + FILE_VERSION_MAJOR, patchlevel); + (void) fprintf(stdout, "magic file from %s\n", + fm->magicfile); + return 1; + case 'z': + fm->flags |= FMAGIC_FLAGS_UNCOMPRESS; 
+ /*@switchbreak@*/ break; +#ifdef S_IFLNK + case 'L': + fm->flags |= FMAGIC_FLAGS_FOLLOW; + /*@switchbreak@*/ break; +#endif + case '?': + default: + errflg++; + /*@switchbreak@*/ break; + } + } + + if (errflg) + usage(); + + if (!app) { +/*@-nullpass@*/ /* FIX: fm->magicfile may be null */ + ret = fmagicSetup(fm, fm->magicfile, action); +/*@=nullpass@*/ + if (action) + exit(ret); + app = 1; + } + + if (optind == argc) { + if (!didsomefiles) + usage(); + } else { + int i, wid, nw; + for (wid = 0, i = optind; i < argc; i++) { + nw = strlen(argv[i]); + if (nw > wid) + wid = nw; + } + for (; optind < argc; optind++) { + fm->obp = fm->obuf; + *fm->obp = '\0'; + fm->nob = sizeof(fm->obuf); + xx = fmagicProcess(fm, argv[optind], wid); + fprintf(stdout, "%s\n", fm->obuf); + if (nobuffer) + (void) fflush(stdout); + } + } + +#if HAVE_MCHECK_H && HAVE_MTRACE + /*@-noeffect@*/ + muntrace(); /* Trace malloc only if MALLOC_TRACE=mtrace-output-file. */ + /*@=noeffect@*/ +#endif + + return 0; +} +/*@=bounds@*/ diff --git a/file/src/file.h b/file/src/file.h new file mode 100644 index 0000000..72b9e8c --- /dev/null +++ b/file/src/file.h @@ -0,0 +1,224 @@ +/* + * file.h - definitions for file(1) program + * @(#)Id: file.h,v 1.43 2002/07/03 18:57:52 christos Exp + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. 
Since few users ever read sources, + * credits must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. Since few users + * ever read sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. + */ + +#ifndef __file_h__ +#define __file_h__ + +/*@-redef@*/ + +#ifndef HOWMANY +# define HOWMANY 65536 /* how much of the file to look at */ +#endif +#define MAXMAGIS 4096 /* max entries in /etc/magic */ +#define MAXDESC 50 /* max leng of text description */ +#define MAXstring 32 /* max leng of "string" types */ + +#define MAGICNO 0xF11E041C +#define VERSIONNO 1 + +#define CHECK 1 +#define COMPILE 2 + +struct magic { + uint16_t cont_level; /* level of ">" */ + uint8_t nospflag; /* supress space character */ + uint8_t flag; +#define INDIR 1 /* if '>(...)' appears, */ +#define UNSIGNED 2 /* comparison is unsigned */ +#define OFFADD 4 /* if '>&' appears, */ + uint8_t reln; /* relation (0=eq, '>'=gt, etc) */ + uint8_t vallen; /* length of string value, if any */ + uint8_t type; /* int, short, long or string. 
*/ + uint8_t in_type; /* type of indirrection */ +#define BYTE 1 +#define SHORT 2 +#define LONG 4 +#define STRING 5 +#define DATE 6 +#define BESHORT 7 +#define BELONG 8 +#define BEDATE 9 +#define LESHORT 10 +#define LELONG 11 +#define LEDATE 12 +#define PSTRING 13 +#define LDATE 14 +#define BELDATE 15 +#define LELDATE 16 +#define REGEX 17 + uint8_t in_op; /* operator for indirection */ + uint8_t mask_op; /* operator for mask */ +#define OPAND 1 +#define OPOR 2 +#define OPXOR 3 +#define OPADD 4 +#define OPMINUS 5 +#define OPMULTIPLY 6 +#define OPDIVIDE 7 +#define OPMODULO 8 +#define OPINVERSE 0x80 + int32_t offset; /* offset to magic number */ + int32_t in_offset; /* offset from indirection */ + union VALUETYPE { + uint8_t b; + uint16_t h; + uint32_t l; + char s[MAXstring]; + const char * buf; + uint8_t hs[2]; /* 2 bytes of a fixed-endian "short" */ + uint8_t hl[4]; /* 4 bytes of a fixed-endian "long" */ + } value; /* either number or string */ + uint32_t mask; /* mask before comparison with value */ + char desc[MAXDESC]; /* description */ +} __attribute__((__packed__)); + +#define BIT(A) (1 << (A)) +#define STRING_IGNORE_LOWERCASE BIT(0) +#define STRING_COMPACT_BLANK BIT(1) +#define STRING_COMPACT_OPTIONAL_BLANK BIT(2) +#define CHAR_IGNORE_LOWERCASE 'c' +#define CHAR_COMPACT_BLANK 'B' +#define CHAR_COMPACT_OPTIONAL_BLANK 'b' + + +/* list of magic entries */ +struct mlist { + struct magic *magic; /* array of magic entries */ + uint32_t nmagic; /* number of entries in array */ +/*@relnull@*/ + struct mlist *next; +/*@relnull@*/ + struct mlist *prev; +}; + +enum fmagicFlags_e { +/*@-enummemuse@*/ + FMAGIC_FLAGS_NONE = 0, +/*@=enummemuse@*/ + FMAGIC_FLAGS_DEBUG = (1 << 0), + FMAGIC_FLAGS_BRIEF = (1 << 1), /*!< brief output format */ + FMAGIC_FLAGS_MIME = (1 << 2), /*!< output as mime-types */ + FMAGIC_FLAGS_CONTINUE = (1 << 3), /*!< continue after 1st match */ + FMAGIC_FLAGS_FOLLOW = (1 << 4), /*!< follow symlinks? 
*/ + FMAGIC_FLAGS_SPECIAL = (1 << 5), /*!< analyze block devices? */ + FMAGIC_FLAGS_UNCOMPRESS = (1 << 6) /*!< uncompress files? */ +}; + +struct fmagic_s { + int flags; /*!< bit(s) to control fmagic behavior. */ +/*@dependent@*/ /*@observer@*/ /*@relnull@*/ + const char *magicfile; /*!< name of the magic file */ + int lineno; /*!< current line number in magic file */ +/*@relnull@*/ + struct mlist * mlist; /*!< list of arrays of magic entries */ +/*@relnull@*/ + struct mlist * ml; /*!< current magic array item */ +/*@observer@*/ + const char * fn; /*!< current file name */ + int fd; /*!< current file descriptor */ + struct stat sb; /*!< current file stat(2) buffer */ +/*@relnull@*/ + unsigned char * buf; /*!< current file buffer */ + int nb; /*!< current no. bytes in file buffer */ + union VALUETYPE val; /*!< current magic expression value */ + int cls; /*!< Elf class */ + int swap; /*!< Elf swap bytes? */ +/*@dependent@*/ + char * obp; /*!< current output buffer pointer */ + size_t nob; /*!< bytes remaining in output buffer */ + char obuf[512]; /*!< output buffer */ +}; + +typedef /*@abstract@*/ struct fmagic_s * fmagic; + +/*@unchecked@*/ +extern fmagic global_fmagic; + +/*@unchecked@*//*@observer@*/ +extern const char * default_magicfile; + +#ifdef __cplusplus +extern "C" { +#endif + +/*@mayexit@*/ +extern int fmagicSetup(fmagic fm, const char *fn, int action) + /*@globals fileSystem, internalState @*/ + /*@modifies fm, fileSystem, internalState @*/; +extern int fmagicProcess(fmagic fm, const char *fn, int wid) + /*@globals fileSystem, internalState @*/ + /*@modifies fm, fileSystem, internalState @*/; + +extern int fmagicA(fmagic fm) + /*@modifies fm @*/; +extern int fmagicD(fmagic fm) + /*@globals fileSystem, internalState @*/ + /*@modifies fm, fileSystem, internalState @*/; +extern void fmagicE(fmagic fm) + /*@globals fileSystem, internalState @*/ + /*@modifies fm, fileSystem, internalState @*/; +extern int fmagicF(fmagic fm, int zfl) + /*@globals fileSystem, 
internalState @*/ + /*@modifies fm, fileSystem, internalState @*/; +extern int fmagicS(fmagic fm) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/; +extern int fmagicZ(fmagic fm) + /*@globals fileSystem, internalState @*/ + /*@modifies fm, fileSystem, internalState @*/; + +extern void fmagicPrintf(const fmagic fm, const char *f, ...) + /*@modifies fm @*/; + +/*@observer@*/ +extern char *fmttime(long v, int local) + /*@*/; + +extern void magwarn(const char *f, ...) + /*@globals fileSystem @*/ + /*@modifies fileSystem @*/; +extern void mdump(struct magic *m) + /*@globals fileSystem @*/ + /*@modifies fileSystem @*/; +extern void showstr(FILE *fp, const char *s, int len) + /*@globals fileSystem @*/ + /*@modifies fp, fileSystem @*/; + +extern uint32_t signextend(struct magic *m, uint32_t v) + /*@globals fileSystem @*/ + /*@modifies fileSystem @*/; +extern int pipe2file(int fd, void *startbuf, size_t nbytes) + /*@globals errno, fileSystem, internalState @*/ + /*@modifies errno, fileSystem, internalState @*/; + +#ifdef __cplusplus +} +#endif + +/*@=redef@*/ +#endif /* __file_h__ */ diff --git a/file/src/fsmagic.c b/file/src/fsmagic.c new file mode 100644 index 0000000..81e7a0e --- /dev/null +++ b/file/src/fsmagic.c @@ -0,0 +1,385 @@ +/* + * fsmagic - magic based on filesystem info - directory, special files, etc. + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. 
The origin of this software must not be misrepresented, either by
+ *    explicit claim or by omission.  Since few users ever read sources,
+ *    credits must appear in the documentation.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.  Since few users
+ *    ever read sources, credits must appear in the documentation.
+ *
+ * 4. This notice may not be removed or altered.
+ */
+
+#include "system.h"
+#include "file.h"
+#include "debug.h"
+
+FILE_RCSID("@(#)Id: fsmagic.c,v 1.36 2002/07/03 19:00:41 christos Exp ")
+
+/*@access fmagic @*/
+
+/*
+ * fmagicD - describe a file from its filesystem status alone.
+ *
+ * Stats fm->fn into fm->sb (lstat() unless FMAGIC_FLAGS_FOLLOW is set, on
+ * systems that define S_IFLNK) and prints a description via fmagicPrintf()
+ * for directories, devices, fifos, doors, symlinks, sockets and empty
+ * regular files.  In MIME mode every non-regular file is reported as
+ * "application/x-not-regular-file".
+ *
+ * Returns 1 when a description was printed and the contents need not be
+ * read, 0 when the caller should go on to inspect the file contents.
+ * An unrecognised st_mode type is reported through error(EXIT_FAILURE, ...).
+ */
+/*@-bounds@*/
+int
+fmagicD(fmagic fm)
+{
+    const char * fn = fm->fn;
+    struct stat * st = &fm->sb;
+    int ret = 0;
+    int xx;
+
+    /*
+     * Fstat is cheaper but fails for files you don't have read perms on.
+     * On 4.2BSD and similar systems, use lstat() to identify symlinks.
+     */
+#if defined(S_IFLNK) || defined(__LCLINT__)
+    if (!(fm->flags & FMAGIC_FLAGS_FOLLOW))
+        ret = lstat(fn, st);
+    else
+#endif
+    ret = stat(fn, st);    /* don't merge into if; see "ret =" above */
+
+    if (ret) {
+        /* Yes, I do mean stdout. */
+        /* No \n, caller will provide. */
+        fmagicPrintf(fm, "can't stat `%s' (%s).", fn, strerror(errno));
+        return 1;
+    }
+
+    if ((fm->flags & FMAGIC_FLAGS_MIME)) {
+        if ((st->st_mode & S_IFMT) != S_IFREG) {
+            fmagicPrintf(fm, "application/x-not-regular-file");
+            return 1;
+        }
+    }
+    else {
+#if defined(S_ISUID) || defined(__LCLINT__)
+        if (st->st_mode & S_ISUID) fmagicPrintf(fm, "setuid ");
+#endif
+#if defined(S_ISGID) || defined(__LCLINT__)
+        if (st->st_mode & S_ISGID) fmagicPrintf(fm, "setgid ");
+#endif
+#if defined(S_ISVTX) || defined(__LCLINT__)
+        if (st->st_mode & S_ISVTX) fmagicPrintf(fm, "sticky ");
+#endif
+    }
+
+    switch (st->st_mode & S_IFMT) {
+    case S_IFDIR:
+        fmagicPrintf(fm, "directory");
+        return 1;
+#if defined(S_IFCHR) || defined(__LCLINT__)
+    case S_IFCHR:
+        /*
+         * If -s has been specified, treat character special files
+         * like ordinary files.  Otherwise, just report that they
+         * are character special files and go on to the next file.
+         */
+        if ((fm->flags & FMAGIC_FLAGS_SPECIAL))
+            break;
+#ifdef HAVE_STRUCT_STAT_ST_RDEV
+# ifdef dv_unit
+        fmagicPrintf(fm, "character special (%d/%d/%d)",
+            major(st->st_rdev),
+            dv_unit(st->st_rdev),
+            dv_subunit(st->st_rdev));
+# else
+/*@-shiftimplementation@*/
+        fmagicPrintf(fm, "character special (%ld/%ld)",
+            (long) major(st->st_rdev), (long) minor(st->st_rdev));
+/*@=shiftimplementation@*/
+# endif
+#else
+        fmagicPrintf(fm, "character special");
+#endif
+        return 1;
+#endif
+#if defined(S_IFBLK) || defined(__LCLINT__)
+    case S_IFBLK:
+        /*
+         * If -s has been specified, treat block special files
+         * like ordinary files.  Otherwise, just report that they
+         * are block special files and go on to the next file.
+         */
+        if ((fm->flags & FMAGIC_FLAGS_SPECIAL))
+            break;
+#ifdef HAVE_STRUCT_STAT_ST_RDEV
+# ifdef dv_unit
+        fmagicPrintf(fm, "block special (%d/%d/%d)",
+            major(st->st_rdev),
+            dv_unit(st->st_rdev),
+            dv_subunit(st->st_rdev));
+# else
+/*@-shiftimplementation@*/
+        fmagicPrintf(fm, "block special (%ld/%ld)",
+            (long) major(st->st_rdev), (long) minor(st->st_rdev));
+/*@=shiftimplementation@*/
+# endif
+#else
+        fmagicPrintf(fm, "block special");
+#endif
+        return 1;
+#endif
+    /* TODO add code to handle V7 MUX and Blit MUX files */
+#if defined(S_IFIFO) || defined(__LCLINT__)
+    case S_IFIFO:
+        fmagicPrintf(fm, "fifo (named pipe)");
+        return 1;
+#endif
+#if defined(S_IFDOOR)
+    case S_IFDOOR:
+        fmagicPrintf(fm, "door");
+        return 1;
+#endif
+#if defined(S_IFLNK) || defined(__LCLINT__)
+    case S_IFLNK:
+        {
+            char buf[BUFSIZ+4];
+            int nch;
+            struct stat tstatbuf;
+
+            buf[0] = '\0';
+            if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) {
+                fmagicPrintf(fm, "unreadable symlink (%s).", strerror(errno));
+                return 1;
+            }
+            buf[nch] = '\0';    /* readlink(2) needs this */
+
+            /* If broken symlink, say so and quit early. */
+/*@-branchstate@*/
+            if (*buf == '/') {
+                if (stat(buf, &tstatbuf) < 0) {
+                    fmagicPrintf(fm, "broken symbolic link to %s", buf);
+                    return 1;
+                }
+            }
+            else {
+                char *tmp;
+                char buf2[BUFSIZ+BUFSIZ+4];
+
+                if ((tmp = strrchr(fn, '/')) == NULL) {
+                    tmp = buf;    /* in current directory anyway */
+                }
+                else {
+                    /* length of the directory part, trailing '/' included */
+                    size_t dirlen = (size_t) (tmp - fn) + 1;
+
+                    /*
+                     * The link text in buf is at most BUFSIZ-1 bytes, so a
+                     * directory part of up to BUFSIZ bytes plus the link
+                     * text and its NUL always fits in buf2.  Refuse anything
+                     * longer instead of overrunning the buffer.
+                     */
+                    if (dirlen > BUFSIZ) {
+                        fmagicPrintf(fm, "path too long: `%s'", fn);
+                        return 1;
+                    }
+                    memcpy(buf2, fn, dirlen);    /* take directory part */
+                    buf2[dirlen] = '\0';
+                    strcat (buf2, buf);    /* plus (relative) symlink */
+                    tmp = buf2;
+                }
+                if (stat(tmp, &tstatbuf) < 0) {
+                    fmagicPrintf(fm, "broken symbolic link to %s", buf);
+                    return 1;
+                }
+            }
+/*@=branchstate@*/
+
+            /* Otherwise, handle it.
*/
+            if ((fm->flags & FMAGIC_FLAGS_FOLLOW)) {
+                fmagicPrintf(fm, "\n");
+                xx = fmagicProcess(fm, buf, strlen(buf));
+                return 1;
+            } else { /* just print what it points to */
+                fmagicPrintf(fm, "symbolic link to %s", buf);
+            }
+        }
+        return 1;
+#endif
+#if defined(S_IFSOCK)
+#ifndef __COHERENT__
+    case S_IFSOCK:
+        fmagicPrintf(fm, "socket");
+        return 1;
+#endif
+#endif
+    case S_IFREG:
+        break;
+    default:
+        error(EXIT_FAILURE, 0, "invalid mode 0%o.\n", st->st_mode);
+        /*@notreached@*/
+    }
+
+    /*
+     * regular file, check next possibility
+     *
+     * If stat() tells us the file has zero length, report here that
+     * the file is empty, so we can skip all the work of opening and
+     * reading the file.
+     * But if the -s option has been given, we skip this optimization,
+     * since on some systems, stat() reports zero size for raw disk
+     * partitions.  (If the block special device really has zero length,
+     * the fact that it is empty will be detected and reported correctly
+     * when we read the file.)
+     */
+    if (!(fm->flags & FMAGIC_FLAGS_SPECIAL) && st->st_size == 0) {
+        fmagicPrintf(fm, ((fm->flags & FMAGIC_FLAGS_MIME)
+            ? "application/x-empty" : "empty"));
+        return 1;
+    }
+    return 0;
+}
+/*@=bounds@*/
+
+/*
+ * fmagicF - identify the data already loaded in fm.
+ *
+ * Tries, in order, the compression test (only when zfl is non-zero),
+ * the magic-file tests and the ASCII/keyword test; the first one that
+ * succeeds has printed the description.  If none matches, prints
+ * "data" (or "application/octet-stream" in MIME mode).
+ *
+ * Returns 'o', 'z', 's' or 'a' naming the test that matched, or '\0'
+ * when nothing matched.
+ */
+int
+fmagicF(fmagic fm, int zfl)
+{
+    /*
+     * The main work is done here!
+     * We have the file name and/or the data buffer to be identified.
+     */
+
+#ifdef __EMX__
+    /*
+     * Ok, here's the right place to add a call to some os-specific
+     * routine, e.g.
+     */
+    /* NOTE(review): os2_apptype() is not declared in view; arguments are
+     * assumed to be (name, buffer, byte count) -- confirm against its
+     * prototype.  The bare fn/buf/nb names used before do not exist here. */
+    if (os2_apptype(fm->fn, fm->buf, fm->nb) == 1)
+        return 'o';
+#endif
+    /* try compression stuff */
+    if (zfl && fmagicZ(fm))
+        return 'z';
+
+    /* try tests in /etc/magic (or surrogate magic file) */
+    if (fmagicS(fm))
+        return 's';
+
+    /* try known keywords, check whether it is ASCII */
+    if (fmagicA(fm))
+        return 'a';
+
+    /* abandon hope, all ye who remain here */
+    fmagicPrintf(fm, ((fm->flags & FMAGIC_FLAGS_MIME)
+        ? "application/octet-stream" : "data"));
+    return '\0';
+}
+
+/*
+ * fmagicProcess - process input file
+ *
+ * fm   magic state; fn, buf, nb, fd and sb are (re)initialised here
+ * fn   name of the file to identify; "-" means standard input
+ * wid  when > 0 (and FMAGIC_FLAGS_BRIEF is clear) the name is printed
+ *      first, padded to this width
+ *
+ * Allocates a HOWMANY+1 byte buffer for the leading bytes of the file and
+ * releases it again before returning.  Read and fstat failures are
+ * reported through error(EXIT_FAILURE, ...).
+ * Returns 0 (ret is never set to anything else in this function).
+ */
+/*@-bounds@*/
+int
+fmagicProcess(fmagic fm, const char *fn, int wid)
+{
+    static const char stdname[] = "standard input";
+    char match = '\0';
+    int ret = 0;
+
+/*@-assignexpose -temptrans @*/
+    fm->fn = fn;
+/*@=assignexpose =temptrans @*/
+    fm->buf = xmalloc(HOWMANY+1);
+    fm->buf[0] = '\0';
+    fm->nb = 0;
+
+/*@-branchstate@*/
+    if (strcmp("-", fn) == 0) {
+        if (fstat(0, &fm->sb)<0) {
+            error(EXIT_FAILURE, 0, "cannot fstat `%s' (%s).\n", stdname,
+                strerror(errno));
+            /*@notreached@*/
+        }
+        fm->fn = stdname;
+        /*
+         * The open() below is skipped for stdin, and fm->fd is left at -1
+         * after every regular file, so point it at descriptor 0 (the one
+         * just fstat'ed) before the read.
+         */
+        fm->fd = 0;
+    }
+/*@=branchstate@*/
+
+    if (wid > 0 && !(fm->flags & FMAGIC_FLAGS_BRIEF))
+        fmagicPrintf(fm, "%s:%*s ", fm->fn,
+            (int) (wid - strlen(fm->fn)), "");
+
+    if (fm->fn != stdname) {
+        /*
+         * first try judging the file based on its filesystem status
+         */
+        if (fmagicD(fm) != 0)
+            goto exit;
+
+        if ((fm->fd = open(fm->fn, O_RDONLY)) < 0) {
+            /* We can't open it, but we were able to stat it. */
+            if (fm->sb.st_mode & 0002)
+                fmagicPrintf(fm, "writeable, ");
+            if (fm->sb.st_mode & 0111)
+                fmagicPrintf(fm, "executable, ");
+            fmagicPrintf(fm, "can't read `%s' (%s).", fm->fn, strerror(errno));
+            goto exit;
+        }
+    }
+
+
+    /*
+     * try looking at the first HOWMANY bytes
+     */
+    if ((fm->nb = read(fm->fd, (char *)fm->buf, HOWMANY)) == -1) {
+        error(EXIT_FAILURE, 0, "read failed (%s).\n", strerror(errno));
+        /*@notreached@*/
+    }
+
+    if (fm->nb == 0)
+        /* the format takes no arguments; none are passed */
+        fmagicPrintf(fm, ((fm->flags & FMAGIC_FLAGS_MIME)
+            ? "application/x-empty" : "empty"));
+    else {
+        fm->buf[fm->nb++] = '\0';    /* null-terminate data buffer */
+        match = fmagicF(fm, (fm->flags & FMAGIC_FLAGS_UNCOMPRESS));
+    }
+
+#ifdef BUILTIN_ELF
+    if (match == 's' && fm->nb > 5) {
+        /*
+         * We matched something in the file, so this *might*
+         * be an ELF file, and the file is at least 5 bytes long,
+         * so if it's an ELF file it has at least one byte
+         * past the ELF magic number - try extracting information
+         * from the ELF headers that can't easily be extracted
+         * with rules in the magic file.
+         */
+        fmagicE(fm);
+    }
+#endif
+
+    if (fm->fn != stdname) {
+#ifdef RESTORE_TIME
+        /*
+         * Try to restore access, modification times if read it.
+         * This is really *bad* because it will modify the status
+         * time of the file... And of course this will affect
+         * backup programs
+         */
+# ifdef USE_UTIMES
+        struct timeval utsbuf[2];
+        utsbuf[0].tv_sec = fm->sb.st_atime;
+        utsbuf[1].tv_sec = fm->sb.st_mtime;
+        /* utimes() reads tv_usec too; do not hand it stack garbage */
+        utsbuf[0].tv_usec = 0;
+        utsbuf[1].tv_usec = 0;
+
+        (void) utimes(fm->fn, utsbuf); /* don't care if loses */
+# else
+        struct utimbuf utbuf;
+
+        utbuf.actime = fm->sb.st_atime;
+        utbuf.modtime = fm->sb.st_mtime;
+        (void) utime(fm->fn, &utbuf); /* don't care if loses */
+# endif
+#endif
+        (void) close(fm->fd);
+        fm->fd = -1;
+    }
+
+exit:
+    free(fm->buf);    /* free(NULL) is a no-op */
+    fm->buf = NULL;
+    fm->nb = 0;
+    return ret;
+}
+/*@=bounds@*/
diff --git a/file/src/names.h b/file/src/names.h
new file mode 100644
index 0000000..1da9f49
--- /dev/null
+++ b/file/src/names.h
@@ -0,0 +1,164 @@
+/*
+ * Names.h - names and types used by ascmagic in file(1).
+ * These tokens are here because they can appear anywhere in
+ * the first HOWMANY bytes, while tokens in MAGIC must
+ * appear at fixed offsets into the file. Don't make HOWMANY
+ * too high unless you have a very fast CPU.
+ *
+ * Copyright (c) Ian F. Darwin, 1987.
+ * Written by Ian F. Darwin.
+ * + * See LEGAL.NOTICE + * + * Id: names.h,v 1.19 2002/05/16 15:01:41 christos Exp + */ + +/* + modified by Chris Lowth - 9 April 2000 + to add mime type strings to the types table. +*/ + +/* these types are used to index the table 'types': keep em in sync! */ +#define L_C 0 /* first and foremost on UNIX */ +#define L_CC 1 /* Bjarne's postincrement */ +#define L_FORT 2 /* the oldest one */ +#define L_MAKE 3 /* Makefiles */ +#define L_PLI 4 /* PL/1 */ +#define L_MACH 5 /* some kinda assembler */ +#define L_ENG 6 /* English */ +#define L_PAS 7 /* Pascal */ +#define L_MAIL 8 /* Electronic mail */ +#define L_NEWS 9 /* Usenet Netnews */ +#define L_JAVA 10 /* Java code */ +#define L_HTML 11 /* HTML */ +#define L_BCPL 12 /* BCPL */ +#define L_M4 13 /* M4 */ + +/*@unchecked@*/ /*@unused@*/ /*@observer@*/ +static const struct { +/*@observer@*/ /*@null@*/ + char *human; +/*@observer@*/ /*@null@*/ + char *mime; +} types[] = { + { "C program", "text/x-c", }, + { "C++ program", "text/x-c++" }, + { "FORTRAN program", "text/x-fortran" }, + { "make commands", "text/x-makefile" }, + { "PL/1 program", "text/x-pl1" }, + { "assembler program", "text/x-asm" }, + { "English", "text/plain, English" }, + { "Pascal program", "text/x-pascal" }, + { "mail", "text/x-mail" }, + { "news", "text/x-news" }, + { "Java program", "text/x-java" }, + { "HTML document", "text/html", }, + { "BCPL program", "text/x-bcpl" }, + { "M4 macro language pre-processor", "text/x-m4" }, + { "can't happen error on names.h/types", "error/x-error" }, + { 0, 0} +}; + +/* + * XXX - how should we distinguish Java from C++? + * The trick used in a Debian snapshot, of having "extends" or "implements" + * as tags for Java, doesn't work very well, given that those keywords + * are often preceded by "class", which flags it as C++. 
+ * + * Perhaps we need to be able to say + * + * If "class" then + * + * if "extends" or "implements" then + * Java + * else + * C++ + * endif + * + * Or should we use other keywords, such as "package" or "import"? + * Unfortunately, Ada95 uses "package", and Modula-3 uses "import", + * although I infer from the language spec at + * + * http://www.research.digital.com/SRC/m3defn/html/m3.html + * + * that Modula-3 uses "IMPORT" rather than "import", i.e. it must be + * in all caps. + * + * So, for now, we go with "import". We must put it before the C++ + * stuff, so that we don't misidentify Java as C++. Not using "package" + * means we won't identify stuff that defines a package but imports + * nothing; hopefully, very little Java code imports nothing (one of the + * reasons for doing OO programming is to import as much as possible + * and write only what you need to, right?). + * + * Unfortunately, "import" may cause us to misidentify English text + * as Java, as it comes after "the" and "The". Perhaps we need a fancier + * heuristic to identify Java? + */ +/*@unchecked@*/ /*@unused@*/ /*@observer@*/ +static struct names { +/*@observer@*/ /*@null@*/ + const char *name; + short type; +} names[] = { + /* These must be sorted by eye for optimal hit rate */ + /* Add to this list only after substantial meditation */ + {"dnl", L_M4}, + {"import", L_JAVA}, + {"\"libhdr\"", L_BCPL}, + {"\"LIBHDR\"", L_BCPL}, + {"//", L_CC}, + {"template", L_CC}, + {"virtual", L_CC}, + {"class", L_CC}, + {"public:", L_CC}, + {"private:", L_CC}, + {"/*", L_C}, /* must precede "The", "the", etc. */ + {"#include", L_C}, + {"char", L_C}, + {"The", L_ENG}, + {"the", L_ENG}, + {"double", L_C}, + {"extern", L_C}, + {"float", L_C}, + {"struct", L_C}, + {"union", L_C}, + {"CFLAGS", L_MAKE}, + {"LDFLAGS", L_MAKE}, + {"all:", L_MAKE}, + {".PRECIOUS", L_MAKE}, +/* Too many files of text have these words in them. Find another way + * to recognize Fortrash. 
+ */ +#ifdef NOTDEF + {"subroutine", L_FORT}, + {"function", L_FORT}, + {"block", L_FORT}, + {"common", L_FORT}, + {"dimension", L_FORT}, + {"integer", L_FORT}, + {"data", L_FORT}, +#endif /*NOTDEF*/ + {".ascii", L_MACH}, + {".asciiz", L_MACH}, + {".byte", L_MACH}, + {".even", L_MACH}, + {".globl", L_MACH}, + {".text", L_MACH}, + {"clr", L_MACH}, + {"(input,", L_PAS}, + {"dcl", L_PLI}, + {"Received:", L_MAIL}, + {">From", L_MAIL}, + {"Return-Path:",L_MAIL}, + {"Cc:", L_MAIL}, + {"Newsgroups:", L_NEWS}, + {"Path:", L_NEWS}, + {"Organization:",L_NEWS}, + {"href=", L_HTML}, + {"HREF=", L_HTML}, + {"] + * + * Revision 1.35 2001/04/24 14:40:25 christos + * - rename magic file sgi to mips and fix it + * - add support for building magic.mgc + * - portability fixes for mmap() + * - try gzip before uncompress, because uncompress sometimes hangs + * - be more conservative about pipe reads and writes + * - many magic fixes + * + * Revision 1.34 2001/03/12 05:05:57 christos + * - new compiled magic format + * - lots of magic additions + * + * Revision 1.33 2000/11/13 00:30:50 christos + * - wordperfect magic fix: freebsd pr 9388 + * - more msdos fixes from freebsd pr's 20131 and 20812 + * - sas and spss magic [Bruce Foster] + * - mkinstalldirs [John Fremlin] + * - sgi opengl fixes [Michael Pruett] + * - netbsd magic fixes [Ignatios Souvatzis] + * - audio additions [Michael Pruett] + * - fix problem with non ansi RCSID [Andreas Ley] + * - oggs magic [Felix von Leitner] + * - gmon magic [Eugen Dedu] + * - TNEF magic [Joomy] + * - netpbm magic and misc other image stuff [Bryan Henderson] + * + * Revision 1.32 2000/08/05 18:24:18 christos + * Correct indianness detection in elf (Charles Hannum) + * FreeBSD elf core support (Guy Harris) + * Use gzip in systems that don't have uncompress (Anthon van der Neut) + * Internationalization/EBCDIC support (Eric Fisher) + * Many many magic changes + * + * Revision 1.31 2000/05/14 17:58:36 christos + * - new magic for claris files + * - new 
magic for mathematica and maple files + * - new magic for msvc files + * - new -k flag to keep going matching all possible entries + * - add the word executable on #! magic files, and fix the usage of + * the word script + * - lots of other magic fixes + * - fix typo test -> text + * + * Revision 1.30 2000/04/11 02:41:17 christos + * - add support for mime output (-i) + * - make sure we free memory in case realloc fails + * - magic fixes + * + * Revision 1.29 1999/11/28 20:02:29 christos + * new string/[Bcb] magic from anthon, and adjustments to the magic files to + * use it. + * + * Revision 1.28 1999/10/31 22:11:48 christos + * - add "char" type for compatibility with HP/UX + * - recognize HP/UX syntax &=n etc. + * - include errno.h for CYGWIN + * - conditionalize the S_IS* macros + * - revert the SHT_DYNSYM test that broke the linux stripped binaries test + * - lots of Magdir changes + * + * Revision 1.27 1999/02/14 17:21:41 christos + * Automake support and misc cleanups from Rainer Orth + * Enable reading character and block special files from Dale R. Worley + * + * Revision 1.26 1998/09/12 13:19:39 christos + * - add support for bi-endian indirect offsets (Richard Verhoeven) + * - add recognition for bcpl (Joseph Myers) + * - remove non magic files from Magdir to avoid difficulties building + * on os2 where files are case independent + * - magic fixes. + * + * Revision 1.25 1998/06/27 14:04:04 christos + * OLF patch Guy Harris + * Recognize java/html (debian linux) + * Const poisoning (debian linux) + * More magic! 
+ * + * Revision 1.24 1998/02/15 23:20:38 christos + * Autoconf patch: Felix von Leitner + * More magic fixes + * Elf64 fixes + * + * Revision 1.23 1997/11/05 16:03:37 christos + * - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com] + * - handle 64 bit time_t's correctly [ewt@redhat.com] + * - new mime style magic [clarosse@netvista.net] + * - new TI calculator magic [rmcguire@freenet.columbus.oh.us] + * - new figlet fonts [obrien@freebsd.org] + * - new cisco magic, and elf fixes [jhawk@bbnplanet.com] + * - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com] + * - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com] + * - Windows/NT registry files, audio code [guy@netapp.com] + * - libGrx graphics lib fonts [guy@netapp.com] + * - PNG fixes [guy@netapp.com] + * - more m$ document magic [guy@netapp.com] + * - PPD files [guy@netapp.com] + * - archive magic cleanup [guy@netapp.com] + * - linux kernel magic cleanup [guy@netapp.com] + * - lecter magic [guy@netapp.com] + * - vgetty magic [guy@netapp.com] + * - sniffer additions [guy@netapp.com] + * + * Revision 1.22 1997/01/15 17:23:24 christos + * - add support for elf core files: find the program name under SVR4 [Ken Pizzini] + * - print strings only up to the first carriage return [various] + * - freebsd international ascii support [J Wunsch] + * - magic fixes and additions [Guy Harris] + * - 64 bit fixes [Larry Schwimmer] + * - support for both utime and utimes, but don't restore file access times + * by default [various] + * - \xXX only takes 2 hex digits, not 3. 
+ * - re-implement support for core files [Guy Harris] + * + * Revision 1.21 1996/10/05 18:15:29 christos + * Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF + * More magic fixes + * + * Revision 1.20 1996/06/22 22:15:52 christos + * - support relative offsets of the form >& + * - fix bug with truncating magic strings that contain \n + * - file -f - did not read from stdin as documented + * - support elf file parsing using our own elf support. + * - as always magdir fixes and additions. + * + * Revision 1.19 1995/10/27 23:14:46 christos + * Ability to parse colon separated list of magic files + * New LEGAL.NOTICE + * Various magic file changes + * + * Revision 1.18 1995/05/20 22:09:21 christos + * Passed incorrect argument to eatsize(). + * Use %ld and %lx where appropriate. + * Remove unused variables + * ELF support for both big and little endian + * Fixes for small files again. + * + * Revision 1.17 1995/04/28 17:29:13 christos + * - Incorrect nroff detection fix from der Mouse + * - Lost and incorrect magic entries. + * - Added ELF stripped binary detection [in C; ugh] + * - Look for $MAGIC to find the magic file. + * - Eat trailing size specifications from numbers i.e. ignore 10L + * - More fixes for very short files + * + * Revision 1.16 1995/03/25 22:06:45 christos + * - use strtoul() where it exists. + * - fix sign-extend bug + * - try to detect tar archives before nroff files, otherwise + * tar files where the first file starts with a . will not work + * + * Revision 1.15 1995/01/21 21:03:35 christos + * Added CSECTION for the file man page + * Added version flag -v + * Fixed bug with -f input flag (from iorio@violet.berkeley.edu) + * Lots of magic fixes and reorganization... 
+ * + * Revision 1.14 1994/05/03 17:58:23 christos + * changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned + * + * Revision 1.13 1994/01/21 01:27:01 christos + * Fixed null termination bug from Don Seeley at BSDI in ascmagic.c + * + * Revision 1.12 1993/10/27 20:59:05 christos + * Changed -z flag to understand gzip format too. + * Moved builtin compression detection to a table, and move + * the compress magic entry out of the source. + * Made printing of numbers unsigned, and added the mask to it. + * Changed the buffer size to 8k, because gzip will refuse to + * unzip just a few bytes. + * + * Revision 1.11 1993/09/24 18:49:06 christos + * Fixed small bug in softmagic.c introduced by + * copying the data to be examined out of the input + * buffer. Changed the Makefile to use sed to create + * the correct man pages. + * + * Revision 1.10 1993/09/23 21:56:23 christos + * Passed purify. Fixed indirections. Fixed byte order printing. + * Fixed segmentation faults caused by referencing past the end + * of the magic buffer. Fixed bus errors caused by referencing + * unaligned shorts or longs. + * + * Revision 1.9 1993/03/24 14:23:40 ian + * Batch of minor changes from several contributors. + * + * Revision 1.8 93/02/19 15:01:26 ian + * Numerous changes from Guy Harris too numerous to mention but including + * byte-order independance, fixing "old-style masking", etc. etc. A bugfix + * for broken symlinks from martin@@d255s004.zfe.siemens.de. + * + * Revision 1.7 93/01/05 14:57:27 ian + * Couple of nits picked by Christos (again, thanks). + * + * Revision 1.6 93/01/05 13:51:09 ian + * Lotsa work on the Magic directory. + * + * Revision 1.5 92/09/14 14:54:51 ian + * Fix a tiny null-pointer bug in previous fix for tar archive + uncompress. 
+ * + */ + diff --git a/file/src/print.c b/file/src/print.c new file mode 100644 index 0000000..6a18b8b --- /dev/null +++ b/file/src/print.c @@ -0,0 +1,239 @@ +/* + * print.c - debugging printout routines + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, + * credits must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. Since few users + * ever read sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. 
+ */
+
+#include "system.h"
+#include "file.h"
+#include "debug.h"
+
+FILE_RCSID("@(#)Id: print.c,v 1.38 2002/07/03 18:37:44 christos Exp ")
+
+/*@access fmagic @*/
+
+/*@-compmempass@*/
+/*@unchecked@*/
+static struct fmagic_s myfmagic;
+/*@unchecked@*/
+fmagic global_fmagic = &myfmagic;
+/*@=compmempass@*/
+
+#define SZOF(a)	(sizeof(a) / sizeof(a[0]))
+
+#ifndef COMPILE_ONLY
+/*
+ * mdump - write a one-line debugging dump of magic entry m to stderr.
+ *
+ * The line shows the continuation level (as a run of '>'), offset,
+ * optional indirection, type, mask, relation, comparison value and
+ * description, in the form "[>>off, type&mask,=value,"desc"]".
+ */
+void
+mdump(struct magic *m)
+{
+    /*@observer@*/
+    static const char *typ[] = { "invalid", "byte", "short", "invalid",
+                                 "long", "string", "date", "beshort",
+                                 "belong", "bedate", "leshort", "lelong",
+                                 "ledate", "pstring", "ldate", "beldate",
+                                 "leldate", "regex" };
+    static const char optyp[] = { '@', '&', '|', '^', '+', '-',
+                                  '*', '/', '%' };
+    (void) fputc('[', stderr);
+/*@-formatconst@*/
+    /* skip into the literal so that (cont_level & 7) '>' characters remain */
+    (void) fprintf(stderr, ">>>>>>>> %d" + 8 - (m->cont_level & 7),
+                   m->offset);
+/*@=formatconst@*/
+
+    if (m->flag & INDIR) {
+        (void) fprintf(stderr, "(%s,",
+                       /* Note: type is unsigned */
+                       (m->in_type < SZOF(typ)) ?
+                                typ[m->in_type] : "*bad*");
+        if (m->in_op & OPINVERSE)
+            (void) fputc('~', stderr);
+        (void) fprintf(stderr, "%c%d),",
+                       ((m->in_op&0x7F) < SZOF(optyp)) ?
+                                optyp[m->in_op&0x7F] : '?',
+                       m->in_offset);
+    }
+    (void) fprintf(stderr, " %s%s", (m->flag & UNSIGNED) ? "u" : "",
+                   /* Note: type is unsigned */
+                   (m->type < SZOF(typ)) ? typ[m->type] : "*bad*");
+    if (m->mask_op & OPINVERSE)
+        (void) fputc('~', stderr);
+    if (m->mask) {
+        (void) fputc(((m->mask_op&0x7F) < SZOF(optyp)) ?
+                        optyp[m->mask_op&0x7F] : '?', stderr);
+        /*
+         * Only string types carry the STRING_* flag bits in mask; every
+         * other type has a numeric mask.  (The test used to be written
+         * with ||, which is always true and left the flag branch dead.)
+         */
+        if (STRING != m->type && PSTRING != m->type)
+            (void) fprintf(stderr, "%.8x", m->mask);
+        else {
+            if (m->mask & STRING_IGNORE_LOWERCASE)
+                (void) fputc(CHAR_IGNORE_LOWERCASE, stderr);
+            if (m->mask & STRING_COMPACT_BLANK)
+                (void) fputc(CHAR_COMPACT_BLANK, stderr);
+            if (m->mask & STRING_COMPACT_OPTIONAL_BLANK)
+                (void) fputc(CHAR_COMPACT_OPTIONAL_BLANK,
+                             stderr);
+        }
+    }
+
+    (void) fprintf(stderr, ",%c", m->reln);
+
+    if (m->reln != 'x') {
+        switch (m->type) {
+        case BYTE:
+        case SHORT:
+        case LONG:
+        case LESHORT:
+        case LELONG:
+        case BESHORT:
+        case BELONG:
+            (void) fprintf(stderr, "%d", m->value.l);
+            break;
+        case STRING:
+        case PSTRING:
+        case REGEX:
+            showstr(stderr, m->value.s, -1);
+            break;
+        case DATE:
+        case LEDATE:
+        case BEDATE:
+            (void)fprintf(stderr, "%s,", fmttime(m->value.l, 1));
+            break;
+        case LDATE:
+        case LELDATE:
+        case BELDATE:
+            (void)fprintf(stderr, "%s,", fmttime(m->value.l, 0));
+            break;
+        default:
+            (void) fputs("*bad*", stderr);
+            break;
+        }
+    }
+    (void) fprintf(stderr, ",\"%s\"]\n", m->desc);
+}
+#endif
+
+#if !defined(HAVE_ERROR) || defined(__LCLINT__)
+/*
+ * error - print best error message possible and exit
+ */
+/*VARARGS*/
+void
+error(int status, /*@unused@*/ int errnum, const char * format, ...)
+{
+    va_list va;
+
+    va_start(va, format);
+    /* cuz we use stdout for most, stderr here */
+    (void) fflush(stdout);
+
+    if (__progname != NULL)
+        (void) fprintf(stderr, "%s: ", __progname);
+    (void) vfprintf(stderr, format, va);
+    va_end(va);
+#if NOTYET
+    if (status)
+#endif
+        exit(status);
+}
+#endif
+
+/*VARARGS*/
+void
+magwarn(const char *f, ...)
+{
+    fmagic fm = global_fmagic;
+    va_list va;
+
+    va_start(va, f);
+    /* cuz we use stdout for most, stderr here */
+    (void) fflush(stdout);
+
+    if (__progname != NULL)
+        (void) fprintf(stderr, "%s: %s, %d: ",
+                       __progname, fm->magicfile, fm->lineno);
+    (void) vfprintf(stderr, f, va);
+    va_end(va);
+    (void) fputc('\n', stderr);
+/*@-globstate@*/	/* FIX: __progname might be null */
+    return;
+/*@=globstate@*/
+}
+
+/*
+ * fmagicPrintf - append formatted text to fm's output buffer.
+ *
+ * Formats at fm->obp, limited to the fm->nob bytes still free, then
+ * advances fm->obp and shrinks fm->nob by the length actually stored.
+ * Output that does not fit is cut off by vsnprintf().
+ */
+void
+fmagicPrintf(const fmagic fm, const char *f, ...)
+{
+    va_list va;
+    size_t nob;
+    int rc;
+
+    va_start(va, f);
+/*@-boundswrite@*/
+    rc = vsnprintf(fm->obp, fm->nob, f, va);
+/*@=boundswrite@*/
+    va_end(va);
+
+    fm->obuf[sizeof(fm->obuf)-1] = '\0';
+    nob = strlen(fm->obp);
+    fm->obp += nob;
+    fm->nob -= nob;
+}
+
+
+#ifndef COMPILE_ONLY
+/*
+ * fmttime - format a time value as text without the trailing newline.
+ *
+ * v      seconds, converted to time_t
+ * local  non-zero: format with ctime(); zero: format with
+ *        gmtime()/asctime(), adding an hour first when daylight is set
+ *
+ * Returns a pointer to the static buffer of ctime()/asctime(), or to
+ * the literal "???" when the value cannot be converted.  The result is
+ * overwritten by the next call.
+ */
+char *
+fmttime(long v, int local)
+{
+    char *pp = "???";
+    char *rt;
+    time_t t = (time_t)v;
+    struct tm *tm;
+
+    if (local) {
+        /* ctime() yields NULL for unrepresentable times; keep "???" then */
+        char *ct = ctime(&t);
+
+        if (ct != NULL)
+            pp = ct;
+    } else {
+#ifndef HAVE_DAYLIGHT
+        static int daylight = 0;
+#ifdef HAVE_TM_ISDST
+        static time_t now = (time_t)0;
+
+        if (now == (time_t)0) {
+            struct tm *tm1;
+            (void)time(&now);
+            tm1 = localtime(&now);
+            if (tm1 != NULL)    /* localtime() may fail as well */
+                daylight = tm1->tm_isdst;
+        }
+#endif /* HAVE_TM_ISDST */
+#endif /* HAVE_DAYLIGHT */
+        if (daylight)
+            t += 3600;
+        tm = gmtime(&t);
+        if (tm != NULL) {
+            char *at = asctime(tm);
+
+            if (at != NULL)
+                pp = at;
+        }
+    }
+
+/*@-modobserver@*/
+    /* "???" contains no newline, so the literal is never written to */
+    if ((rt = strchr(pp, '\n')) != NULL)
+        *rt = '\0';
+/*@=modobserver@*/
+    return pp;
+}
+#endif
diff --git a/file/src/readelf.c b/file/src/readelf.c
new file mode 100644
index 0000000..ca2138e
--- /dev/null
+++ b/file/src/readelf.c
@@ -0,0 +1,706 @@
+#include "system.h"
+
+#ifdef BUILTIN_ELF
+#include "file.h"
+#include "readelf.h"
+#include "debug.h"
+
+FILE_RCSID("@(#)Id: readelf.c,v 1.22 2002/07/03 18:26:38 christos Exp ")
+
+/*@access fmagic @*/
+
+/*
+ * getu16/getu32/getu64 - return value with its bytes reversed when
+ * fm->swap is set, and unchanged otherwise.
+ */
+/*@-bounds@*/
+static uint16_t
+getu16(const fmagic fm, uint16_t value)
+	/*@*/
+{
+    union {
+        uint16_t ui;
+        char c[2];
+    } retval, tmpval;
+
+    if (fm->swap) {
+        tmpval.ui = value;
+
+        retval.c[0] = tmpval.c[1];
+        retval.c[1] = tmpval.c[0];
+
+        return retval.ui;
+    } else
+        return value;
+}
+
+static uint32_t
+getu32(const fmagic fm, uint32_t value)
+	/*@*/
+{
+    union {
+        uint32_t ui;
+        char c[4];
+    } retval, tmpval;
+
+    if (fm->swap) {
+        tmpval.ui = value;
+
+        retval.c[0] = tmpval.c[3];
+        retval.c[1] = tmpval.c[2];
+        retval.c[2] = tmpval.c[1];
+        retval.c[3] = tmpval.c[0];
+
+        return retval.ui;
+    } else
+        return value;
+}
+
+static uint64_t
+getu64(const fmagic fm, uint64_t value)
+	/*@*/
+{
+    union {
+        uint64_t ui;
+        char c[8];
+    } retval, tmpval;
+
+    if (fm->swap) {
+        tmpval.ui = value;
+
+        retval.c[0] = tmpval.c[7];
+        retval.c[1] = tmpval.c[6];
+        retval.c[2] = tmpval.c[5];
+        retval.c[3] = tmpval.c[4];
+        retval.c[4] = tmpval.c[3];
+        retval.c[5] = tmpval.c[2];
+        retval.c[6] = tmpval.c[1];
+        retval.c[7] = tmpval.c[0];
+
+        return retval.ui;
+    } else
+        return value;
+}
+/*@=bounds@*/
+
+/*
+ * Accessors that pick the 32- or 64-bit ELF structure according to
+ * fm->cls.  They expand to references to locals named sh32/sh64,
+ * ph32/ph64 and nh32/nh64, which the using function must declare.
+ */
+#define sh_addr		(fm->cls == ELFCLASS32		\
+			 ? (void *) &sh32		\
+			 : (void *) &sh64)
+#define sh_size		(fm->cls == ELFCLASS32		\
+			 ? sizeof sh32			\
+			 : sizeof sh64)
+#define shs_type	(fm->cls == ELFCLASS32		\
+			 ? getu32(fm, sh32.sh_type)	\
+			 : getu32(fm, sh64.sh_type))
+#define ph_addr		(fm->cls == ELFCLASS32		\
+			 ? (void *) &ph32		\
+			 : (void *) &ph64)
+#define ph_size		(fm->cls == ELFCLASS32		\
+			 ? sizeof ph32			\
+			 : sizeof ph64)
+#define ph_type		(fm->cls == ELFCLASS32		\
+			 ? getu32(fm, ph32.p_type)	\
+			 : getu32(fm, ph64.p_type))
+#define ph_offset	(fm->cls == ELFCLASS32		\
+			 ? getu32(fm, ph32.p_offset)	\
+			 : getu64(fm, ph64.p_offset))
+#define ph_align	(fm->cls == ELFCLASS32		\
+			 ? (ph32.p_align ? getu32(fm, ph32.p_align) : 4) \
+			 : (ph64.p_align ? getu64(fm, ph64.p_align) : 4))
+#define nh_size		(fm->cls == ELFCLASS32		\
+			 ? sizeof *nh32			\
+			 : sizeof *nh64)
+#define nh_type		(fm->cls == ELFCLASS32		\
+			 ? getu32(fm, nh32->n_type)	\
+			 : getu32(fm, nh64->n_type))
+#define nh_namesz	(fm->cls == ELFCLASS32		\
+			 ? 
getu32(fm, nh32->n_namesz) \ + : getu32(fm, nh64->n_namesz)) +#define nh_descsz (fm->cls == ELFCLASS32 \ + ? getu32(fm, nh32->n_descsz) \ + : getu32(fm, nh64->n_descsz)) +#define prpsoffsets(i) (fm->cls == ELFCLASS32 \ + ? prpsoffsets32[i] \ + : prpsoffsets64[i]) + +/*@-bounds@*/ +static void +doshn(fmagic fm, off_t off, int num, size_t size) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + Elf32_Shdr sh32; + Elf64_Shdr sh64; + + if (size != sh_size) { + error(EXIT_FAILURE, 0, "corrupted program header size.\n"); + /*@notreached@*/ + } + + if (lseek(fm->fd, off, SEEK_SET) == -1) { + error(EXIT_FAILURE, 0, "lseek failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + + for ( ; num; num--) { + if (read(fm->fd, sh_addr, size) == -1) { + error(EXIT_FAILURE, 0, "read failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + if (shs_type == SHT_SYMTAB /* || shs_type == SHT_DYNSYM */) { + fmagicPrintf(fm, ", not stripped"); + return; + } + } + fmagicPrintf(fm, ", stripped"); +} +/*@=bounds@*/ + +/* + * Look through the program headers of an executable image, searching + * for a PT_INTERP section; if one is found, it's dynamically linked, + * otherwise it's statically linked. 
+ */ +/*@-bounds@*/ +static void +dophn_exec(fmagic fm, off_t off, int num, size_t size) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + Elf32_Phdr ph32; + Elf32_Nhdr *nh32 = NULL; + Elf64_Phdr ph64; + Elf64_Nhdr *nh64 = NULL; + char *linking_style = "statically"; + char *shared_libraries = ""; + char nbuf[BUFSIZ]; + int bufsize; + size_t offset, nameoffset; + + if (size != ph_size) { + error(EXIT_FAILURE, 0, "corrupted program header size.\n"); + /*@notreached@*/ + } + + if (lseek(fm->fd, off, SEEK_SET) == -1) { + error(EXIT_FAILURE, 0, "lseek failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + + for ( ; num; num--) { + if (read(fm->fd, ph_addr, size) == -1) { + error(EXIT_FAILURE, 0, "read failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + + switch (ph_type) { + case PT_DYNAMIC: + linking_style = "dynamically"; + /*@switchbreak@*/ break; + case PT_INTERP: + shared_libraries = " (uses shared libs)"; + /*@switchbreak@*/ break; + case PT_NOTE: + /* + * This is a PT_NOTE section; loop through all the notes + * in the section. + */ + if (lseek(fm->fd, (off_t) ph_offset, SEEK_SET) == -1) { + error(EXIT_FAILURE, 0, "lseek failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + bufsize = read(fm->fd, nbuf, sizeof(nbuf)); + if (bufsize == -1) { + error(EXIT_FAILURE, 0, ": " "read failed (%s).\n", + strerror(errno)); + /*@notreached@*/ + } + offset = 0; + for (;;) { + if (offset >= bufsize) + /*@innerbreak@*/ break; + if (fm->cls == ELFCLASS32) + nh32 = (Elf32_Nhdr *)&nbuf[offset]; + else + nh64 = (Elf64_Nhdr *)&nbuf[offset]; + offset += nh_size; + + if (offset + nh_namesz >= bufsize) { + /* + * We're past the end of the buffer. + */ + /*@innerbreak@*/ break; + } + + nameoffset = offset; + offset += nh_namesz; + offset = ((offset+ph_align-1)/ph_align)*ph_align; + + if ((nh_namesz == 0) && (nh_descsz == 0)) { + /* + * We're out of note headers. 
+ */ + /*@innerbreak@*/ break; + } + + if (offset + nh_descsz >= bufsize) + /*@innerbreak@*/ break; + + if (nh_namesz == 4 && + strcmp(&nbuf[nameoffset], "GNU") == 0 && + nh_type == NT_GNU_VERSION && + nh_descsz == 16) { + uint32_t *desc = + (uint32_t *)&nbuf[offset]; + + fmagicPrintf(fm, ", for GNU/"); + switch (getu32(fm, desc[0])) { + case GNU_OS_LINUX: + fmagicPrintf(fm, "Linux"); + /*@switchbreak@*/ break; + case GNU_OS_HURD: + fmagicPrintf(fm, "Hurd"); + /*@switchbreak@*/ break; + case GNU_OS_SOLARIS: + fmagicPrintf(fm, "Solaris"); + /*@switchbreak@*/ break; + default: + fmagicPrintf(fm, ""); + /*@switchbreak@*/ break; + } + fmagicPrintf(fm, " %d.%d.%d", + getu32(fm, desc[1]), + getu32(fm, desc[2]), + getu32(fm, desc[3])); + } + + if (nh_namesz == 7 && + strcmp(&nbuf[nameoffset], "NetBSD") == 0 && + nh_type == NT_NETBSD_VERSION && + nh_descsz == 4) { + fmagicPrintf(fm, ", for NetBSD"); + /* + * Version number is stuck at 199905, + * and hence is basically content-free. + */ + } + + if (nh_namesz == 8 && + strcmp(&nbuf[nameoffset], "FreeBSD") == 0 && + nh_type == NT_FREEBSD_VERSION && + nh_descsz == 4) { + uint32_t desc = getu32(fm, + *(uint32_t *)&nbuf[offset]); + fmagicPrintf(fm, ", for FreeBSD"); + /* + * Contents is __FreeBSD_version, + * whose relation to OS versions is + * defined by a huge table in the + * Porters' Handbook. Happily, the + * first three digits are the version + * number, at least in versions of + * FreeBSD that use this note. 
+ */ + + fmagicPrintf(fm, " %d.%d", desc / 100000, + desc / 10000 % 10); + if (desc / 1000 % 10 > 0) + fmagicPrintf(fm, ".%d", + desc / 1000 % 10); + } + + if (nh_namesz == 8 && + strcmp(&nbuf[nameoffset], "OpenBSD") == 0 && + nh_type == NT_OPENBSD_VERSION && + nh_descsz == 4) { + fmagicPrintf(fm, ", for OpenBSD"); + /* Content of note is always 0 */ + } + } + if ((lseek(fm->fd, ph_offset + offset, SEEK_SET)) == -1) { + error(EXIT_FAILURE, 0, "lseek failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + /*@switchbreak@*/ break; + } + } + fmagicPrintf(fm, ", %s linked%s", linking_style, shared_libraries); +} +/*@=bounds@*/ + +#ifdef ELFCORE +/*@unchecked@*/ /*@observer@*/ +static size_t prpsoffsets32[] = { + 8, /* FreeBSD */ + 28, /* Linux 2.0.36 */ + 32, /* Linux (I forget which kernel version) */ + 84 /* SunOS 5.x */ +}; + +/*@unchecked@*/ /*@observer@*/ +static size_t prpsoffsets64[] = { + 120 /* SunOS 5.x, 64-bit */ +}; + +#define NOFFSETS32 (sizeof prpsoffsets32 / sizeof prpsoffsets32[0]) +#define NOFFSETS64 (sizeof prpsoffsets64 / sizeof prpsoffsets64[0]) + +#define NOFFSETS (fm->cls == ELFCLASS32 ? NOFFSETS32 : NOFFSETS64) + +/* + * Look through the program headers of an executable image, searching + * for a PT_NOTE section of type NT_PRPSINFO, with a name "CORE" or + * "FreeBSD"; if one is found, try looking in various places in its + * contents for a 16-character string containing only printable + * characters - if found, that string should be the name of the program + * that dropped core. Note: right after that 16-character string is, + * at least in SunOS 5.x (and possibly other SVR4-flavored systems) and + * Linux, a longer string (80 characters, in 5.x, probably other + * SVR4-flavored systems, and Linux) containing the start of the + * command line for that program. 
+ * + * The signal number probably appears in a section of type NT_PRSTATUS, + * but that's also rather OS-dependent, in ways that are harder to + * dissect with heuristics, so I'm not bothering with the signal number. + * (I suppose the signal number could be of interest in situations where + * you don't have the binary of the program that dropped core; if you + * *do* have that binary, the debugger will probably tell you what + * signal it was.) + */ + +#define OS_STYLE_SVR4 0 +#define OS_STYLE_FREEBSD 1 +#define OS_STYLE_NETBSD 2 + +/*@unchecked@*/ /*@observer@*/ +static const char *os_style_names[] = { + "SVR4", + "FreeBSD", + "NetBSD", +}; + +/*@-bounds@*/ +static void +dophn_core(fmagic fm, off_t off, int num, size_t size) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + Elf32_Phdr ph32; + Elf32_Nhdr *nh32 = NULL; + Elf64_Phdr ph64; + Elf64_Nhdr *nh64 = NULL; + size_t offset, nameoffset, noffset, reloffset; + unsigned char c; + int i, j; + char nbuf[BUFSIZ]; + int bufsize; + int os_style = -1; + + if (size != ph_size) { + error(EXIT_FAILURE, 0, "corrupted program header size.\n"); + /*@notreached@*/ + } + + /* + * Loop through all the program headers. + */ + for ( ; num; num--) { + if (lseek(fm->fd, off, SEEK_SET) == -1) { + error(EXIT_FAILURE, 0, "lseek failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + if (read(fm->fd, ph_addr, size) == -1) { + error(EXIT_FAILURE, 0, "read failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + off += size; + if (ph_type != PT_NOTE) + continue; + + /* + * This is a PT_NOTE section; loop through all the notes + * in the section. 
+ */ + if (lseek(fm->fd, (off_t) ph_offset, SEEK_SET) == -1) { + error(EXIT_FAILURE, 0, "lseek failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + bufsize = read(fm->fd, nbuf, BUFSIZ); + if (bufsize == -1) { + error(EXIT_FAILURE, 0, ": " "read failed (%s).\n", strerror(errno)); + /*@notreached@*/ + } + offset = 0; + for (;;) { + if (offset >= bufsize) + /*@innerbreak@*/ break; + if (fm->cls == ELFCLASS32) + nh32 = (Elf32_Nhdr *)&nbuf[offset]; + else + nh64 = (Elf64_Nhdr *)&nbuf[offset]; + offset += nh_size; + + /* + * Check whether this note has the name "CORE" or + * "FreeBSD", or "NetBSD-CORE". + */ + if (offset + nh_namesz >= bufsize) { + /* + * We're past the end of the buffer. + */ + /*@innerbreak@*/ break; + } + + nameoffset = offset; + offset += nh_namesz; + offset = ((offset + 3)/4)*4; + + /* + * Sigh. The 2.0.36 kernel in Debian 2.1, at + * least, doesn't correctly implement name + * sections, in core dumps, as specified by + * the "Program Linking" section of "UNIX(R) System + * V Release 4 Programmer's Guide: ANSI C and + * Programming Support Tools", because my copy + * clearly says "The first 'namesz' bytes in 'name' + * contain a *null-terminated* [emphasis mine] + * character representation of the entry's owner + * or originator", but the 2.0.36 kernel code + * doesn't include the terminating null in the + * name.... 
+ */ + if (os_style == -1) { + if ((nh_namesz == 4 && + strncmp(&nbuf[nameoffset], + "CORE", 4) == 0) || + (nh_namesz == 5 && + strcmp(&nbuf[nameoffset], + "CORE") == 0)) { + os_style = OS_STYLE_SVR4; + } else + if ((nh_namesz == 8 && + strcmp(&nbuf[nameoffset], + "FreeBSD") == 0)) { + os_style = OS_STYLE_FREEBSD; + } else + if ((nh_namesz >= 11 && + strncmp(&nbuf[nameoffset], + "NetBSD-CORE", 11) == 0)) { + os_style = OS_STYLE_NETBSD; + } else + /*@innercontinue@*/ continue; + fmagicPrintf(fm, ", %s-style", os_style_names[os_style]); + } + + if (os_style == OS_STYLE_NETBSD && + nh_type == NT_NETBSD_CORE_PROCINFO) { + uint32_t signo; + + /* + * Extract the program name. It is at + * offset 0x7c, and is up to 32-bytes, + * including the terminating NUL. + */ + fmagicPrintf(fm, ", from '%.31s'", &nbuf[offset + 0x7c]); + + /* + * Extract the signal number. It is at + * offset 0x08. + */ + memcpy(&signo, &nbuf[offset + 0x08], + sizeof(signo)); + fmagicPrintf(fm, " (signal %u)", getu32(fm, signo)); + } else + if (os_style != OS_STYLE_NETBSD && + nh_type == NT_PRPSINFO) { + /* + * Extract the program name. We assume + * it to be 16 characters (that's what it + * is in SunOS 5.x and Linux). + * + * Unfortunately, it's at a different offset + * in varous OSes, so try multiple offsets. + * If the characters aren't all printable, + * reject it. + */ + for (i = 0; i < NOFFSETS; i++) { + reloffset = prpsoffsets(i); + noffset = offset + reloffset; + for (j = 0; j < 16; + j++, noffset++, reloffset++) { + /* + * Make sure we're not past + * the end of the buffer; if + * we are, just give up. + */ + if (noffset >= bufsize) + goto tryanother; + + /* + * Make sure we're not past + * the end of the contents; + * if we are, this obviously + * isn't the right offset. + */ + if (reloffset >= nh_descsz) + goto tryanother; + + c = nbuf[noffset]; + if (c == '\0') { + /* + * A '\0' at the + * beginning is + * obviously wrong. + * Any other '\0' + * means we're done. 
+ */ + if (j == 0) + goto tryanother; + else + /*@innerbreak@*/ break; + } else { + /* + * A nonprintable + * character is also + * wrong. + */ +#define isquote(c) (strchr("'\"`", (c)) != NULL) + if (!isprint(c) || + isquote(c)) + goto tryanother; + } + } + + /* + * Well, that worked. + */ + fmagicPrintf(fm, ", from '%.16s'", + &nbuf[offset + prpsoffsets(i)]); + /*@innerbreak@*/ break; + + tryanother: + ; + } + /*@innerbreak@*/ break; + } + offset += nh_descsz; + offset = ((offset + 3)/4)*4; + } + } +} +/*@=bounds@*/ +#endif + +/*@-bounds@*/ +void +fmagicE(fmagic fm) +{ +/*@-sizeoftype@*/ + union { + int32_t l; + char c[sizeof (int32_t)]; + } u; +/*@=sizeoftype@*/ + + /* + * If we can't seek, it must be a pipe, socket or fifo. + */ + if((lseek(fm->fd, (off_t)0, SEEK_SET) == (off_t)-1) && (errno == ESPIPE)) + fm->fd = pipe2file(fm->fd, fm->buf, fm->nb); + + /* + * ELF executables have multiple section headers in arbitrary + * file locations and thus file(1) cannot determine it from easily. + * Instead we traverse thru all section headers until a symbol table + * one is found or else the binary is stripped. 
+ */ + if (fm->buf[EI_MAG0] != ELFMAG0 + || (fm->buf[EI_MAG1] != ELFMAG1 && fm->buf[EI_MAG1] != OLFMAG1) + || fm->buf[EI_MAG2] != ELFMAG2 || fm->buf[EI_MAG3] != ELFMAG3) + return; + + + fm->cls = fm->buf[EI_CLASS]; + + if (fm->cls == ELFCLASS32) { + Elf32_Ehdr elfhdr; + if (fm->nb <= sizeof (elfhdr)) + return; + + + u.l = 1; + (void) memcpy(&elfhdr, fm->buf, sizeof elfhdr); +/*@-sizeoftype@*/ + fm->swap = (u.c[sizeof(int32_t) - 1] + 1) != elfhdr.e_ident[EI_DATA]; +/*@=sizeoftype@*/ + + if (getu16(fm, elfhdr.e_type) == ET_CORE) +#ifdef ELFCORE + dophn_core(fm, + getu32(fm, elfhdr.e_phoff), + getu16(fm, elfhdr.e_phnum), + getu16(fm, elfhdr.e_phentsize)); +#else + ; +#endif + else { + if (getu16(fm, elfhdr.e_type) == ET_EXEC) { + dophn_exec(fm, + getu32(fm, elfhdr.e_phoff), + getu16(fm, elfhdr.e_phnum), + getu16(fm, elfhdr.e_phentsize)); + } + doshn(fm, + getu32(fm, elfhdr.e_shoff), + getu16(fm, elfhdr.e_shnum), + getu16(fm, elfhdr.e_shentsize)); + } + return; + } + + if (fm->cls == ELFCLASS64) { + Elf64_Ehdr elfhdr; + if (fm->nb <= sizeof (elfhdr)) + return; + + u.l = 1; + (void) memcpy(&elfhdr, fm->buf, sizeof elfhdr); +/*@-sizeoftype@*/ + fm->swap = (u.c[sizeof(int32_t) - 1] + 1) != elfhdr.e_ident[EI_DATA]; +/*@=sizeoftype@*/ + + if (getu16(fm, elfhdr.e_type) == ET_CORE) +#ifdef ELFCORE + dophn_core(fm, +#ifdef USE_ARRAY_FOR_64BIT_TYPES + getu32(fm, elfhdr.e_phoff[1]), +#else + getu64(fm, elfhdr.e_phoff), +#endif + getu16(fm, elfhdr.e_phnum), + getu16(fm, elfhdr.e_phentsize)); +#else + ; +#endif + else + { + if (getu16(fm, elfhdr.e_type) == ET_EXEC) { + dophn_exec(fm, +#ifdef USE_ARRAY_FOR_64BIT_TYPES + getu32(fm, elfhdr.e_phoff[1]), +#else + getu64(fm, elfhdr.e_phoff), +#endif + getu16(fm, elfhdr.e_phnum), + getu16(fm, elfhdr.e_phentsize)); + } + doshn(fm, +#ifdef USE_ARRAY_FOR_64BIT_TYPES + getu32(fm, elfhdr.e_shoff[1]), +#else + getu64(fm, elfhdr.e_shoff), +#endif + getu16(fm, elfhdr.e_shnum), + getu16(fm, elfhdr.e_shentsize)); + } + return; + } +} 
+/*@=bounds@*/ +#endif /* BUILTIN_ELF */ diff --git a/file/src/readelf.h b/file/src/readelf.h new file mode 100644 index 0000000..d591043 --- /dev/null +++ b/file/src/readelf.h @@ -0,0 +1,217 @@ +/* $NetBSD: readelf.h,v 1.9 2002/05/18 07:00:47 pooka Exp $ */ +/*@-redef@*/ + +/* + * readelf.h + * @(#)Id: readelf.h,v 1.9 2002/05/16 18:45:56 christos Exp + * + * Provide elf data structures for non-elf machines, allowing file + * non-elf hosts to determine if an elf binary is stripped. + * Note: cobbled from the linux header file, with modifications + */ +#ifndef __fake_elf_h__ +#define __fake_elf_h__ + +#if HAVE_STDINT_H +#include +#endif + +typedef uint32_t Elf32_Addr; +typedef uint32_t Elf32_Off; +typedef uint16_t Elf32_Half; +typedef uint32_t Elf32_Word; +typedef uint8_t Elf32_Char; + +#if SIZEOF_UINT64_T != 8 +#define USE_ARRAY_FOR_64BIT_TYPES +typedef uint32_t Elf64_Addr[2]; +typedef uint32_t Elf64_Off[2]; +typedef uint32_t Elf64_Xword[2]; +#else +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; +typedef uint64_t Elf64_Xword; +#endif +typedef uint16_t Elf64_Half; +typedef uint32_t Elf64_Word; +typedef uint8_t Elf64_Char; + +#define EI_NIDENT 16 + +/*@-matchfields@*/ +typedef struct { + Elf32_Char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct { + Elf64_Char e_ident[EI_NIDENT]; + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point */ + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; +/*@=matchfields@*/ + +/* e_type */ 
+#define ET_EXEC 2 +#define ET_CORE 4 + +/* sh_type */ +#define SHT_SYMTAB 2 +#define SHT_NOTE 7 +#define SHT_DYNSYM 11 + +/* elf type */ +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +/* elf class */ +#define ELFCLASSNONE 0 +#define ELFCLASS32 1 +#define ELFCLASS64 2 + +/* magic number */ +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_PAD 7 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" + +#define OLFMAG1 'O' +#define OLFMAG "\177OLF" + +typedef struct { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; + Elf64_Addr p_vaddr; + Elf64_Addr p_paddr; + Elf64_Xword p_filesz; + Elf64_Xword p_memsz; + Elf64_Xword p_align; +} Elf64_Phdr; + +#define PT_NULL 0 /* p_type */ +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_NUM 7 + +typedef struct { + Elf32_Word sh_name; + Elf32_Word sh_type; + Elf32_Word sh_flags; + Elf32_Addr sh_addr; + Elf32_Off sh_offset; + Elf32_Word sh_size; + Elf32_Word sh_link; + Elf32_Word sh_info; + Elf32_Word sh_addralign; + Elf32_Word sh_entsize; +} Elf32_Shdr; + +/*@-matchfields@*/ +typedef struct { + Elf64_Word sh_name; + Elf64_Word sh_type; + Elf64_Off sh_flags; + Elf64_Addr sh_addr; + Elf64_Off sh_offset; + Elf64_Off sh_size; + Elf64_Word sh_link; + Elf64_Word sh_info; + Elf64_Off sh_addralign; + Elf64_Off sh_entsize; +} Elf64_Shdr; +/*@=matchfields@*/ + +/* Notes used in ET_CORE */ +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_TASKSTRUCT 4 + +#define 
NT_NETBSD_CORE_PROCINFO 1 + +/* Note header in a PT_NOTE section */ +typedef struct elf_note { + Elf32_Word n_namesz; /* Name size */ + Elf32_Word n_descsz; /* Content size */ + Elf32_Word n_type; /* Content type */ +} Elf32_Nhdr; + +typedef struct { + Elf64_Word n_namesz; + Elf64_Word n_descsz; + Elf64_Word n_type; +} Elf64_Nhdr; + +#define NT_PRSTATUS 1 +#define NT_PRFPREG 2 +#define NT_PRPSINFO 3 +#define NT_PRXREG 4 +#define NT_PLATFORM 5 +#define NT_AUXV 6 + +/* Note types used in executables */ +/* NetBSD executables (name = "NetBSD") */ +#define NT_NETBSD_VERSION 1 +#define NT_NETBSD_EMULATION 2 +#define NT_FREEBSD_VERSION 1 +#define NT_OPENBSD_VERSION 1 +/* GNU executables (name = "GNU") */ +#define NT_GNU_VERSION 1 + +/* GNU OS tags */ +#define GNU_OS_LINUX 0 +#define GNU_OS_HURD 1 +#define GNU_OS_SOLARIS 2 +/*@=redef@*/ + +#endif diff --git a/file/src/softmagic.c b/file/src/softmagic.c new file mode 100644 index 0000000..9a42076 --- /dev/null +++ b/file/src/softmagic.c @@ -0,0 +1,1075 @@ +/* + * softmagic - interpret variable magic from MAGIC + * + * Copyright (c) Ian F. Darwin, 1987. + * Written by Ian F. Darwin. + * + * This software is not subject to any license of the American Telephone + * and Telegraph Company or of the Regents of the University of California. + * + * Permission is granted to anyone to use this software for any purpose on + * any computer system, and to alter it and redistribute it freely, subject + * to the following restrictions: + * + * 1. The author is not responsible for the consequences of use of this + * software, no matter how awful, even if they arise from flaws in it. + * + * 2. The origin of this software must not be misrepresented, either by + * explicit claim or by omission. Since few users ever read sources, + * credits must appear in the documentation. + * + * 3. Altered versions must be plainly marked as such, and must not be + * misrepresented as being the original software. 
Since few users + * ever read sources, credits must appear in the documentation. + * + * 4. This notice may not be removed or altered. + */ + +#include "system.h" +#include "file.h" +#include "debug.h" + +FILE_RCSID("@(#)Id: softmagic.c,v 1.51 2002/07/03 18:26:38 christos Exp ") + +/*@access fmagic @*/ + +/*@-bounds@*/ +static int32_t +fmagicSPrint(const fmagic fm, struct magic *m) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + union VALUETYPE * p = &fm->val; + uint32_t v; + int32_t t = 0; + + switch (m->type) { + case BYTE: + v = signextend(m, p->b); + fmagicPrintf(fm, m->desc, (unsigned char) v); +/*@-sizeoftype@*/ + t = m->offset + sizeof(char); +/*@=sizeoftype@*/ + break; + + case SHORT: + case BESHORT: + case LESHORT: + v = signextend(m, p->h); + fmagicPrintf(fm, m->desc, (unsigned short) v); +/*@-sizeoftype@*/ + t = m->offset + sizeof(short); +/*@=sizeoftype@*/ + break; + + case LONG: + case BELONG: + case LELONG: + v = signextend(m, p->l); + fmagicPrintf(fm, m->desc, (uint32_t) v); +/*@-sizeoftype@*/ + t = m->offset + sizeof(int32_t); +/*@=sizeoftype@*/ + break; + + case STRING: + case PSTRING: + if (m->reln == '=') { + fmagicPrintf(fm, m->desc, m->value.s); + t = m->offset + strlen(m->value.s); + } else { + if (*m->value.s == '\0') { + char *cp = strchr(p->s,'\n'); + if (cp != NULL) + *cp = '\0'; + } + fmagicPrintf(fm, m->desc, p->s); + t = m->offset + strlen(p->s); + } + break; + + case DATE: + case BEDATE: + case LEDATE: + fmagicPrintf(fm, m->desc, fmttime(p->l, 1)); +/*@-sizeoftype@*/ + t = m->offset + sizeof(time_t); +/*@=sizeoftype@*/ + break; + + case LDATE: + case BELDATE: + case LELDATE: + fmagicPrintf(fm, m->desc, fmttime(p->l, 0)); +/*@-sizeoftype@*/ + t = m->offset + sizeof(time_t); +/*@=sizeoftype@*/ + break; + + case REGEX: + fmagicPrintf(fm, m->desc, p->s); + t = m->offset + strlen(p->s); + break; + + default: + error(EXIT_FAILURE, 0, "invalid m->type (%d) in fmagicSPrint().\n", m->type); + /*@notreached@*/ break; + } + 
return(t); +} +/*@=bounds@*/ + +/* + * Convert the byte order of the data we are looking at + * While we're here, let's apply the mask operation + * (unless you have a better idea) + */ +/*@-bounds@*/ +static int +fmagicSConvert(fmagic fm, struct magic *m) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + union VALUETYPE * p = &fm->val; + + switch (m->type) { + case BYTE: + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->b &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->b |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->b ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->b += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->b -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->b *= m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->b /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->b %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & OPINVERSE) + p->b = ~p->b; + return 1; + case SHORT: + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->h &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->h |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->h ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->h += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->h -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->h *= m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->h /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->h %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & OPINVERSE) + p->h = ~p->h; + return 1; + case LONG: + case DATE: + case LDATE: + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->l &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->l |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->l ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->l += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->l -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->l *= 
m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->l /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->l %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & OPINVERSE) + p->l = ~p->l; + return 1; + case STRING: + { + int n; + + /* Null terminate and eat *trailing* return */ + p->s[sizeof(p->s) - 1] = '\0'; + n = strlen(p->s) - 1; + if (p->s[n] == '\n') + p->s[n] = '\0'; + return 1; + } + case PSTRING: + { + char *ptr1 = p->s, *ptr2 = ptr1 + 1; + int n = *p->s; + if (n >= sizeof(p->s)) + n = sizeof(p->s) - 1; + while (n--) + *ptr1++ = *ptr2++; + *ptr1 = '\0'; + n = strlen(p->s) - 1; + if (p->s[n] == '\n') + p->s[n] = '\0'; + return 1; + } + case BESHORT: + p->h = (short)((p->hs[0]<<8)|(p->hs[1])); + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->h &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->h |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->h ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->h += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->h -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->h *= m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->h /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->h %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & OPINVERSE) + p->h = ~p->h; + return 1; + case BELONG: + case BEDATE: + case BELDATE: + p->l = (int32_t) + ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3])); + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->l &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->l |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->l ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->l += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->l -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->l *= m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->l /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->l %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & 
OPINVERSE) + p->l = ~p->l; + return 1; + case LESHORT: + p->h = (short)((p->hs[1]<<8)|(p->hs[0])); + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->h &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->h |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->h ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->h += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->h -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->h *= m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->h /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->h %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & OPINVERSE) + p->h = ~p->h; + return 1; + case LELONG: + case LEDATE: + case LELDATE: + p->l = (int32_t) + ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0])); + if (m->mask) + switch (m->mask_op&0x7F) { + case OPAND: + p->l &= m->mask; + /*@innerbreak@*/ break; + case OPOR: + p->l |= m->mask; + /*@innerbreak@*/ break; + case OPXOR: + p->l ^= m->mask; + /*@innerbreak@*/ break; + case OPADD: + p->l += m->mask; + /*@innerbreak@*/ break; + case OPMINUS: + p->l -= m->mask; + /*@innerbreak@*/ break; + case OPMULTIPLY: + p->l *= m->mask; + /*@innerbreak@*/ break; + case OPDIVIDE: + p->l /= m->mask; + /*@innerbreak@*/ break; + case OPMODULO: + p->l %= m->mask; + /*@innerbreak@*/ break; + } + if (m->mask_op & OPINVERSE) + p->l = ~p->l; + return 1; + case REGEX: + return 1; + default: + error(EXIT_FAILURE, 0, "invalid type %d in fmagicSConvert().\n", m->type); + /*@notreached@*/ + return 0; + } +} +/*@=bounds@*/ + + +static void +fmagicSDebug(int32_t offset, char *str, int len) + /*@globals fileSystem @*/ + /*@modifies fileSystem @*/ +{ + (void) fprintf(stderr, "fmagicSGet @%d: ", offset); + showstr(stderr, (char *) str, len); + (void) fputc('\n', stderr); + (void) fputc('\n', stderr); +} + +/*@-bounds@*/ +static int +fmagicSGet(fmagic fm, struct magic *m) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + unsigned 
char * buf = fm->buf; + int nb = fm->nb; + union VALUETYPE * p = &fm->val; + int32_t offset = m->offset; + +/*@-branchstate@*/ + if (m->type == REGEX) { + /* + * offset is interpreted as last line to search, + * (starting at 1), not as bytes-from start-of-file + */ + char *last = NULL; +/*@-temptrans@*/ + p->buf = buf; +/*@=temptrans@*/ + for (; offset && (buf = strchr(buf, '\n')) != NULL; offset--, buf++) + last = buf; + if (last != NULL) + *last = '\0'; + } else if (offset + sizeof(*p) <= nb) + memcpy(p, buf + offset, sizeof(*p)); + else { + /* + * the usefulness of padding with zeroes eludes me, it + * might even cause problems + */ + int32_t have = nb - offset; + memset(p, 0, sizeof(*p)); + if (have > 0) + memcpy(p, buf + offset, have); + } +/*@=branchstate@*/ + + if (fm->flags & FMAGIC_FLAGS_DEBUG) { + fmagicSDebug(offset, (char *) p, sizeof(*p)); + mdump(m); + } + + if (m->flag & INDIR) { + switch (m->in_type) { + case BYTE: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = p->b & m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = p->b | m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = p->b ^ m->in_offset; + /*@innerbreak@*/ break; + case OPADD: + offset = p->b + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = p->b - m->in_offset; + /*@innerbreak@*/ break; + case OPMULTIPLY: + offset = p->b * m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = p->b / m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = p->b % m->in_offset; + /*@innerbreak@*/ break; + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + case BESHORT: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) & + m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) | + m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) ^ + m->in_offset; + 
/*@innerbreak@*/ break; + case OPADD: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) + + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) - + m->in_offset; + /*@innerbreak@*/ break; + case OPMULTIPLY: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) * + m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) / + m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = (short)((p->hs[0]<<8) | (p->hs[1])) % + m->in_offset; + /*@innerbreak@*/ break; + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + case LESHORT: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) & + m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) | + m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) ^ + m->in_offset; + /*@innerbreak@*/ break; + case OPADD: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) + + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) - + m->in_offset; + /*@innerbreak@*/ break; + case OPMULTIPLY: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) * + m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) / + m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = (short)((p->hs[1]<<8) | (p->hs[0])) % + m->in_offset; + /*@innerbreak@*/ break; + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + case SHORT: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = p->h & m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = p->h | m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = p->h ^ m->in_offset; + /*@innerbreak@*/ break; + case OPADD: + offset = p->h + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = p->h - m->in_offset; + 
/*@innerbreak@*/ break; + case OPMULTIPLY: + offset = p->h * m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = p->h / m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = p->h % m->in_offset; + /*@innerbreak@*/ break; + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + case BELONG: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) & + m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) | + m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) ^ + m->in_offset; + /*@innerbreak@*/ break; + case OPADD: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) + + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) - + m->in_offset; + /*@innerbreak@*/ break; + case OPMULTIPLY: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) * + m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) / + m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = (int32_t)( (p->hl[0]<<24) | (p->hl[1]<<16) | + (p->hl[2]<< 8) | (p->hl[3])) % + m->in_offset; + /*@innerbreak@*/ break; + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + case LELONG: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) & + m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) | + m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = (int32_t)( 
(p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) ^ + m->in_offset; + /*@innerbreak@*/ break; + case OPADD: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) + + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) - + m->in_offset; + /*@innerbreak@*/ break; + case OPMULTIPLY: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) * + m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) / + m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = (int32_t)( (p->hl[3]<<24) | (p->hl[2]<<16) | + (p->hl[1]<< 8) | (p->hl[0])) % + m->in_offset; + /*@innerbreak@*/ break; + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + case LONG: + if (m->in_offset) + switch (m->in_op&0x7F) { + case OPAND: + offset = p->l & m->in_offset; + /*@innerbreak@*/ break; + case OPOR: + offset = p->l | m->in_offset; + /*@innerbreak@*/ break; + case OPXOR: + offset = p->l ^ m->in_offset; + /*@innerbreak@*/ break; + case OPADD: + offset = p->l + m->in_offset; + /*@innerbreak@*/ break; + case OPMINUS: + offset = p->l - m->in_offset; + /*@innerbreak@*/ break; + case OPMULTIPLY: + offset = p->l * m->in_offset; + /*@innerbreak@*/ break; + case OPDIVIDE: + offset = p->l / m->in_offset; + /*@innerbreak@*/ break; + case OPMODULO: + offset = p->l % m->in_offset; + /*@innerbreak@*/ break; + /* case TOOMANYSWITCHBLOCKS: + * ugh = p->eye % m->strain; + * rub; + * case BEER: + * off = p->tab & m->in_gest; + * sleep; + */ + } + if (m->in_op & OPINVERSE) + offset = ~offset; + break; + } + +/*@-compmempass@*/ + if (buf == NULL || offset + sizeof(*p) > nb) + return 0; +/*@=compmempass@*/ + + memcpy(p, buf + offset, sizeof(*p)); + + if (fm->flags & FMAGIC_FLAGS_DEBUG) { + fmagicSDebug(offset, (char *) p, sizeof(*p)); + mdump(m); + 
} + } +/*@-compmempass@*/ + if (!fmagicSConvert(fm, m)) + return 0; + return 1; +/*@=compmempass@*/ +} +/*@=bounds@*/ + +/*@-bounds@*/ +static int +fmagicSCheck(const fmagic fm, struct magic *m) + /*@globals fileSystem @*/ + /*@modifies fileSystem @*/ +{ + union VALUETYPE * p = &fm->val; + uint32_t l = m->value.l; + uint32_t v = 0; + int matched; + + if ( (m->value.s[0] == 'x') && (m->value.s[1] == '\0') ) { + fprintf(stderr, "BOINK"); + return 1; + } + + switch (m->type) { + case BYTE: + v = p->b; + break; + + case SHORT: + case BESHORT: + case LESHORT: + v = p->h; + break; + + case LONG: + case BELONG: + case LELONG: + case DATE: + case BEDATE: + case LEDATE: + case LDATE: + case BELDATE: + case LELDATE: + v = p->l; + break; + + case STRING: + case PSTRING: + { + /* + * What we want here is: + * v = strncmp(m->value.s, p->s, m->vallen); + * but ignoring any nulls. bcmp doesn't give -/+/0 + * and isn't universally available anyway. + */ + unsigned char *a = (unsigned char*)m->value.s; + unsigned char *b = (unsigned char*)p->s; + int len = m->vallen; + l = 0; + v = 0; + if (0L == m->mask) { /* normal string: do it fast */ + while (--len >= 0) + if ((v = *b++ - *a++) != '\0') + /*@loopbreak@*/ break; + } else { /* combine the others */ + while (--len >= 0) { + if ((m->mask & STRING_IGNORE_LOWERCASE) && islower(*a)) { + if ((v = tolower(*b++) - *a++) != '\0') + /*@loopbreak@*/ break; + } else + if ((m->mask & STRING_COMPACT_BLANK) && isspace(*a)) { + a++; + if (isspace(*b++)) { + while (isspace(*b)) + b++; + } else { + v = 1; + /*@loopbreak@*/ break; + } + } else + if (isspace(*a) && (m->mask & STRING_COMPACT_OPTIONAL_BLANK)) { + a++; + while (isspace(*b)) + b++; + } else { + if ((v = *b++ - *a++) != '\0') + /*@loopbreak@*/ break; + } + } + } + break; + } + case REGEX: + { + int rc; + regex_t rx; + char errmsg[512]; + + rc = regcomp(&rx, m->value.s, REG_EXTENDED|REG_NOSUB); + if (rc) { + (void) regerror(rc, &rx, errmsg, sizeof(errmsg)); + error(EXIT_FAILURE, 0, 
"regex error %d, (%s)\n", rc, errmsg); + /*@notreached@*/ + } else { + rc = regexec(&rx, p->buf, 0, NULL, 0); + return !rc; + } + } + /*@notreached@*/ break; + default: + error(EXIT_FAILURE, 0, "invalid type %d in fmagicSCheck().\n", m->type); + /*@notreached@*/ + return 0; + } + + if(m->type != STRING && m->type != PSTRING) + v = signextend(m, v); + + switch (m->reln) { + case 'x': + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%u == *any* = 1\n", v); + matched = 1; + break; + + case '!': + matched = v != l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%u != %u = %d\n", + v, l, matched); + break; + + case '=': + matched = v == l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%u == %u = %d\n", + v, l, matched); + break; + + case '>': + if (m->flag & UNSIGNED) { + matched = v > l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%u > %u = %d\n", v, l, matched); + } + else { + matched = (int32_t) v > (int32_t) l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%d > %d = %d\n", v, l, matched); + } + break; + + case '<': + if (m->flag & UNSIGNED) { + matched = v < l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%u < %u = %d\n", v, l, matched); + } + else { + matched = (int32_t) v < (int32_t) l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "%d < %d = %d\n", v, l, matched); + } + break; + + case '&': + matched = (v & l) == l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "((%x & %x) == %x) = %d\n", v, l, l, matched); + break; + + case '^': + matched = (v & l) != l; + if (fm->flags & FMAGIC_FLAGS_DEBUG) + (void) fprintf(stderr, "((%x & %x) != %x) = %d\n", v, l, l, matched); + break; + + default: + matched = 0; + error(EXIT_FAILURE, 0, "fmagicSCheck: can't happen: invalid relation %d.\n", m->reln); + /*@notreached@*/ break; + } + + return matched; +} +/*@=bounds@*/ + +/* + * Go through the whole list, stopping if you find a match. 
Process all + * the continuations of that match before returning. + * + * We support multi-level continuations: + * + * At any time when processing a successful top-level match, there is a + * current continuation level; it represents the level of the last + * successfully matched continuation. + * + * Continuations above that level are skipped as, if we see one, it + * means that the continuation that controls them - i.e, the + * lower-level continuation preceding them - failed to match. + * + * Continuations below that level are processed as, if we see one, + * it means we've finished processing or skipping higher-level + * continuations under the control of a successful or unsuccessful + * lower-level continuation, and are now seeing the next lower-level + * continuation and should process it. The current continuation + * level reverts to the level of the one we're seeing. + * + * Continuations at the current level are processed as, if we see + * one, there's no lower-level continuation that may have failed. + * + * If a continuation matches, we bump the current continuation level + * so that higher-level continuations are processed. + */ +/*@-bounds@*/ +static int +fmagicSMatch(const fmagic fm) + /*@globals fileSystem @*/ + /*@modifies fm, fileSystem @*/ +{ + struct magic * m; + uint32_t nmagic = fm->ml->nmagic; + int cont_level = 0; + int need_separator = 0; + /*@only@*/ + static int32_t * tmpoff = NULL; + static int tmpdelta = 64; + static size_t tmplen = 0; + int32_t oldoff = 0; + int firstline = 1; /* a flag to print X\n X\n- X */ + int ret = 0; /* if a match is found it is set to 1*/ + int i; + + for (i = 0; i < nmagic; i++) { + m = &fm->ml->magic[i]; + /* if main entry matches, print it... */ + if (!fmagicSGet(fm, m) || !fmagicSCheck(fm, m)) { + /* main entry didn't match, flush its continuations */ + while ((m+1)->cont_level != 0 && ++i < nmagic) + m++; + continue; + } + + if (! 
firstline) { /* we found another match */ + /* put a newline and '-' to do some simple formatting */ + fmagicPrintf(fm, "\n- "); + } + + if ((cont_level+1) >= tmplen) { + tmplen += tmpdelta; + tmpoff = xrealloc(tmpoff, tmplen * sizeof(*tmpoff)); + } + tmpoff[cont_level] = fmagicSPrint(fm, m); + cont_level++; + + /* + * If we printed something, we'll need to print + * a blank before we print something else. + */ + if (m->desc[0]) + need_separator = 1; + + /* and any continuations that match */ + while ((m+1)->cont_level != 0 && ++i < nmagic) { + m++; + if (cont_level < m->cont_level) + /*@innercontinue@*/ continue; + if (cont_level > m->cont_level) { + /* We're at the end of the level "cont_level" continuations. */ + cont_level = m->cont_level; + } + if (m->flag & OFFADD) { + oldoff = m->offset; + m->offset += tmpoff[cont_level-1]; + } + if (fmagicSGet(fm, m) && fmagicSCheck(fm, m)) { + /* + * This continuation matched. + * Print its message, with a blank before it if the previous + * item printed and this item isn't empty. + */ + /* space if previous printed */ + if (need_separator + && (m->nospflag == 0) && (m->desc[0] != '\0')) + { + fmagicPrintf(fm, " "); + need_separator = 0; + } + if ((cont_level+1) >= tmplen) { + tmplen += tmpdelta; + tmpoff = xrealloc(tmpoff, tmplen * sizeof(*tmpoff)); + } + tmpoff[cont_level] = fmagicSPrint(fm, m); + cont_level++; + if (m->desc[0]) + need_separator = 1; + } + if (m->flag & OFFADD) + m->offset = oldoff; + } + firstline = 0; + ret = 1; + if (!(fm->flags & FMAGIC_FLAGS_CONTINUE)) /* don't keep searching */ + return 1; + } + return ret; /* This is hit if -k is set or there is no match */ +} +/*@=bounds@*/ + +/* + * fmagicS - lookup one file in database + * (already read from MAGIC by apprentice.c). + * Passed the name and FILE * of one file to be typed. 
+ */ +int +fmagicS(fmagic fm) +{ +/*@-branchstate@*/ + if (fm->mlist != NULL) + for (fm->ml = fm->mlist->next; fm->ml != fm->mlist; fm->ml = fm->ml->next) { +/*@-compmempass@*/ + if (fmagicSMatch(fm)) + return 1; +/*@=compmempass@*/ + } +/*@=branchstate@*/ + +/*@-compmempass@*/ + return 0; +/*@=compmempass@*/ +} diff --git a/file/src/tar.h b/file/src/tar.h new file mode 100644 index 0000000..c0dda6a --- /dev/null +++ b/file/src/tar.h @@ -0,0 +1,165 @@ +/* + * Header file for public domain tar (tape archive) program. + * + * @(#)tar.h 1.20 86/10/29 Public Domain. + * + * Created 25 August 1985 by John Gilmore, ihnp4!hoptoad!gnu. + * + * Id: tar.h,v 1.5 1999/01/13 15:44:10 christos Exp # checkin only + */ + +#ifndef __tar_h__ +#define __tar_h__ + +/* + * Header block on tape. + * + * I'm going to use traditional DP naming conventions here. + * A "block" is a big chunk of stuff that we do I/O on. + * A "record" is a piece of info that we care about. + * Typically many "record"s fit into a "block". + */ +#define RECORDSIZE 512 +#define NAMSIZ 100 +#define TUNMLEN 32 +#define TGNMLEN 32 + +/*@-fielduse@*/ +union record { + char charptr[RECORDSIZE]; + struct header { + char name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char linkflag; + char linkname[NAMSIZ]; + char magic[8]; + char uname[TUNMLEN]; + char gname[TGNMLEN]; + char devmajor[8]; + char devminor[8]; + } header; +}; +/*@=fielduse@*/ + +/* The checksum field is filled with this while the checksum is computed. */ +#define CHKBLANKS " " /* 8 blanks, no null */ + +/* The magic field is filled with this if uname and gname are valid. 
*/ +#define TARMAGIC "ustar " /* 7 chars and a null */ + +#if 0 +/* The linkflag defines the type of file */ +#define LF_OLDNORMAL '\0' /* Normal disk file, Unix compat */ +#define LF_NORMAL '0' /* Normal disk file */ +#define LF_LINK '1' /* Link to previously dumped file */ +#define LF_SYMLINK '2' /* Symbolic link */ +#define LF_CHR '3' /* Character special file */ +#define LF_BLK '4' /* Block special file */ +#define LF_DIR '5' /* Directory */ +#define LF_FIFO '6' /* FIFO special file */ +#define LF_CONTIG '7' /* Contiguous file */ +/* Further link types may be defined later. */ + +/* + * Exit codes from the "tar" program + */ +#define EX_SUCCESS 0 /* success! */ +#define EX_ARGSBAD 1 /* invalid args */ +#define EX_BADFILE 2 /* invalid filename */ +#define EX_BADARCH 3 /* bad archive */ +#define EX_SYSTEM 4 /* system gave unexpected error */ + + +/* + * Global variables + */ +extern union record *ar_block; /* Start of block of archive */ +extern union record *ar_record; /* Current record of archive */ +extern union record *ar_last; /* Last+1 record of archive block */ +extern char ar_reading; /* 0 writing, !0 reading archive */ +extern int blocking; /* Size of each block, in records */ +extern int blocksize; /* Size of each block, in bytes */ +extern char *ar_file; /* File containing archive */ +extern char *name_file; /* File containing names to work on */ +extern char *tar; /* Name of this program */ + +/* + * Flags from the command line + */ +extern char f_reblock; /* -B */ +extern char f_create; /* -c */ +extern char f_debug; /* -d */ +extern char f_sayblock; /* -D */ +extern char f_follow_links; /* -h */ +extern char f_ignorez; /* -i */ +extern char f_keep; /* -k */ +extern char f_modified; /* -m */ +extern char f_oldarch; /* -o */ +extern char f_use_protection; /* -p */ +extern char f_sorted_names; /* -s */ +extern char f_list; /* -t */ +extern char f_namefile; /* -T */ +extern char f_verbose; /* -v */ +extern char f_extract; /* -x */ +extern char 
f_compress; /* -z */ + +/* + * We now default to Unix Standard format rather than 4.2BSD tar format. + * The code can actually produce all three: + * f_standard ANSI standard + * f_oldarch V7 + * neither 4.2BSD + * but we don't bother, since 4.2BSD can read ANSI standard format anyway. + * The only advantage to the "neither" option is that we can cmp(1) our + * output to the output of 4.2BSD tar, for debugging. + */ +#define f_standard (!f_oldarch) + +/* + * Structure for keeping track of filenames and lists thereof. + */ +struct name { + struct name *next; + short length; + char found; + char name[NAMSIZ+1]; +}; + +extern struct name *namelist; /* Points to first name in list */ +extern struct name *namelast; /* Points to last name in list */ + +extern int archive; /* File descriptor for archive file */ +extern int errors; /* # of files in error */ + +/* + * + * Due to the next struct declaration, each routine that includes + * "tar.h" must also include . I tried to make it automatic, + * but System V has no defines in , so there is no way of + * knowing when it has been included. In addition, it cannot be included + * twice, but must be included exactly once. Argghh! + * + * Thanks, typedef. Thanks, USG. + */ +struct link { + struct link *next; + dev_t dev; + ino_t ino; + short linkcount; + char name[NAMSIZ+1]; +}; + +extern struct link *linklist; /* Points to first link in list */ + +/* + * Error recovery stuff + */ +extern char read_error_flag; +#endif + +#endif /* __tar_h__ */