2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 1999 Bryan Burns
4 Copyright (C) 2000 Cory Hollingsworth
6 Parts of this program are based on Bryan Burns' work with fastjar
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 /* $Id: jargrep.c,v 1.8 2000/09/13 14:02:02 cory Exp $
27 Revision 1.8 2000/09/13 14:02:02 cory
28 Reformatted some of the code to more closely match the layout of the original
31 Revision 1.7 2000/09/12 22:29:36 cory
32 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
33 will test some other platforms later.
44 #include <sys/types.h>
/* printf-style usage template. The single %s is filled with the program name:
   main passes argv[0] when it reports bad options or arguments. */
52 char *Usage = { "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n" };
57 Function name: opt_valid
58 arg: options Bitfield flag that contains the command line options of grepjar.
59 purpose: To guard against the occurrence of certain incompatible flags being used
61 returns: TRUE if options are valid, FALSE otherwise.
/* Inspect the option bitfield for a combination of flags that must not be
   used together. */
64 int opt_valid(int options) {
/* True when JG_PRINT_COUNT is set together with JG_PRINT_BYTEOFFSET and/or
   JG_PRINT_LINE_NUMBER. What is done on a hit is not visible in this excerpt. */
67 if((options & JG_PRINT_COUNT) &&
68 (options & (JG_PRINT_BYTEOFFSET | JG_PRINT_LINE_NUMBER)))
78 Function name: create_regexp
79 args: regstr String containing the uncompiled regular expression. This may be the
80 expression as is passed in through argv.
81 options This is the flag containing the commandline options that have been
83 purpose: Handle the exception handling involved with setting up a new regular
85 returns: Newly allocated compiled regular expression ready to be used in a regexec call.
/* Allocate a regex_t on the heap and compile regstr into it, printing a
   diagnostic on stderr when allocation or compilation fails. */
88 regex_t *create_regexp(char *regstr, int options) {
/* The assignment inside the condition is intentional: a non-NULL result
   selects the compile path. */
95 if(exp = (regex_t *) malloc(sizeof(regex_t)))
/* regcomp returns non-zero on failure. REG_ICASE is requested only when
   JG_IGNORE_CASE is set in options. */
97 if(errcode = regcomp(exp, regstr, (options & JG_IGNORE_CASE) ? REG_ICASE : 0)) {
98 fprintf(stderr, "regcomp of regex failed,\n");
/* regerror with a NULL buffer and size 0 returns the size needed for the
   message; that value (plus one spare byte) sizes the buffer. */
99 if(errmsg = (char *) malloc(msgsize = regerror(errcode, exp, NULL, 0) + 1)) {
/* Second call actually formats the message into errmsg. */
100 regerror(errcode, exp, errmsg, msgsize);
101 fprintf(stderr, "Error: %s\n", errmsg);
/* Reported when the message buffer itself could not be allocated. */
107 fprintf(stderr, "Malloc of errmsg failed.\n");
108 fprintf(stderr, "Error: %s\n", strerror(errno));
/* Reported when the regex_t allocation failed. */
115 fprintf(stderr, "Malloc of regex failed,\n");
116 fprintf(stderr, "Error: %s\n", strerror(errno));
124 Function name: check_sig
125 args: scratch Pointer to array of bytes containing signature.
126 pbfp Pointer to push back handle for jar file.
127 purpose: Verify that the signature is correct.
128 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
129 we have read beyond the embedded file list and can exit knowing we have read all the
130 relevant information. 2 means we still haven't reached embedded file list and need to
131 do some more reading.
/* Classify the 4-byte record signature held at the start of scratch. The
   return statements are not visible in this excerpt. */
133 int check_sig(ub1 *scratch, pb_file *pbfp) {
/* Decode the four bytes at offset 0 of scratch into one 32-bit value. */
137 signature = UNPACK_UB4(scratch, 0);
140 printf("signature is %x\n", signature);
/* 0x08074b50 is treated as a data descriptor: the next 12 bytes are consumed
   from pbfp (overwriting scratch). The pb_read result is not checked here. */
142 if(signature == 0x08074b50){
144 printf("skipping data descriptor\n");
146 pb_read(pbfp, scratch, 12);
/* 0x02014b50 is treated as the central header, i.e. the end of the entries. */
148 } else if(signature == 0x02014b50){
150 printf("Central header reached.. we're all done!\n");
/* Any value other than 0x04034b50 (presumably the local file header
   signature - confirm) is reported as unexpected. */
153 }else if(signature != 0x04034b50){
154 printf("Ick! %#x\n", signature);
162 Function name: decd_siz
163 args csize Pointer to embedded file's compressed size.
164 usize Pointer to embedded file's uncompressed size.
165 fnlen Pointer to embedded file's file name length.
166 eflen Pointer to length of extra fields in jar file.
167 flags Pointer to bitmapped flags.
168 method Pointer to indicator of storage method of embedded file.
169 file_header Pointer to string containing the above values to be unpacked.
170 Purpose: Unpack the series of values from file_header.
/* Unpack the size, length, flag and method fields of an entry header from
   file_header into the caller-supplied output variables. Each field is read
   at its LOC_* offset with the UNPACK macro matching the destination width
   (UB4 for ub4 outputs, UB2 for ub2 outputs), then echoed with printf. */
173 void decd_siz(ub4 *csize, ub4 *usize, ub2 *fnlen, ub2 *eflen, ub2 *flags, ub2 *method, ub1 *file_header) {
174 *csize = UNPACK_UB4(file_header, LOC_CSIZE);
176 printf("Compressed size is %u\n", *csize);
179 *usize = UNPACK_UB4(file_header, LOC_USIZE);
181 printf("Uncompressed size is %u\n", *usize);
184 *fnlen = UNPACK_UB2(file_header, LOC_FNLEN);
186 printf("Filename length is %hu\n", *fnlen);
189 *eflen = UNPACK_UB2(file_header, LOC_EFLEN);
191 printf("Extra field length is %hu\n", *eflen);
/* NOTE(review): the flags word is read at LOC_EXTRA; presumably that constant
   names the offset of the general-purpose flags - confirm against its
   definition. */
194 *flags = UNPACK_UB2(file_header, LOC_EXTRA);
196 printf("Flags are %#hx\n", *flags);
199 *method = UNPACK_UB2(file_header, LOC_COMP);
201 printf("Compression method is %#hx\n", *method);
207 Function name: new_filename
208 args: pbf Pointer to push back file handle. Used for reading input file.
209 len Length of file name to be read.
210 purpose: Read in the embedded file name from jar file.
211 returns: Pointer to newly allocated string containing file name.
/* Read len bytes from pbf into a freshly malloc'd buffer and NUL-terminate it. */
214 char *new_filename(pb_file *pbf, ub4 len) {
/* One byte beyond len is reserved for the terminator written below. */
217 if(!(filename = (char *) malloc(len + 1))) {
218 fprintf(stderr, "Malloc failed of filename\n");
219 fprintf(stderr, "Error: %s\n", strerror(errno));
/* NOTE(review): the value returned by pb_read is discarded on this line, so a
   short read is not detected here. */
221 pb_read(pbf, filename, len);
222 filename[len] = '\0';
225 printf("filename is %s\n", filename);
232 Function name: read_string
233 args: pbf Pointer to push back file handle. Used for reading input file.
234 size Size of embedded file in bytes.
235 purpose: Create a string containing the contents of the embedded noncompressed file.
236 returns: Pointer to newly allocated string containing embedded file contents.
/* Read size bytes from pbf into a newly allocated buffer. */
239 char *read_string(pb_file *pbf, int size) {
/* size + 1 leaves one byte beyond the data; how that byte is set is not
   visible in this excerpt. */
242 if(page = (char *) malloc(size + 1)) {
/* The pb_read result is not checked on this line. */
243 pb_read(pbf, page, size);
/* Reported when the buffer could not be allocated. */
247 fprintf(stderr, "Malloc of page buffer failed.\n");
248 fprintf(stderr, "Error: %s\n", strerror(errno));
256 Function name: extract_line
257 args: stream String containing the full contents of a file which is to be substringed
258 in order to provide line representing our grep output.
259 begin Index into stream at which the regular expression first matches.
260 end Index into stream at which the match to the regular expression ends.
261 b Pointer to the index of what will be the beginning of the line when
262 string is returned. Used for -b option.
263 purpose: Create a string that can be printed by jargrep from the long string stream.
264 The matching line that is printed out by jargrep is generated by this function.
265 returns: Pointer to newly allocated string containing matched expression.
/* Locate the line surrounding the match [begin, end) in stream and copy it
   into a newly allocated string. */
268 char *extract_line(char *stream, regoff_t begin, regoff_t end, int *b) {
/* Walk *b backwards from the match start until a control character is found
   or the index drops below 0. Any adjustment of *b after the loop is not
   visible in this excerpt.
   NOTE(review): if it is not adjusted, &(stream[*b]) below can be stream[-1]. */
273 for(*b = begin; *b >= 0 && !iscntrl(stream[*b]); (*b)--);
/* Walk e forwards from the match end. A tab does not end the line; any other
   character for which iscntrl() is true does (a NUL byte is a control
   character in the C locale). */
275 for(e = end; stream[e] == '\t' || !iscntrl(stream[e]); e++);
/* length is computed on a line not shown here. */
277 if(retstr = (char *) malloc(length + 1)) {
/* NOTE(review): the text written by this sprintf starts at retstr[0], the
   same place the strncpy on the next line writes to. */
278 sprintf(retstr, "%d:", *b);
279 strncpy(retstr, &(stream[*b]), length);
/* strncpy does not guarantee termination, so the terminator is set explicitly. */
280 retstr[length] = '\0';
283 fprintf(stderr, "Malloc failed of output string.\n");
284 fprintf(stderr, "Error: %s\n", strerror(errno));
292 Function name: chk_wrd
293 args: exp Pointer to compiled POSIX style regular expression of search target.
294 str String known to contain at least one match of exp.
295 purpose: Verify that the occurrence of the regular expression in str occurs as a whole
296 word and not a substring of another word.
297 returns: TRUE if it is a word, FALSE if it is a substring.
/* Search str for an occurrence of exp whose neighbouring characters make it
   a whole word. */
300 int chk_wrd(regex_t *exp, char *str) {
309 frnt_ok = bck_ok = FALSE;
/* Keep matching from str2 until a whole-word hit is found or regexec returns
   non-zero (no further match). */
310 while(!wrd_fnd && !(regflag = regexec(exp, str2, 1, &match, 0))) {
/* Front boundary: a match at offset 0 of the original string qualifies
   outright; otherwise the character before the match is tested for being
   neither alphanumeric nor '_'. */
311 if(!match.rm_so && (str2 == str)) frnt_ok = TRUE;
312 else if(!isalnum(str2[match.rm_so - 1]) && str2[match.rm_so - 1] != '_')
314 else frnt_ok = FALSE;
/* Back boundary: the end of the string qualifies; otherwise the character
   just past the match is tested the same way. */
316 if(str2[match.rm_eo] == '\0') bck_ok = TRUE;
317 else if(!isalnum(str2[match.rm_eo]) && str2[match.rm_eo] != '_')
321 wrd_fnd = frnt_ok && bck_ok;
/* Resume searching immediately after the current match.
   NOTE(review): a zero-length match (rm_eo == 0) leaves str2 unchanged. */
322 str2 = &(str2[match.rm_eo]);
329 Function name: prnt_mtchs
330 args: exp Pointer to compiled POSIX style regular expression of search target.
331 filename String containing the name of the embedded file which matches have
333 stream String containing the processed contents of the embedded jar file
334 represented by filename.
335 pmatch Array of regmatch_t matches into stream.
336 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
337 not set on command line.
338 num Number of matches in pmatch array.
339 lines Number of lines in file. Not set if -n is not set on command line.
340 options Bitwise flag containing flags set to represent the command line
342 purpose: Control output of jargrep. Output is controlled by which options have been
343 set at the command line.
/* Produce the output for one embedded file from its list of matches,
   honouring the output-related bits in options. */
346 void prnt_mtchs(regex_t *exp, char *filename, char *stream, regmatch_t *pmatch, regmatch_t *nl_offset, int num, int lines, int options) {
/* One pass per match. extract_line returns the text of the containing line
   and stores an index for that line in begin. */
356 for(i = 0; i < num; i++) {
357 str = extract_line(stream, pmatch[i].rm_so, pmatch[i].rm_eo, &begin);
/* Only lines whose begin index is beyond o_begin are processed, presumably so
   that several matches on one line are reported once. The update of o_begin
   is not visible here. */
358 if(begin > o_begin) {
/* When JG_WORD_EXPRESSIONS is set, the line must also pass chk_wrd. */
359 if(!(options & JG_WORD_EXPRESSIONS) || chk_wrd(exp, str)) {
/* Per-line output is skipped entirely when JG_PRINT_COUNT is set. */
361 if(!(options & JG_PRINT_COUNT)) {
362 printf("%s:", filename);
363 if(options & JG_PRINT_LINE_NUMBER) {
/* Advance j past every newline offset that precedes this line; j + 1 is
   printed as the line number. j is not reset inside this loop, so it carries
   over from one match to the next. */
364 for(; j < lines && nl_offset[j].rm_so < begin; j++);
365 printf("%d:", j + 1);
367 if(options & JG_PRINT_BYTEOFFSET) printf("%d:", begin);
/* Count mode: a single "filename:count" line. */
375 if(options & JG_PRINT_COUNT) printf("%s:%d\n", filename, ln_cnt);
379 Function name: check_crc
380 args: pbf Pointer to pushback file pointer for jar file.
381 stream String containing the unmodified contents of the extracted file entry.
382 usize Size of file in bytes.
383 purpose: Verify that the CRC matches what is stored in the jar file.
/* Compute the CRC-32 of stream and compare it with the value carried in the
   16 bytes read next from pbf. */
386 void check_crc(pb_file *pbf, char *stream, ub4 usize) {
/* crc32 called with a NULL buffer yields the initial CRC value (zlib
   convention); the second call updates it over the usize bytes of stream. */
391 crc = crc32(crc, NULL, 0);
392 crc = crc32(crc, stream, usize);
/* A short read of the 16 bytes is treated as a failure (handling not shown). */
393 if(pb_read(pbf, scratch, 16) != 16) {
/* The first four bytes must carry the data descriptor signature. */
397 if(UNPACK_UB4(scratch, 0) != 0x08074b50) {
398 fprintf(stderr, "Error! Missing data descriptor!\n");
/* The stored CRC is taken from offset 4 of the 16 bytes. */
401 lcrc = UNPACK_UB4(scratch, 4);
403 fprintf(stderr, "Error! CRCs do not match! Got %x, expected %x\n",
410 Function name: mk_ascii
411 args: stream String that contains the contents of the extracted file entry.
413 purpose: Make certain that the contents of the file are ASCII, not binary. This
414 permits grepping of binary files as well by converting non ASCII and control characters
/* Scan the first usize bytes of stream for bytes that are to be replaced. */
418 void mk_ascii(char *stream, int usize) {
421 for(i = 0; i < usize; i++)
/* Selects every byte other than tab that is either a control character or
   has a value of 128 or above. The statement applied to such bytes is not
   visible in this excerpt. */
422 if(stream[i] != '\t' && (iscntrl(stream[i]) || (unsigned char) stream[i] >= 128))
427 Function name: fnd_match
428 args: exp Pointer to compiled POSIX style regular expression of search target.
429 str_stream String that contains the contents of the extracted file entry.
430 i Pointer to counter and index of matches.
431 purpose: Search str_stream for occurrences of the regular expression exp and create
433 returns: Pointer to newly allocated array of regmatch_t which gives indexes to start
434 and end of matches. NULL is returned upon no matches found.
/* Collect successive matches of exp in str_stream into a growing array of
   regmatch_t; *i counts the matches stored. */
437 regmatch_t *fnd_match(regex_t *exp, char *str_stream, int *i) {
/* NOTE(review): no initializer on this line. realloc below needs this to be
   NULL on first use - confirm it is set on a line not shown here. */
440 regmatch_t *match_array;
/* First search starts at the beginning of str_stream; every later search
   starts at the end offset of the match just stored. The loop ends when
   regexec returns non-zero. */
444 for(*i = 0, regflag = regexec(exp, str_stream, 1, &match, 0); !regflag;
445 regflag = regexec(exp, &(str_stream[match.rm_eo]), 1, &match, 0), (*i)++)
/* Grow the array by one element; the result is received in tmp rather than
   directly in match_array. */
447 if(tmp = (regmatch_t *)
448 realloc(match_array, sizeof(regmatch_t) * ((*i) + 1)))
/* regexec reports offsets relative to the pointer it was given, so the end
   offset of the previous stored match is added to make them offsets into
   str_stream. Presumably guarded so it is skipped for the first match
   (index -1 otherwise) - the guard is not visible here. */
452 match.rm_so += match_array[(*i) - 1].rm_eo;
453 match.rm_eo += match_array[(*i) - 1].rm_eo;
455 match_array[*i] = match;
/* Reported when the array could not be grown. */
458 fprintf(stderr, "Realloc of match_array failed.\n");
459 fprintf(stderr, "Error: %s\n", strerror(errno));
468 Function name: cont_grep
469 args: exp Pointer to compiled POSIX style regular expression of search target.
470 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
471 argument is NULL unless the -n option is used on the command line.
472 fd File descriptor of the jar file being grepped.
473 pbf Pointer to pushback file style file stream. This is for use with
474 the pushback.c file io functions.
475 options Bitwise flag containing flags set to represent the command line options.
476 purpose: This function handles single entries in an open jar file. The header is
477 read and then the embedded file is extracted and grepped.
478 returns: FALSE upon failure, TRUE otherwise.
/* Process one jar entry: read the rest of its header, fetch its contents and
   run the match/print pipeline on them. */
481 int cont_grep(regex_t *exp, regex_t *nl_exp, int fd, char *jarfile, pb_file *pbf, int options) {
494 regmatch_t *match_array;
/* NOTE(review): declared without an initializer and assigned only inside the
   conditional below, yet always passed to prnt_mtchs. */
495 regmatch_t *nl_offsets;
/* The 26 bytes read here are stored after the first four bytes of
   file_header; a short read is treated as an error (handling not shown). */
497 if(pb_read(pbf, (file_header + 4), 26) != 26) {
502 decd_siz(&csize, &usize, &fnlen, &eflen, &flags, &method, file_header);
503 filename = new_filename(pbf, fnlen);
/* Skips eflen bytes by seeking the raw descriptor.
   NOTE(review): all other reads go through pbf; confirm that seeking fd
   directly stays in step with whatever pbf has buffered. */
504 lseek(fd, eflen, SEEK_CUR);
/* Entries whose name ends in '/' are not grepped.
   NOTE(review): with fnlen == 0 this indexes filename[-1]. */
505 if(filename[fnlen - 1] != '/') {
/* Method 8 or flag bit 0x0008 selects inflate_string; anything else is read
   verbatim as csize bytes. */
506 str_stream = (method == 8 || (flags & 0x0008)) ?
507 (char *) inflate_string(pbf, &csize, &usize) :
508 read_string(pbf, csize);
/* The CRC check (which reads a data descriptor from pbf) runs only when flag
   bit 0x0008 is set. */
509 if(flags & 0x008) check_crc(pbf, str_stream, usize);
510 mk_ascii(str_stream, usize);
/* i receives the number of matches found. */
511 match_array = fnd_match(exp, str_stream, &i);
/* Newline offsets are collected only when line numbers were requested and at
   least one match exists; j receives their count. */
512 if((options & JG_PRINT_LINE_NUMBER) && i)
513 nl_offsets = fnd_match(nl_exp, str_stream, &j);
514 prnt_mtchs(exp, filename, str_stream, match_array, nl_offsets, i, j, options);
515 if(match_array) free(match_array);
526 Function name: jargrep
527 args: exp Pointer to compiled POSIX style regular expression of search target.
528 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
529 if -n option is present at command line.
530 jarfile Filename of jar file to be searched.
531 options Bitwise flag containing flags set to represent the command line options.
532 purpose: Open jar file. Check signatures. When right signature is found go to deeper
/* Open one jar file and dispatch on each record signature, handing embedded
   file entries to cont_grep. */
536 void jargrep(regex_t *exp, regex_t *nl_exp, char *jarfile, int options) {
/* An open failure is reported on stderr unless JG_SUPRESS_ERROR is set. */
542 if((fd = open(jarfile, O_RDONLY)) == -1) {
543 if(!(options & JG_SUPRESS_ERROR))
544 fprintf(stderr, "Error reading file '%s': %s\n", jarfile, strerror(errno));
/* Read the 4-byte signature that introduces the next record. */
550 if(pb_read(&pbf, scratch, 4) != 4) {
/* check_sig classifies the signature; the case labels are not visible in
   this excerpt. */
555 switch (check_sig(scratch, &pbf)) {
/* The result of cont_grep is kept in floop, presumably the loop-continuation
   flag - the loop itself is not visible here. */
557 floop = cont_grep(exp, nl_exp, fd, jarfile, &pbf, options);
563 /* fall through continue */
572 args: argc number of incoming args.
573 argv array of strings.
574 purpose: Entry point of the program. Parse command line arguments and set options.
575 Set up regular expressions. Call grep routines for each file as input.
576 returns: 1 on error 0 on success.
/* Program entry: parse the command-line options, determine the pattern and
   the list of jar files, then run jargrep on each file. */
579 int main(int argc, char **argv) {
585 regex_t *nl_exp = NULL;
586 char *regexpstr = NULL;
/* In the option string only 'e' is followed by ':', i.e. only -e takes an
   argument. The case labels of the switch are not visible in this excerpt.
   NOTE(review): the option string has no letter that obviously corresponds to
   the JG_INVERT assignment below - confirm which case sets it. */
588 while((c = getopt(argc, argv, "bce:insVw")) != -1) {
591 options |= JG_PRINT_BYTEOFFSET;
594 options |= JG_PRINT_COUNT;
/* The option argument is copied into a buffer sized strlen(optarg) + 1. */
597 if(!(regexpstr = (char *) malloc(strlen(optarg) + 1))) {
598 fprintf(stderr, "Malloc failure.\n");
599 fprintf(stderr, "Error: %s\n", strerror(errno));
602 strcpy(regexpstr, optarg);
605 options |= JG_IGNORE_CASE;
608 options |= JG_PRINT_LINE_NUMBER;
611 options |= JG_SUPRESS_ERROR;
614 options |= JG_INVERT;
617 printf("%s\n", GVERSION);
620 options |= JG_WORD_EXPRESSIONS;
623 fprintf(stderr, "Unknown option -%c\n", c);
624 fprintf(stderr, Usage, argv[0]);
/* With at least two non-option arguments left, the first is taken as the
   pattern and the files start at the one after it. */
629 if(((argc - optind) >= 2)) {
630 regexpstr = argv[optind];
631 fileindex = optind + 1;
634 fprintf(stderr, "Invalid arguments.\n");
635 fprintf(stderr, Usage, argv[0]);
/* Exactly one non-option argument left; its handling is not visible here. */
639 else if((argc - optind) == 1) {
643 fprintf(stderr, "Invalid arguments.\n");
644 fprintf(stderr, Usage, argv[0]);
648 if(opt_valid(options)) {
649 regexp = create_regexp(regexpstr, options);
/* The newline pattern is compiled only when line numbers were requested;
   otherwise nl_exp stays NULL. */
650 if(options & JG_PRINT_LINE_NUMBER) nl_exp = create_regexp("\n", 0);
/* Every remaining argument is treated as a jar file to search. */
652 for(; fileindex < argc; fileindex++)
653 jargrep(regexp, nl_exp, argv[fileindex], options);
/* regfree releases what regcomp allocated inside nl_exp; it does not free the
   regex_t object itself. */
655 if(options & JG_PRINT_LINE_NUMBER) regfree(nl_exp);
/* Reached when opt_valid rejected the option combination. */
659 fprintf(stderr, "Error: Invalid combination of options.\n");