2 * Copyright (C) 2007 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * @author Gustavo Sverzut Barbieri <gustavo.barbieri@openbossa.org>
24 * Reads EXIF tags from images.
26 * @todo: get GPS data.
27 * @todo: check if worth using mmap().
34 #define _XOPEN_SOURCE 600
35 #include <lightmediascanner_plugin.h>
36 #include <lightmediascanner_utils.h>
37 #include <lightmediascanner_db.h>
38 #include <sys/types.h>
/* Second byte of a JPEG marker; the first byte is always 0xff. */
49 JPEG_MARKER_SOI = 0xd8, /* must follow the leading 0xff at file start (_jpeg_data_get) */
50 JPEG_MARKER_JFIF = 0xe0, /* _parse() hands this type to _jfif_data_get() */
51 JPEG_MARKER_EXIF = 0xe1, /* _parse() and _jfif_data_get() hand this type to _exif_data_get() */
52 JPEG_MARKER_COMM = 0xfe, /* comment segment, read by _jpeg_com_process() */
53 JPEG_MARKER_SOF0 = 0xc0, /* segment holding width and height, read by _jpeg_sof_process() */
54 JPEG_MARKER_SOS = 0xda /* tested for in the _jpeg_info_get() marker walk */
58 * Process SOF JPEG, this contains width and height.
/*
 * Reads 6 bytes from the current position of fd and decodes two 16-bit
 * big-endian values from them: bytes 1-2 are stored in *height and bytes
 * 3-4 in *width. Bytes 0 and 5 are not used by the visible code.
 * A short read is reported with perror(). _jpeg_info_get() treats a
 * non-zero return value as failure.
 */
61 _jpeg_sof_process(int fd, unsigned short *width, unsigned short *height)
65 if (read(fd, buf, 6) != 6) {
66 perror("could not read() SOF data");
70 *height = (buf[1] << 8) | buf[2];
71 *width = (buf[3] << 8) | buf[4];
77 * Process COM JPEG, this contains user comment.
/*
 * Reads len bytes of comment text from the current position of fd into a
 * malloc()ed buffer of len + 1 bytes stored in comment->str, terminates it
 * with NUL and passes it to lms_strstrip() together with &comment->len.
 * _jpeg_info_get() treats a non-zero return value as failure.
 */
80 _jpeg_com_process(int fd, int len, struct lms_string_size *comment)
88 comment->str = malloc(len + 1);
93 if (read(fd, comment->str, len) != len) {
/*
 * NOTE(review): str[len - 1] needs len >= 1 -- confirm that an earlier
 * check (not part of this excerpt) rejects smaller values.
 */
100 if (comment->str[len - 1] == '\0')
103 comment->str[len] = '\0';
106 lms_strstrip(comment->str, &comment->len);
/* comment->len is 0 here, presumably because stripping left nothing. */
107 if (comment->len == 0) {
116 * Walk JPEG markers in order to get useful information.
/*
 * Reads JPEG segment headers from fd and dispatches on the marker byte:
 *  - JPEG_MARKER_SOF0: _jpeg_sof_process() fills info->width and
 *    info->height;
 *  - JPEG_MARKER_COMM, only while info->title.str is unset:
 *    _jpeg_com_process() fills info->title;
 *  - JPEG_MARKER_SOS is tested for as well.
 * len is the length of the segment the caller has already looked at; it is
 * first used to skip len - 2 bytes relative to the current position.
 * Callers in this file treat a non-zero return value as failure.
 */
119 _jpeg_info_get(int fd, int len, struct lms_image_info *info)
121 unsigned char buf[4];
/* A title that is already set counts as found. */
125 found = info->title.str ? 1 : 0;
/* Skip the remainder of the segment described by the caller's len. */
126 offset = lseek(fd, len - 2, SEEK_CUR);
/* Move to offset + len as an absolute position and remember where we are. */
129 offset = lseek(fd, offset + len, SEEK_SET);
/* Segment header: 0xff, marker byte, 16-bit big-endian length. */
135 if (read(fd, buf, 4) != 4) {
/* Presumably the stored length counts its own two bytes, hence the - 2. */
140 len = ((buf[2] << 8) | buf[3]) - 2;
142 if (buf[0] != 0xff) {
143 fprintf(stderr, "ERROR: expected 0xff marker, got %#x\n", buf[0]);
147 if (buf[1] == JPEG_MARKER_SOF0) {
148 if (_jpeg_sof_process(fd, &info->width, &info->height) != 0)
151 } else if (buf[1] == JPEG_MARKER_COMM && !info->title.str) {
152 if (_jpeg_com_process(fd, len, &info->title) != 0)
155 } else if (buf[1] == JPEG_MARKER_SOS)
158 len += 4; /* add read size */
165 * Read JPEG file start (0xffd8 marker) and return the next
166 * marker type and its length.
/*
 * Rewinds fd to offset 0, reads the first 6 bytes and requires them to
 * begin with 0xff, JPEG_MARKER_SOI, 0xff; otherwise the three magic bytes
 * are printed to stderr. *len receives the 16-bit big-endian value held in
 * bytes 4-5. The assignment to *type is not part of this excerpt
 * (presumably byte 3, the marker that follows SOI).
 * _parse() treats a non-zero return value as failure.
 */
169 _jpeg_data_get(int fd, int *type, int *len)
171 unsigned char buf[6];
173 if (lseek(fd, 0, SEEK_SET) != 0) {
178 if (read(fd, buf, 6) != 6) {
183 if (buf[0] != 0xff || buf[1] != JPEG_MARKER_SOI || buf[2] != 0xff) {
184 fprintf(stderr, "ERROR: not JPEG file (magic=%#x %#x %#x)\n",
185 buf[0], buf[1], buf[2]);
190 *len = (buf[4] << 8) | buf[5];
/*
 * Byte-order helpers: assemble an unsigned integer from consecutive bytes
 * starting at `a`. LE_* treat a[0] as the least significant byte, BE_* as
 * the most significant one.
 *
 * Every byte is converted to unsigned char and then widened to unsigned int
 * before it is shifted. This gives the same result for char, signed char
 * and unsigned char buffers (no sign extension of bytes >= 0x80) and never
 * shifts a bit into the sign bit of an int.
 *
 * `a` is evaluated more than once; do not pass an expression with side
 * effects.
 */
#define LE_4BYTE(a) ((unsigned int)(unsigned char)(a)[0] | \
                     ((unsigned int)(unsigned char)(a)[1] << 8) | \
                     ((unsigned int)(unsigned char)(a)[2] << 16) | \
                     ((unsigned int)(unsigned char)(a)[3] << 24))
#define BE_4BYTE(a) (((unsigned int)(unsigned char)(a)[0] << 24) | \
                     ((unsigned int)(unsigned char)(a)[1] << 16) | \
                     ((unsigned int)(unsigned char)(a)[2] << 8) | \
                     (unsigned int)(unsigned char)(a)[3])

#define LE_2BYTE(a) ((unsigned int)(unsigned char)(a)[0] | \
                     ((unsigned int)(unsigned char)(a)[1] << 8))
#define BE_2BYTE(a) (((unsigned int)(unsigned char)(a)[0] << 8) | \
                     (unsigned int)(unsigned char)(a)[1])

/*
 * Endianness-selecting wrappers: a non-zero little_endian picks the LE_*
 * form, zero picks BE_*. The "BTYE" spelling is kept as-is because other
 * code in this file refers to these names.
 */
#define E_2BTYE(little_endian, a) ((little_endian) ? LE_2BYTE(a) : BE_2BYTE(a))
#define E_4BTYE(little_endian, a) ((little_endian) ? LE_4BYTE(a) : BE_4BYTE(a))
205 EXIF_TYPE_BYTE = 1, /* 8-bit unsigned integer */
206 EXIF_TYPE_ASCII = 2, /* 8-bit byte holding a 7-bit ASCII code, NUL-terminated */
207 EXIF_TYPE_SHORT = 3, /* 2-byte unsigned integer */
208 EXIF_TYPE_LONG = 4, /* 4-byte unsigned integer */
209 EXIF_TYPE_RATIONAL = 5, /* two 4-byte unsigned integers, 1st = numerator */
210 EXIF_TYPE_UNDEFINED = 7, /* 8-bit byte */
211 EXIF_TYPE_SLONG = 9, /* 4-byte signed integer (two's complement) */
212 EXIF_TYPE_SRATIONAL = 10 /* two 4-byte signed integers, 1st = numerator */
/* Tag ids handled by the switch in _exif_ifd_process(). */
216 EXIF_TAG_ORIENTATION = 0x0112, /* stored in info->orientation */
217 EXIF_TAG_ARTIST = 0x013b, /* ASCII text, read while info->artist.str is unset */
218 EXIF_TAG_USER_COMMENT = 0x9286, /* read with _exif_text_encoding_get() while info->title.str is unset */
219 EXIF_TAG_IMAGE_DESCRIPTION = 0x010e, /* read with _exif_text_ascii_get() while info->title.str is unset */
220 EXIF_TAG_DATE_TIME = 0x0132, /* parsed into the tlast candidate */
221 EXIF_TAG_DATE_TIME_ORIGINAL = 0x9003, /* parsed into the torig candidate */
222 EXIF_TAG_DATE_TIME_DIGITIZED = 0x9004, /* parsed into the tdig candidate */
223 EXIF_TAG_EXIF_IFD_POINTER = 0x8769 /* offset handed to _exif_private_ifd_get() */
235 * Read IFD from stream.
/*
 * Reads one 12-byte IFD entry from the current position of fd and decodes
 * it into *ifd: tag (bytes 0-1), type (bytes 2-3), count (bytes 4-7) and
 * offset (bytes 8-11). The first group of assignments decodes the fields as
 * little-endian, the second as big-endian; the condition choosing between
 * them (presumably little_endian) is not part of this excerpt.
 * _exif_ifd_process() treats a non-zero return value as failure.
 */
238 _exif_ifd_get(int fd, int little_endian, struct exif_ifd *ifd)
240 unsigned char buf[12];
242 if (read(fd, buf, 12) != 12) {
248 ifd->tag = LE_2BYTE(buf);
249 ifd->type = LE_2BYTE(buf + 2);
250 ifd->count = LE_4BYTE(buf + 4);
251 ifd->offset = LE_4BYTE(buf + 8);
253 ifd->tag = BE_2BYTE(buf);
254 ifd->type = BE_2BYTE(buf + 2);
255 ifd->count = BE_4BYTE(buf + 4);
256 ifd->offset = BE_4BYTE(buf + 8);
262 * Get non-exif data based on Exif tag offset.
264 * This will set up the file descriptor position and call _jpeg_info_get().
/*
 * Seeks fd to the absolute position abs_offset (SEEK_SET) and then runs
 * _jpeg_info_get(fd, len, info) from there. A non-zero result from
 * _jpeg_info_get() is reported on stderr.
 */
267 _exif_extra_get(int fd, int abs_offset, int len, struct lms_image_info *info)
269 if (lseek(fd, abs_offset, SEEK_SET) == -1) {
274 if (_jpeg_info_get(fd, len, info) != 0) {
275 fprintf(stderr, "ERROR: could not get image size.\n");
/*
 * Reads an Exif text value whose first 8 bytes are a character code.
 * count is reduced by 8 so that the character code is not part of the
 * text (it is ignored, not interpreted). count + 1 bytes are allocated into
 * s->str, count bytes are read after seeking to offset (SEEK_SET), the
 * buffer is NUL-terminated and then passed to lms_strstrip() with &s->len.
 */
282 _exif_text_encoding_get(int fd, unsigned int count, int offset, struct lms_string_size *s)
/*
 * NOTE(review): count is unsigned, so this wraps around when count < 8 --
 * confirm that an earlier check (not part of this excerpt) rejects that.
 */
287 count -= 8; /* XXX don't just ignore character code, handle it. */
290 if (lseek(fd, offset, SEEK_SET) == -1) {
/* NOTE(review): no NULL check for malloc() is visible before the read(). */
295 s->str = malloc(count + 1);
297 if (read(fd, s->str, count) != count) {
304 s->str[count] = '\0';
307 lms_strstrip(s->str, &s->len);
/*
 * Reads an Exif ASCII value of count bytes located at offset (SEEK_SET)
 * into a malloc()ed buffer of exactly count bytes stored in s->str. The
 * last byte read is overwritten with NUL, then the string is passed to
 * lms_strstrip() together with &s->len.
 */
317 _exif_text_ascii_get(int fd, unsigned int count, int offset, struct lms_string_size *s)
325 if (lseek(fd, offset, SEEK_SET) == -1) {
/* NOTE(review): no NULL check for malloc() is visible before the read(). */
330 s->str = malloc(count);
332 if (read(fd, s->str, count) != count) {
/*
 * NOTE(review): count - 1 needs count >= 1 (count is unsigned) -- confirm
 * that an earlier check (not part of this excerpt) rejects count == 0.
 */
339 s->str[count - 1] = '\0';
342 lms_strstrip(s->str, &s->len);
/*
 * Seeks fd to offset (SEEK_SET), reads 20 bytes into buf and parses them
 * with strptime() using the format "%Y:%m:%d %H:%M:%S". What is returned is
 * not part of this excerpt; _exif_ifd_process() stores the result in int
 * timestamps and uses 0 to mean "not set".
 */
352 _exif_datetime_get(int fd, int offset)
357 if (lseek(fd, offset, SEEK_SET) == -1) {
362 if (read(fd, buf, 20) != 20) {
/*
 * NOTE(review): strptime() needs a NUL-terminated string; confirm that buf
 * is terminated even when the 20 bytes from the file contain no NUL.
 */
368 if (strptime(buf, "%Y:%m:%d %H:%M:%S", &tm)) {
/*
 * Forward declaration: _exif_ifd_process() calls this function before its
 * definition appears. The parameter names follow the definition (the IFD
 * offset comes first, the TIFF base second).
 */
static int _exif_private_ifd_get(int fd, int ifd_offset, int tiff_base, int little_endian, struct lms_image_info *info);
377 * Process IFD contents.
/*
 * Processes count IFD entries of 12 bytes each. Entry i is located by
 * seeking to the absolute file position ifd_offset + i * 12 (SEEK_SET) and
 * decoded with _exif_ifd_get(). Handled tags:
 *  - orientation: stored in info->orientation;
 *  - artist, user comment, image description: text read from
 *    tiff_base + ifd.offset, only while the target string is still unset;
 *  - the three date/time tags: parsed from tiff_base + ifd.offset into
 *    tlast, torig and tdig;
 *  - Exif IFD pointer: followed through _exif_private_ifd_get().
 * The return values of the text and private-IFD helpers are not checked.
 * info->date is examined later when it is still 0; how the date candidates
 * are combined is not part of this excerpt.
 */
380 _exif_ifd_process(int fd, int count, int ifd_offset, int tiff_base, int little_endian, struct lms_image_info *info)
382 int i, torig, tdig, tlast;
384 torig = tdig = tlast = 0;
386 for (i = 0; i < count; i++) {
/* NOTE(review): the lseek() result is not checked here. */
389 lseek(fd, ifd_offset + i * 12, SEEK_SET);
390 if (_exif_ifd_get(fd, little_endian, &ifd) != 0) {
391 fprintf(stderr, "ERROR: could not read Exif IFD.\n");
396 case EXIF_TAG_ORIENTATION:
/*
 * NOTE(review): a SHORT value occupies the first two bytes of the 4-byte
 * value field. Decoded as big-endian that is the high half of ifd.offset,
 * but decoded as little-endian it is the low half, so ">> 16" looks wrong
 * for little-endian files -- confirm.
 */
397 info->orientation = ifd.offset >> 16;
399 case EXIF_TAG_ARTIST:
400 if (!info->artist.str)
401 _exif_text_ascii_get(fd, ifd.count, tiff_base + ifd.offset,
404 case EXIF_TAG_USER_COMMENT:
405 if (!info->title.str)
406 _exif_text_encoding_get(fd, ifd.count, tiff_base + ifd.offset,
409 case EXIF_TAG_IMAGE_DESCRIPTION:
410 if (!info->title.str)
411 _exif_text_ascii_get(fd, ifd.count, tiff_base + ifd.offset,
/*
 * Date tags are parsed only while no original date has been found
 * (torig == 0) and info->date is still unset.
 */
414 case EXIF_TAG_DATE_TIME:
415 if (torig == 0 && info->date == 0)
416 tlast = _exif_datetime_get(fd, tiff_base + ifd.offset);
418 case EXIF_TAG_DATE_TIME_ORIGINAL:
419 if (torig == 0 && info->date == 0)
420 torig = _exif_datetime_get(fd, tiff_base + ifd.offset);
422 case EXIF_TAG_DATE_TIME_DIGITIZED:
423 if (torig == 0 && info->date == 0)
424 tdig = _exif_datetime_get(fd, tiff_base + ifd.offset);
426 case EXIF_TAG_EXIF_IFD_POINTER:
/* Only a single LONG value is accepted as the private IFD offset. */
427 if (ifd.count == 1 && ifd.type == EXIF_TYPE_LONG)
428 _exif_private_ifd_get(fd, ifd.offset, tiff_base,
429 little_endian, info);
437 if (info->date == 0) {
450 * Process Exif IFD (Exif Private Tag), with more specific info.
/*
 * Seeks fd to tiff_base + ifd_offset (SEEK_SET), reads the 2-byte entry
 * count in the byte order selected by little_endian and hands the entries
 * to _exif_ifd_process(), whose result is returned.
 */
453 _exif_private_ifd_get(int fd, int ifd_offset, int tiff_base, int little_endian, struct lms_image_info *info)
458 if (lseek(fd, tiff_base + ifd_offset, SEEK_SET) == -1) {
463 if (read(fd, buf, 2) != 2) {
468 count = E_2BTYE(little_endian, buf);
/*
 * NOTE(review): _exif_ifd_process() seeks to its ifd_offset argument with
 * SEEK_SET, and _exif_data_get() passes it an absolute position
 * (tiff_base + 8 + 2). The value passed here does not include tiff_base,
 * so it looks like it should be tiff_base + ifd_offset + 2 -- confirm.
 */
469 return _exif_ifd_process(fd, count, ifd_offset + 2, tiff_base,
470 little_endian, info);
474 * Process the file as Exif: extracts Exif data as well as other
475 * JPEG markers (comment, size).
/*
 * Entry point for files whose first segment is Exif.
 *
 * The current position of fd is remembered as abs_offset and 6 bytes are
 * read. *info is then zeroed and info->orientation set to 1. If the 6 bytes
 * are not "Exif" followed by two NUL bytes, only _exif_extra_get() is run.
 * Otherwise the 8-byte TIFF header is read: "II" selects little-endian,
 * "MM" big-endian, and bytes 4-7 hold an offset. tiff_base is
 * abs_offset + 6. The 2-byte entry count is read and the entries are
 * processed by _exif_ifd_process(), whose result is not checked. Finally
 * _exif_extra_get() is run from abs_offset and its result is returned.
 */
478 _exif_data_get(int fd, int len, struct lms_image_info *info)
480 const unsigned char exif_hdr[6] = "Exif\0";
481 unsigned char buf[8];
482 unsigned int little_endian, offset, count;
483 off_t abs_offset, tiff_base;
485 abs_offset = lseek(fd, 0, SEEK_CUR);
486 if (abs_offset == -1) {
491 if (read(fd, buf, 6) != 6) {
496 memset(info, 0, sizeof(*info));
497 info->orientation = 1;
499 if (memcmp(buf, exif_hdr, 6) != 0)
500 return _exif_extra_get(fd, abs_offset, len, info);
502 if (read(fd, buf, 8) != 8) {
507 if (buf[0] == 'I' && buf[1] == 'I') {
509 offset = LE_4BYTE(buf + 4);
510 } else if (buf[0] == 'M' && buf[1] == 'M') {
512 offset = BE_4BYTE(buf + 4);
514 fprintf(stderr, "ERROR: undefined byte sex \"%2.2s\".\n", buf);
519 if (offset > 0 && lseek(fd, offset, SEEK_CUR) == -1) {
524 tiff_base = abs_offset + 6; /* offsets are relative to TIFF base */
526 if (read(fd, buf, 2) != 2) {
530 count = E_2BTYE(little_endian, buf);
/*
 * NOTE(review): the entry position is hard-coded as tiff_base + 8 + 2
 * instead of being derived from the offset read from the header -- confirm
 * this is intended for files whose first IFD does not start at offset 8.
 */
532 _exif_ifd_process(fd, count, tiff_base + 8 + 2, tiff_base,
533 little_endian, info);
535 return _exif_extra_get(fd, abs_offset, len, info);
539 * Process the file as JFIF.
/*
 * Entry point for files whose first segment is JFIF.
 *
 * *info is zeroed and info->orientation set to 1. The rest of the JFIF
 * segment is skipped (len - 2 bytes from the current position) and the
 * next 4-byte segment header is read; its first byte must be 0xff. If the
 * marker is JPEG_MARKER_EXIF the work is delegated to _exif_data_get(),
 * otherwise to _jpeg_info_get(). Either way the new segment length is
 * passed on exactly as read (no 2 is subtracted here, unlike in
 * _jpeg_info_get()).
 */
542 _jfif_data_get(int fd, int len, struct lms_image_info *info)
544 unsigned char buf[4];
546 memset(info, 0, sizeof(*info));
547 info->orientation = 1;
549 /* JFIF provides no useful information, try to find out Exif */
550 if (lseek(fd, len - 2, SEEK_CUR) == -1) {
555 if (read(fd, buf, 4) != 4) {
560 len = ((buf[2] << 8) | buf[3]);
561 if (buf[0] != 0xff) {
562 fprintf(stderr, "ERROR: expected 0xff marker, got %#x\n", buf[0]);
566 if (buf[1] == JPEG_MARKER_EXIF)
567 return _exif_data_get(fd, len, info);
569 return _jpeg_info_get(fd, len, info);
/* Plugin name; assigned to plugin.name in lms_plugin_open(). */
572 static const char _name[] = "jpeg";
/*
 * File extensions handled by this plugin. _match() looks a path up in this
 * table, and _parse() uses the matched entry's length to cut the extension
 * off when it derives a title from the file name.
 */
573 static const struct lms_string_size _exts[] = {
574 LMS_STATIC_STRING_SIZE(".jpg"),
575 LMS_STATIC_STRING_SIZE(".jpeg"),
576 LMS_STATIC_STRING_SIZE(".jpe")
/*
 * Generic plugin part. lms_plugin_open() returns the enclosing structure
 * cast to struct lms_plugin *.
 */
580 struct lms_plugin plugin;
/*
 * Image database handle: created in _setup(), used by _start() and
 * _parse(), released in _finish().
 */
581 lms_db_image_t *img_db;
/*
 * Looks up the extension of path in _exts with lms_which_extension() and
 * returns the resulting index plus one, packed into a pointer. _parse()
 * recovers the index with ((int)match) - 1. The + 1 presumably keeps index
 * 0 distinct from a NULL "no match" result; the no-match path is not part
 * of this excerpt.
 * NOTE(review): int and pointer differ in width on LP64 targets; intptr_t
 * would be the portable type for this round trip.
 */
585 _match(struct plugin *p, const char *path, int len, int base)
589 i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
593 return (void*)(i + 1);
/*
 * Parses one file. The file is opened read-only and _jpeg_data_get()
 * yields the type and length of the first segment after the start marker:
 *  - JPEG_MARKER_EXIF: _exif_data_get() fills info;
 *  - JPEG_MARKER_JFIF: _jfif_data_get() fills info;
 *  - anything else is reported as an unsupported marker.
 * When no title was found, one is built from the file name: the bytes of
 * finfo->path starting at finfo->base, minus the matched extension. Title
 * and artist are passed through lms_charset_conv() and the record is
 * added with lms_db_image_add(). The strings are freed afterwards.
 */
597 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
599 struct lms_image_info info = {0};
600 int fd, type, len, r;
602 fd = open(finfo->path, O_RDONLY);
608 if (_jpeg_data_get(fd, &type, &len) != 0) {
613 if (type == JPEG_MARKER_EXIF) {
614 if (_exif_data_get(fd, len, &info) != 0) {
615 fprintf(stderr, "ERROR: could not get EXIF info (%s).\n",
620 } else if (type == JPEG_MARKER_JFIF) {
621 if (_jfif_data_get(fd, len, &info) != 0) {
622 fprintf(stderr, "ERROR: could not get JPEG size (%s).\n",
628 fprintf(stderr, "ERROR: unsupported JPEG marker %#x (%s)\n", type,
/* The file's mtime is used as date; any guarding condition is not part of this excerpt. */
635 info.date = finfo->mtime;
637 if (!info.title.str) {
/* match carries the _exts index plus one, as produced by _match(). */
640 ext_idx = ((int)match) - 1;
641 info.title.len = finfo->path_len - finfo->base - _exts[ext_idx].len;
/* NOTE(review): the malloc() result is used by memcpy() without a NULL check. */
642 info.title.str = malloc((info.title.len + 1) * sizeof(char));
643 memcpy(info.title.str, finfo->path + finfo->base, info.title.len);
644 info.title.str[info.title.len] = '\0';
648 lms_charset_conv(ctxt->cs_conv, &info.title.str, &info.title.len);
650 lms_charset_conv(ctxt->cs_conv, &info.artist.str, &info.artist.len);
653 r = lms_db_image_add(plugin->img_db, &info);
657 free(info.title.str);
659 free(info.artist.str);
/* Advise the kernel that the whole file (offset 0, length 0) is no longer needed. */
661 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
/*
 * Setup callback (registered as plugin.setup in lms_plugin_open()):
 * creates the image database handle from ctxt->db and stores it in
 * plugin->img_db.
 */
668 _setup(struct plugin *plugin, struct lms_context *ctxt)
670 plugin->img_db = lms_db_image_new(ctxt->db);
/*
 * Start callback (registered as plugin.start in lms_plugin_open()):
 * returns the result of lms_db_image_start() on the plugin's image
 * database handle. ctxt is not used by the visible code.
 */
678 _start(struct plugin *plugin, struct lms_context *ctxt)
680 return lms_db_image_start(plugin->img_db);
/*
 * Finish callback (registered as plugin.finish in lms_plugin_open()):
 * releases the image database handle with lms_db_image_free() and returns
 * its result.
 */
684 _finish(struct plugin *plugin, struct lms_context *ctxt)
687 return lms_db_image_free(plugin->img_db);
/*
 * Close callback (registered as plugin.close in lms_plugin_open()). Its
 * body is not part of this excerpt.
 */
694 _close(struct plugin *plugin)
/*
 * Exported plugin entry point: allocates a struct plugin, fills in the
 * plugin name and the match/parse/close/setup/start/finish callbacks and
 * returns the object as a struct lms_plugin pointer. img_db is not set
 * here; _setup() assigns it.
 */
700 API struct lms_plugin *
701 lms_plugin_open(void)
703 struct plugin *plugin;
/* NOTE(review): the malloc() result is dereferenced below without a NULL check. */
705 plugin = malloc(sizeof(*plugin));
706 plugin->plugin.name = _name;
707 plugin->plugin.match = (lms_plugin_match_fn_t)_match;
708 plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
709 plugin->plugin.close = (lms_plugin_close_fn_t)_close;
710 plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
711 plugin->plugin.start = (lms_plugin_start_fn_t)_start;
712 plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
714 return (struct lms_plugin *)plugin;