2 * Copyright (C) 2008 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
19 * @author Gustavo Sverzut Barbieri <gustavo.barbieri@openbossa.org>
33 #define _XOPEN_SOURCE 600
34 #include <lightmediascanner_plugin.h>
35 #include <lightmediascanner_db.h>
36 #include <lightmediascanner_charset_conv.h>
37 #include <sys/types.h>
46 #define ID3V2_HEADER_SIZE 10
47 #define ID3V2_FOOTER_SIZE 10
50 ID3_ENCODING_LATIN1 = 0,
57 #define ID3_NUM_ENCODINGS ID3_ENCODING_LAST
60 #include "id3v1_genres.c"
/* Tag fields collected for one file. The parsers below reach them through
 * struct id3_info (info->title, info->artist, info->album, info->genre). */
63 struct lms_string_size title;
64 struct lms_string_size artist;
65 struct lms_string_size album;
66 struct lms_string_size genre;
/* Priority of the frame that supplied the current artist value;
 * _get_id3v2_artist only replaces the artist when a frame with a strictly
 * higher priority shows up. _parse starts it at -1. */
68 int cur_artist_priority;
/* Decoded form of one ID3v2 frame header, filled in by
 * _parse_id3v2_frame_header and consumed by _parse_id3v2. */
71 struct id3v2_frame_header {
/* Payload size decoded from the raw header bytes with _to_uint. */
73 unsigned int frame_size;
/* Non-zero when the raw flag byte has bit 0x1 set; _parse_id3v2 then skips
 * 4 extra bytes in front of the payload. */
75 int data_length_indicator;
85 } __attribute__((packed));
/* Per-plugin state. lms_plugin_open returns a struct plugin cast to
 * struct lms_plugin *, so the embedded lms_plugin below is what callers see.
 * NOTE(review): that cast relies on this being the first member - confirm. */
88 struct lms_plugin plugin;
/* Audio database handle: created in _setup, released in _finish. */
89 lms_db_audio_t *audio_db;
/* One charset converter per ID3 text encoding, indexed by the encoding
 * value; entries may be NULL (see _setup). */
90 lms_charset_conv_t *cs_convs[ID3_NUM_ENCODINGS];
/* Plugin name, stored in plugin.name by lms_plugin_open. */
93 static const char _name[] = "id3";
/* File extensions handled by this plugin. _match reports the matching index
 * plus one, and _parse uses that index to strip the extension when it has to
 * fall back to the file name as title. */
94 static const struct lms_string_size _exts[] = {
95 LMS_STATIC_STRING_SIZE(".mp3"),
96 LMS_STATIC_STRING_SIZE(".aac")
/* Category list; NOTE(review): presumably exported through lms_plugin_info -
 * the reference is not visible here. */
98 static const char *_cats[] = {
/* Author list; NOTE(review): presumably exported through lms_plugin_info -
 * the reference is not visible here. */
103 static const char *_authors[] = {
104 "Andre Moreira Magalhaes",
105 "Gustavo Sverzut Barbieri",
/* Assemble the leading bytes of data into an unsigned int, first byte most
 * significant. Each byte contributes all 8 bits.
 *
 * data:      raw bytes to decode.
 * data_size: number of bytes available; at most four are used.
 *
 * NOTE(review): last is unsigned, so data_size <= 0 makes data_size - 1 wrap
 * to a huge value and the loop would run far past the buffer - confirm that
 * callers never pass a non-positive size. */
110 _to_uint(const char *data, int data_size)
112 unsigned int sum = 0;
113 unsigned int last, i;
/* Index of the last byte to consume (3 when four or more bytes exist). */
115 last = data_size > 4 ? 3 : data_size - 1;
/* Byte i is shifted so that data[0] lands in the highest position. */
117 for (i = 0; i <= last; i++)
118 sum |= ((unsigned char) data[i]) << ((last - i) * 8);
/* Test whether byte has its three most significant bits set
 * ((byte & 0xE0) == 0xE0). The tag search calls it while handling a 0xff
 * byte ("synch pattern"). What is returned for each outcome is decided on
 * lines not visible here. */
124 _is_id3v2_second_synch_byte(unsigned char byte)
128 if ((byte & 0xE0) == 0xE0)
/* Body of the ID3v2 tag search; _parse calls it as _find_id3v2(fd) and
 * treats a result >= 0 as the tag offset (the signature line is not visible
 * here).
 *
 * The file is read in chunks of sizeof(buffer) bytes and scanned for the
 * 3-byte "ID3" marker. A marker at the very start is tested first; after that
 * the loop tracks markers that straddle two chunks (prev_part_match) and
 * 0xff bytes whose follow-up byte sits in the next chunk
 * (prev_part_match_sync). When the marker is found the returned offset is
 * buffer_offset, reduced by the length of the part matched in the previous
 * chunk when applicable. A short or failed read ends the search.
 *
 * NOTE(review): the size of the partial-match compare is derived from
 * sizeof(buffer) while the pointer indexes pattern; this is only consistent
 * if buffer and pattern have the same size - the declaration of buffer is
 * not visible here.
 * NOTE(review): only prev_part_match_sync is initialised in the declaration
 * below; confirm prev_part_match is set before its first read. */
136 static const char pattern[3] = "ID3";
138 unsigned int prev_part_match, prev_part_match_sync = 0;
141 if (read(fd, buffer, sizeof(buffer)) != sizeof(buffer))
144 if (memcmp(buffer, pattern, sizeof(pattern)) == 0)
147 /* This loop is the crux of the find method. There are three cases that we
148 * want to account for:
149 * (1) The previously searched buffer contained a partial match of the
150 * search pattern and we want to see if the next one starts with the
151 * remainder of that pattern.
153 * (2) The search pattern is wholly contained within the current buffer.
155 * (3) The current buffer ends with a partial match of the pattern. We will
156 * note this for use in the next iteration, where we will check for the rest
160 prev_part_match_sync = 0;
163 const char *p, *p_end;
165 /* (1) previous partial match */
166 if (prev_part_match_sync) {
167 if (_is_id3v2_second_synch_byte(buffer[0]))
169 prev_part_match_sync = 0;
172 if (prev_part_match) {
173 const int size = sizeof(buffer) - prev_part_match;
174 const char *part_pattern = pattern + prev_part_match;
176 if (memcmp(buffer, part_pattern, size) == 0)
177 return buffer_offset - prev_part_match;
182 p_end = buffer + sizeof(buffer);
183 for (p = buffer; p < p_end; p++) {
184 if (*p == pattern[0]) {
185 /* Try to match pattern, possible partial contents */
191 if (todo == 0 || memcmp(q, pattern + 1, todo) == 0) {
193 if (todo == sizeof(buffer))
194 /* (2) pattern contained in current buffer */
195 return buffer_offset;
197 /* (3) partial match */
198 prev_part_match = todo;
201 } else if ((unsigned char)*p == 0xff) {
202 /* Try to match synch pattern, possible partial contents */
207 if (_is_id3v2_second_synch_byte(*q))
208 /* (2) synch pattern contained in current buffer */
211 /* (3) partial match */
212 prev_part_match_sync = 1;
216 if (read(fd, buffer, sizeof(buffer)) != sizeof(buffer))
218 buffer_offset += sizeof(buffer);
/* Map an ID3v2 major version to a frame header size. _parse_id3v2 uses the
 * result as the number of bytes to read for every frame header. The body is
 * not visible here. */
225 _get_id3v2_frame_header_size(unsigned int version)
/* Decode a raw frame header into fh.
 *
 * data:    raw frame header bytes as read from the file.
 * version: ID3v2 major version; it selects one of three layouts (the
 *          selecting conditions are not visible here).
 * fh:      output structure.
 *
 * Layouts visible below:
 *  - 3-byte frame id followed by a 3-byte size, no data length indicator;
 *  - 4-byte id and 4-byte size, compression taken from data[9] & 0x40;
 *  - 4-byte id and 4-byte size, compression taken from data[9] & 0x4 and
 *    data length indicator from data[9] & 0x1.
 *
 * NOTE(review): sizes are decoded with _to_uint, i.e. 8 bits per byte.
 * Confirm against the ID3v2 specification whether any supported version
 * stores frame sizes as syncsafe (7 bits per byte) integers. */
240 _parse_id3v2_frame_header(char *data, unsigned int version, struct id3v2_frame_header *fh)
246 memcpy(fh->frame_id, data, 3);
248 fh->frame_size = _to_uint(data + 3, 3);
250 fh->data_length_indicator = 0;
253 memcpy(fh->frame_id, data, 4);
254 fh->frame_size = _to_uint(data + 4, 4);
255 fh->compression = data[9] & 0x40;
256 fh->data_length_indicator = 0;
260 memcpy(fh->frame_id, data, 4);
261 fh->frame_size = _to_uint(data + 4, 4);
262 fh->compression = data[9] & 0x4;
263 fh->data_length_indicator = data[9] & 0x1;
/* Copy a text frame payload into s and convert its charset.
 *
 * frame_data: payload bytes (text encoding byte already skipped by caller).
 * frame_size: number of payload bytes.
 * s:          destination; its buffer is grown with realloc when frame_size
 *             exceeds s->len.
 * cs_conv:    converter handed to lms_charset_conv.
 * strip:      NOTE(review): presumably gates the final
 *             lms_string_size_strip_and_free call - the condition is not
 *             visible here.
 *
 * The copy is NUL-terminated at s->str[frame_size] before conversion. */
269 _get_id3v2_frame_info(const char *frame_data, unsigned int frame_size, struct lms_string_size *s, lms_charset_conv_t *cs_conv, int strip)
273 if (frame_size > s->len) {
/* One extra byte for the terminator written below. */
276 tmp = realloc(s->str, sizeof(char) * (frame_size + 1));
281 memcpy(s->str, frame_data, frame_size);
282 s->str[frame_size] = '\0';
285 lms_charset_conv(cs_conv, &s->str, &s->len);
287 lms_string_size_strip_and_free(s);
/* Store the artist from a frame, but only if that frame outranks the one
 * already stored.
 *
 * index:      frame number minus one, computed by the caller from the digit
 *             in the frame id; values outside the priority table are
 *             rejected.
 * frame_data, frame_size, cs_conv: forwarded to _get_id3v2_frame_info.
 * info:       receives the new artist string and its priority.
 *
 * The previous artist buffer is freed before being replaced. */
291 _get_id3v2_artist(unsigned int index, const char *frame_data, unsigned int frame_size, struct id3_info *info, lms_charset_conv_t *cs_conv)
/* Priority per index; a larger value wins. */
293 static const unsigned char artist_priorities[] = {3, 4, 2, 1};
294 const unsigned int index_max = sizeof(artist_priorities) / sizeof(*artist_priorities);
296 if (index >= index_max)
299 if (artist_priorities[index] > info->cur_artist_priority) {
300 struct lms_string_size artist = {0};
302 _get_id3v2_frame_info(frame_data, frame_size, &artist, cs_conv, 1);
304 if (info->artist.str)
305 free(info->artist.str);
306 info->artist = artist;
307 info->cur_artist_priority = artist_priorities[index];
/* Look up the name of an ID3v1 genre number and copy it into out.
 *
 * genre: ID3v1 genre index; only values below ID3V1_NUM_GENRES are handled
 *        by the visible branch.
 * out:   destination string; its buffer is reallocated when too small.
 *
 * Names are stored back to back in id3v1_genres_mem; entry i spans
 * id3v1_genres_offsets[i] up to id3v1_genres_offsets[i + 1], so the offsets
 * table is read one past the genre index.
 * NOTE(review): the buffer is grown when len > out->len but reallocated to
 * size bytes; the assignment of len is not visible here - confirm the two
 * agree. */
314 _get_id3v1_genre(unsigned int genre, struct lms_string_size *out)
316 if (genre < ID3V1_NUM_GENRES) {
317 unsigned int size, base, len;
319 base = id3v1_genres_offsets[genre];
320 size = id3v1_genres_offsets[genre + 1] - base;
323 if (len > out->len) {
324 char *p = realloc(out->str, size);
331 memcpy(out->str, id3v1_genres_mem + base, size);
/* Convert a textual genre number with atoi and resolve it through
 * _get_id3v1_genre, returning that function's result.
 *
 * NOTE(review): atoi yields 0 for text that does not start with a number, so
 * such input resolves to genre index 0; a negative value converts to a large
 * unsigned index and fails the range check in _get_id3v1_genre. */
339 _parse_id3v1_genre(const char *str_genre, struct lms_string_size *out)
341 return _get_id3v1_genre(atoi(str_genre), out);
/* Extract the genre from a content-type frame into out.
 *
 * frame_data, frame_size, cs_conv: forwarded to _get_id3v2_frame_info to
 *                                  obtain the frame text.
 * out:                             receives the resulting genre string.
 *
 * The text is first tested for being a bare number (all digits, not starting
 * with '('); if so it is resolved as an ID3v1 genre index. Otherwise the
 * "(number)" and "(number)text" forms described in the comment further down
 * are handled.
 * NOTE(review): isdigit receives a plain char; for bytes >= 0x80 on
 * platforms where char is signed the argument is negative - consider
 * whether a cast to unsigned char is needed. */
345 _get_id3v2_genre(const char *frame_data, unsigned int frame_size, struct lms_string_size *out, lms_charset_conv_t *cs_conv)
348 struct lms_string_size genre = {0};
350 _get_id3v2_frame_info(frame_data, frame_size, &genre, cs_conv, 1);
354 is_number = (genre.len != 0 && genre.str[0] != '(');
356 for (i = 0; i < genre.len; ++i) {
357 if (!isdigit(genre.str[i])) {
364 if (is_number && _parse_id3v1_genre(genre.str, out) == 0) {
365 /* id3v1 genre found */
370 /* ID3v2.3 "content type" can contain a ID3v1 genre number in parenthesis at
371 * the beginning of the field. If this is all that the field contains, do a
372 * translation from that number to the name and return that. If there is a
373 * string following the ID3v1 genre number, that is considered to be
374 * authoritative and we return that instead. Or finally, the field may
375 * simply be free text, in which case we just return the value. */
377 if (genre.len > 1 && genre.str[0] == '(') {
378 char *closing = NULL;
380 if (genre.str[genre.len - 1] == ')') {
381 closing = strchr(genre.str, ')');
382 if (closing == genre.str + genre.len - 1) {
383 /* ) is the last character and only appears once in the
384 * string get the id3v1 genre enclosed by parentheses
386 if (_parse_id3v1_genre(genre.str + 1, out) == 0) {
393 /* get the string followed by the id3v1 genre */
395 closing = strchr(genre.str, ')');
399 out->len = genre.len - (closing - genre.str);
400 out->str = genre.str;
401 memmove(out->str, closing, out->len + 1); /* includes '\0' */
402 lms_string_size_strip_and_free(out);
/* Read a track-number frame: fetch the frame text without stripping
 * (strip = 0) and store atoi of it in info->trackno. atoi stops at the first
 * non-digit character.
 * NOTE(review): trackno.str is passed to atoi in the visible lines without a
 * NULL check, and the release of the temporary string is not visible
 * here. */
412 _get_id3v2_trackno(const char *frame_data, unsigned int frame_size, struct id3_info *info, lms_charset_conv_t *cs_conv)
414 struct lms_string_size trackno = {0};
416 _get_id3v2_frame_info(frame_data, frame_size, &trackno, cs_conv, 0);
419 info->trackno = atoi(trackno.str);
/* Dispatch one text frame to the matching field extractor.
 *
 * fh:         decoded frame header (frame id and payload size).
 * frame_data: payload; its first byte is the text encoding.
 * info:       tag information being filled in.
 * cs_convs:   converters indexed by text encoding.
 *
 * Frames of one byte or less carry only the encoding and are ignored. For
 * the generic UTF-16 encoding the visible code compares the first two
 * payload bytes with FE FF to pick the big-endian converter and otherwise
 * uses little-endian. The frame id is then matched character by character
 * so that both the 3-character ID3v2.2 ids and the 4-character ids are
 * recognised. */
424 _parse_id3v2_frame(struct id3v2_frame_header *fh, const char *frame_data, struct id3_info *info, lms_charset_conv_t **cs_convs)
426 lms_charset_conv_t *cs_conv;
427 unsigned int text_encoding, frame_size;
430 /* ignore frames which contains just the encoding */
431 if (fh->frame_size <= 1)
435 fprintf(stderr, "frame id = %.4s frame size = %d text encoding = %d\n",
436 fh->frame_id, fh->frame_size, frame_data[0]);
439 /* All used frames start with 'T' */
450 text_encoding = frame_data[0];
452 /* skip first byte - text encoding */
454 frame_size = fh->frame_size - 1;
456 if (text_encoding < ID3_NUM_ENCODINGS) {
457 if (text_encoding == ID3_ENCODING_UTF16) {
458 /* ignore frames which contains just the encoding */
462 if (memcmp(frame_data, "\xfe\xff", 2) == 0)
463 text_encoding = ID3_ENCODING_UTF16BE;
465 text_encoding = ID3_ENCODING_UTF16LE;
469 cs_conv = cs_convs[text_encoding];
473 /* ID3v2.2 used 3 bytes for the frame id, so let's check it */
/* TT2 / TIT2: title */
474 if ((fid[1] == 'T' && fid[2] == '2') ||
475 (fid[1] == 'I' && fid[2] == 'T' && fid[3] == '2'))
476 _get_id3v2_frame_info(frame_data, frame_size, &info->title, cs_conv, 1);
/* TP...: artist; the digit in the id, minus '1', becomes the priority index */
477 else if (fid[1] == 'P') {
479 _get_id3v2_artist(fid[3] - '1', frame_data, frame_size,
481 else if (fid[2] >= '1' && fid[2] <= '4')
482 _get_id3v2_artist(fid[2] - '1', frame_data, frame_size,
/* TAL...: album */
486 else if (fid[1] == 'A' && fid[2] == 'L')
487 _get_id3v2_frame_info(frame_data, frame_size, &info->album, cs_conv, 1);
488 /* TCON (Content/Genre) */
489 else if (fid[1] == 'C' && fid[2] == 'O' && fid[3] == 'N')
490 _get_id3v2_genre(frame_data, frame_size, &info->genre, cs_conv);
/* TRK / TRCK: track number */
491 else if (fid[1] == 'R' && (fid[2] == 'K' ||
492 (fid[2] == 'C' && fid[3] == 'K')))
493 _get_id3v2_trackno(frame_data, frame_size, info, cs_conv);
/* Parse the ID3v2 tag located at id3v2_offset in fd and fill info.
 *
 * fd:           open file descriptor.
 * id3v2_offset: offset of the tag header as found by the tag search.
 * info:         tag information to fill in.
 * cs_convs:     converters indexed by text encoding, handed to
 *               _parse_id3v2_frame.
 *
 * Steps visible below: seek to the tag, read the 10-byte header, take the
 * tag size from bytes 6..9 and the major version from byte 3, skip an
 * extended header when flagged, discount a footer when flagged, then walk
 * the frames. Non-empty frames whose id starts with 'T' (except "TXXX") are
 * loaded into memory and dispatched; all other frames are skipped with
 * lseek. The walk stops on a short header read or when a frame header starts
 * with a zero byte.
 *
 * NOTE(review): the flag masks used here are 0x20 (extended header) and 0x8
 * (footer). Confirm them against the ID3v2 header flag layout; the inline
 * "bit 6" / "bit 4" remarks only fit these masks if bits are counted from 1.
 * NOTE(review): tag_size is decoded with 8 bits per byte; confirm whether a
 * syncsafe conversion happens on lines not visible here.
 * NOTE(review): the loop bound frame_data_length - frame_header_size is
 * unsigned and wraps when the tag is smaller than one frame header.
 * NOTE(review): the malloc result is handed to read on the next line without
 * a NULL check. */
497 _parse_id3v2(int fd, long id3v2_offset, struct id3_info *info, lms_charset_conv_t **cs_convs)
499 char header_data[10], frame_header_data[10];
500 unsigned int tag_size, major_version, frame_data_pos, frame_data_length, frame_header_size;
501 int extended_header, footer_present;
502 struct id3v2_frame_header fh;
505 lseek(fd, id3v2_offset, SEEK_SET);
508 if (read(fd, header_data, ID3V2_HEADER_SIZE) != ID3V2_HEADER_SIZE)
511 tag_size = _to_uint(header_data + 6, 4);
516 major_version = header_data[3];
519 frame_data_length = tag_size;
521 /* check for extended header */
522 extended_header = header_data[5] & 0x20; /* bit 6 */
523 if (extended_header) {
524 /* skip extended header */
525 unsigned int extended_header_size;
526 char extended_header_data[4];
528 if (read(fd, extended_header_data, 4) != 4)
530 extended_header_size = _to_uint(extended_header_data, 4);
/* Four bytes of the extended header were already consumed by the read. */
531 lseek(fd, extended_header_size - 4, SEEK_CUR);
532 frame_data_pos += extended_header_size;
533 frame_data_length -= extended_header_size;
536 footer_present = header_data[5] & 0x8; /* bit 4 */
537 if (footer_present && frame_data_length > ID3V2_FOOTER_SIZE)
538 frame_data_length -= ID3V2_FOOTER_SIZE;
540 frame_header_size = _get_id3v2_frame_header_size(major_version);
541 while (frame_data_pos < frame_data_length - frame_header_size) {
542 nread = read(fd, frame_header_data, frame_header_size);
546 if (nread != frame_header_size)
549 if (frame_header_data[0] == 0)
552 _parse_id3v2_frame_header(frame_header_data, major_version, &fh);
554 if (fh.frame_size > 0 &&
556 fh.frame_id[0] == 'T' &&
557 memcmp(fh.frame_id, "TXXX", 4) != 0) {
/* Skip the 4 bytes that precede the payload when the indicator is set. */
560 if (fh.data_length_indicator)
561 lseek(fd, 4, SEEK_CUR);
563 frame_data = malloc(sizeof(char) * fh.frame_size);
564 if (read(fd, frame_data, fh.frame_size) != fh.frame_size) {
569 _parse_id3v2_frame(&fh, frame_data, info, cs_convs);
/* Frame not of interest: step over its payload without reading it. */
573 if (fh.data_length_indicator)
574 lseek(fd, fh.frame_size + 4, SEEK_CUR);
576 lseek(fd, fh.frame_size, SEEK_CUR);
579 frame_data_pos += fh.frame_size + frame_header_size;
/* Copy one fixed-width ID3v1 text field into s and convert its charset.
 *
 * s:       destination string; its buffer is reallocated to len + 1 bytes.
 * buf:     start of the field inside the tag.
 * maxlen:  width of the field in bytes.
 * cs_conv: converter handed to lms_charset_conv.
 *
 * The field is scanned up to a NUL or maxlen bytes to find the sub-range
 * [start, p_last] that is copied; how start and p_last are chosen is on
 * lines not visible here.
 * NOTE(review): the loop condition reads *p before testing p < p_end, so a
 * field with no NUL has one byte past its end inspected. */
586 _id3v1_str_get(struct lms_string_size *s, const char *buf, int maxlen, lms_charset_conv_t *cs_conv)
589 const char *p, *p_end, *p_last;
593 p_end = buf + maxlen;
594 for (p = buf; *p != '\0' && p < p_end; p++) {
604 len = (p_last - buf) - start;
608 len++; /* p_last is not included yet */
612 tmp = realloc(s->str, sizeof(char) * (len + 1));
619 memcpy(s->str, buf + start, len);
623 lms_charset_conv(cs_conv, &s->str, &s->len);
/* Read an ID3v1 tag body from the current position of fd and use it to fill
 * only those fields of info that are still unset.
 *
 * fd:      file descriptor positioned by the caller (right after "TAG").
 * info:    tag information; title, artist, album and genre are filled only
 *          when their str is NULL, trackno only when it is still -1.
 * cs_conv: converter for the text fields.
 *
 * The track number is taken from the last comment byte when the byte before
 * it is zero and the last byte is non-zero.
 * NOTE(review): only a read result of -1 is treated as failure here; a short
 * read would leave part of tag unset - confirm this is acceptable. */
627 _parse_id3v1(int fd, struct id3_info *info, lms_charset_conv_t *cs_conv)
629 struct id3v1_tag tag;
630 if (read(fd, &tag, sizeof(struct id3v1_tag)) == -1)
633 if (!info->title.str)
634 _id3v1_str_get(&info->title, tag.title, sizeof(tag.title), cs_conv);
635 if (!info->artist.str)
636 _id3v1_str_get(&info->artist, tag.artist, sizeof(tag.artist), cs_conv);
637 if (!info->album.str)
638 _id3v1_str_get(&info->album, tag.album, sizeof(tag.album), cs_conv);
639 if (!info->genre.str)
640 _get_id3v1_genre(tag.genre, &info->genre);
641 if (info->trackno == -1 &&
642 tag.comments[28] == 0 && tag.comments[29] != 0)
643 info->trackno = (unsigned char) tag.comments[29];
/* Plugin match hook: check the extension of path against _exts.
 *
 * On the visible return path the extension index plus one is handed back
 * disguised as a pointer; the +1 keeps index 0 distinct from NULL and is
 * undone in _parse. The handling of a failed lookup is not visible here. */
649 _match(struct plugin *p, const char *path, int len, int base)
653 i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
657 return (void*)(i + 1);
/* Plugin parse hook: extract tag data from one file and store it in the
 * audio database.
 *
 * plugin: plugin state (audio_db, cs_convs).
 * ctxt:   scanner context; its cs_conv is used for ID3v1 text and for the
 *         file-name fallback title.
 * finfo:  file being parsed (path, path_len, base, id).
 * match:  value produced by _match (extension index + 1).
 *
 * Flow visible below: open the file, search for an ID3v2 tag and parse it;
 * when no ID3v2 tag was found, look for "TAG" 128 bytes before the end of
 * the file and parse ID3v1; when there is still no title, build one from
 * the file name without directory and extension; add the record with
 * lms_db_audio_add; finally advise the kernel that the file data is no
 * longer needed and free the strings.
 *
 * NOTE(review): the fallback title buffer is passed to memcpy on the line
 * right after malloc without a NULL check. */
661 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
/* trackno and cur_artist_priority start at -1, meaning "not set yet". */
663 struct id3_info info = {{0}, {0}, {0}, {0}, -1, -1};
664 struct lms_audio_info audio_info = {0, {0}, {0}, {0}, {0}, 0, 0, 0};
668 fd = open(finfo->path, O_RDONLY);
674 id3v2_offset = _find_id3v2(fd);
675 if (id3v2_offset >= 0) {
677 fprintf(stderr, "id3v2 tag found in file %s with offset %ld\n",
678 finfo->path, id3v2_offset);
/* The ID3v2 result counts as incomplete unless every field was filled. */
680 if (_parse_id3v2(fd, id3v2_offset, &info, plugin->cs_convs) != 0 ||
681 !info.title.str || !info.artist.str ||
682 !info.album.str || !info.genre.str ||
683 info.trackno == -1) {
685 fprintf(stderr, "id3v2 invalid in file %s\n", finfo->path);
691 if (id3v2_offset < 0) {
694 fprintf(stderr, "id3v2 tag not found in file %s. trying id3v1\n",
697 /* check for id3v1 tag */
698 if (lseek(fd, -128, SEEK_END) == -1) {
703 if (read(fd, &tag, 3) == -1) {
708 if (memcmp(tag, "TAG", 3) == 0) {
710 fprintf(stderr, "id3v1 tag found in file %s\n", finfo->path);
712 if (_parse_id3v1(fd, &info, ctxt->cs_conv) != 0) {
/* No title from any tag: fall back to the bare file name. */
719 if (!info.title.str) {
721 ext_idx = ((long)match) - 1;
722 info.title.len = finfo->path_len - finfo->base - _exts[ext_idx].len;
723 info.title.str = malloc((info.title.len + 1) * sizeof(char));
724 memcpy(info.title.str, finfo->path + finfo->base, info.title.len);
725 info.title.str[info.title.len] = '\0';
726 lms_charset_conv(ctxt->cs_conv, &info.title.str, &info.title.len);
729 if (info.trackno == -1)
733 fprintf(stderr, "file %s info\n", finfo->path);
734 fprintf(stderr, "\ttitle='%s'\n", info.title.str);
735 fprintf(stderr, "\tartist='%s'\n", info.artist.str);
736 fprintf(stderr, "\talbum='%s'\n", info.album.str);
737 fprintf(stderr, "\tgenre='%s'\n", info.genre.str);
738 fprintf(stderr, "\ttrack number='%d'\n", info.trackno);
/* The string structs are copied by value; the buffers stay owned by info
 * and are freed at the end of this function. */
741 audio_info.id = finfo->id;
742 audio_info.title = info.title;
743 audio_info.artist = info.artist;
744 audio_info.album = info.album;
745 audio_info.genre = info.genre;
746 audio_info.trackno = info.trackno;
747 r = lms_db_audio_add(plugin->audio_db, &audio_info);
750 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
754 free(info.title.str);
756 free(info.artist.str);
758 free(info.album.str);
760 free(info.genre.str);
/* Plugin setup hook: create the audio database handle and one charset
 * converter per ID3 text encoding.
 *
 * Encodings whose entry in id3_encodings is NULL get no converter
 * (cs_convs[i] is set to NULL); for all others a converter is created with
 * lms_charset_conv_new_full(0, 0) and the encoding name is added to it. The
 * actions taken on allocation failure are not visible here. */
766 _setup(struct plugin *plugin, struct lms_context *ctxt)
769 const char *id3_encodings[ID3_NUM_ENCODINGS] = {
777 plugin->audio_db = lms_db_audio_new(ctxt->db);
778 if (!plugin->audio_db)
781 for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
782 /* do not create charset conv for UTF-8 encoding */
783 if (!id3_encodings[i]) {
784 plugin->cs_convs[i] = NULL;
787 plugin->cs_convs[i] = lms_charset_conv_new_full(0, 0);
788 if (!plugin->cs_convs[i])
790 lms_charset_conv_add(plugin->cs_convs[i], id3_encodings[i]);
/* Plugin start hook: forwards to lms_db_audio_start on the audio database
 * created in _setup and returns its result. ctxt is unused on the visible
 * line. */
797 _start(struct plugin *plugin, struct lms_context *ctxt)
799 return lms_db_audio_start(plugin->audio_db);
/* Plugin finish hook: release what _setup created - the audio database
 * handle and every non-NULL charset converter. */
803 _finish(struct plugin *plugin, struct lms_context *ctxt)
807 if (plugin->audio_db)
808 lms_db_audio_free(plugin->audio_db);
810 for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
811 if (plugin->cs_convs[i])
812 lms_charset_conv_free(plugin->cs_convs[i]);
/* Plugin close hook, installed as plugin.close by lms_plugin_open. The body
 * is not visible here; NOTE(review): presumably it frees the struct plugin
 * allocated in lms_plugin_open - confirm. */
819 _close(struct plugin *plugin)
/* Exported entry point: allocate the plugin state, install the name and the
 * match/parse/close/setup/start/finish hooks, and return it as a generic
 * struct lms_plugin pointer.
 *
 * NOTE(review): the malloc result is dereferenced on the very next line
 * without a NULL check.
 * NOTE(review): audio_db and cs_convs are not initialised on the visible
 * lines; _finish tests them, so confirm _setup always runs first. */
825 API struct lms_plugin *
826 lms_plugin_open(void)
828 struct plugin *plugin;
830 plugin = (struct plugin *)malloc(sizeof(*plugin));
831 plugin->plugin.name = _name;
832 plugin->plugin.match = (lms_plugin_match_fn_t)_match;
833 plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
834 plugin->plugin.close = (lms_plugin_close_fn_t)_close;
835 plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
836 plugin->plugin.start = (lms_plugin_start_fn_t)_start;
837 plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
839 return (struct lms_plugin *)plugin;
/* Exported entry point describing this plugin through a function-local
 * static struct lms_plugin_info. Only the description and URL strings are
 * visible here; the remaining initialisers and the return statement are
 * not. */
842 API struct lms_plugin_info *
843 lms_plugin_info(void)
845 static struct lms_plugin_info info = {
848 "ID3 v1 and v2 for mp3 files",
851 "http://lms.garage.maemo.org"