2 * Copyright (C) 2008 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
32 #define _XOPEN_SOURCE 600
33 #include <lightmediascanner_plugin.h>
34 #include <lightmediascanner_db.h>
35 #include <lightmediascanner_charset_conv.h>
36 #include <sys/types.h>
/* Number of bytes read as the fixed ID3v2 tag header. */
45 #define ID3V2_HEADER_SIZE 10
/* Number of bytes removed from the frame area when the footer flag is set. */
46 #define ID3V2_FOOTER_SIZE 10
/* Text encoding ids; the first byte of a text frame is compared against them. */
49 ID3_ENCODING_LATIN1 = 0,
/* Number of encodings; also the length of the per-encoding converter table. */
56 #define ID3_NUM_ENCODINGS ID3_ENCODING_LAST
59 #include "id3v1_genres.c"
/* Metadata collected for one file while its ID3 tag is parsed. The string
 * members are heap-allocated by the parsers and released by _parse(). */
62 struct lms_string_size title;
63 struct lms_string_size artist;
64 struct lms_string_size album;
65 struct lms_string_size genre;
/* Track number, stored as a single byte. */
66 unsigned char trackno;
/* Priority of the frame that supplied `artist`; only a frame with a strictly
 * higher priority replaces the stored artist. */
67 int cur_artist_priority;
/* Decoded form of one ID3v2 frame header, filled in by
 * _parse_id3v2_frame_header(). */
70 struct id3v2_frame_header {
/* Payload size taken from the frame header. */
72 unsigned int frame_size;
/* Non-zero when the frame flags carry a data length indicator; the tag parser
 * then skips 4 extra bytes in front of the payload. */
74 int data_length_indicator;
/* NOTE(review): this packed terminator presumably closes the on-disk ID3v1
 * tag layout (struct id3v1_tag) rather than the frame header - confirm. */
84 } __attribute__((packed));
/* Plugin instance state. lms_plugin_open() hands the instance out cast to
 * struct lms_plugin *, so the generic part is the leading member. */
87 struct lms_plugin plugin;
/* Audio database handle created in _setup() and released in _finish(). */
88 lms_db_audio_t *audio_db;
/* One charset converter per ID3 text encoding, indexed by encoding id.
 * Entries may be NULL (no conversion for that encoding). */
89 lms_charset_conv_t *cs_convs[ID3_NUM_ENCODINGS];
/* Name installed in lms_plugin.name by lms_plugin_open(). */
92 static const char _name[] = "id3";
/* File extensions handled by this plugin. _match() looks paths up in this
 * table and _parse() uses the matched entry's length to strip the extension
 * when it falls back to the file name as title. */
93 static const struct lms_string_size _exts[] = {
94 LMS_STATIC_STRING_SIZE(".mp3"),
95 LMS_STATIC_STRING_SIZE(".aac")
/**
 * Decode up to four bytes of big-endian data into an unsigned integer.
 *
 * @param data      buffer holding the bytes, most significant byte first.
 * @param data_size number of bytes available in @p data; at most the first
 *                  four are used.
 * @return the decoded value, or 0 when @p data is NULL or @p data_size is
 *         not positive.
 */
static unsigned int
_to_uint(const char *data, int data_size)
{
    unsigned int sum = 0;
    int count, i;

    /* A non-positive size used to wrap the loop bound around and read far
     * past the buffer; there is nothing to decode in that case. */
    if (!data || data_size <= 0)
        return 0;

    count = data_size > 4 ? 4 : data_size;

    for (i = 0; i < count; i++) {
        /* Widen to unsigned int before shifting: a byte >= 0x80 promoted to
         * int and shifted left by 24 would overflow a signed int. */
        sum |= (unsigned int)(unsigned char)data[i] << ((count - 1 - i) * 8);
    }

    return sum;
}
/* Tests a byte against the second part of the frame synch pattern
 * (11111111 111). The check below accepts bytes whose three most significant
 * bits are all set. */
113 _is_id3v2_second_synch_byte(unsigned char byte)
117 if ((byte & 0xE0) == 0xE0)
/*
 * Body of _find_id3v2(): scans the file referred to by fd, one buffer at a
 * time, for the 3-byte "ID3" marker. The return statements below yield the
 * file offset at which the marker starts; the caller treats any non-negative
 * result as the tag offset. While scanning, the code also looks for a 0xFF
 * byte followed by a byte accepted by _is_id3v2_second_synch_byte().
 */
/* File offset of the first byte of the buffer currently being examined. */
125 long buffer_offset = 0;
/* Requested read size: the full capacity of buffer. */
127 int buffer_size = sizeof(buffer);
128 const char pattern[] = "ID3";
131 /* These variables are used to keep track of a partial match that happens at
132 * the end of a buffer. */
/* Index in the previous buffer where a partial "ID3" match began, or -1. */
133 int previous_partial_match = -1;
/* Set when the previous buffer ended with a 0xFF byte. */
134 int previous_partial_synch_match = 0;
/* Index of the 0xFF candidate currently examined, or -1 when none is left. */
135 int first_synch_byte;
137 /* Start the search at the beginning of the file. */
138 lseek(fd, 0, SEEK_SET);
/* Anything other than a completely filled buffer is treated as a special
 * case by this first read. */
140 if ((nread = read(fd, &buffer, buffer_size)) != buffer_size)
143 /* check if pattern is in the beginning of the file */
144 if (memcmp(buffer, pattern, 3) == 0)
147 /* This loop is the crux of the find method. There are three cases that we
148 * want to account for:
149 * (1) The previously searched buffer contained a partial match of the
150 * search pattern and we want to see if the next one starts with the
151 * remainder of that pattern.
153 * (2) The search pattern is wholly contained within the current buffer.
155 * (3) The current buffer ends with a partial match of the pattern. We will
156 * note this for use in the next iteration, where we will check for the rest
160 /* (1) previous partial match */
161 if (previous_partial_synch_match &&
162 _is_id3v2_second_synch_byte(buffer[0]))
165 if (previous_partial_match >= 0 &&
166 previous_partial_match < buffer_size) {
/* Number of pattern bytes already matched at the end of the previous buffer
 * (1 or 2 when that buffer was completely filled). */
167 const int pat_offset = buffer_size - previous_partial_match;
/* Compare the start of this buffer with the rest of the pattern; on success
 * return the file offset where the partial match began. */
169 if (memcmp(buffer, pattern + pat_offset, 3 - pat_offset) == 0)
170 return buffer_offset - buffer_size + previous_partial_match;
173 /* (2) pattern contained in current buffer */
/* Visit every 'I' in the buffer as a candidate start of the pattern. */
175 while ((p = memchr(p, 'I', buffer_size - (p - buffer)))) {
/* Fewer than 3 bytes remain, so a complete match cannot fit in this buffer. */
176 if (buffer_size - (p - buffer) < 3)
179 if (memcmp(p, pattern, 3) == 0)
180 return buffer_offset + (p - buffer);
/* Locate the first 0xFF byte, the first half of the synch pattern. */
185 p = memchr(buffer, 255, buffer_size);
187 first_synch_byte = p - buffer;
189 first_synch_byte = -1;
191 /* Here we have to loop because there could be several of the first
192 * (11111111) byte, and we want to check all such instances until we
193 * find a full match (11111111 111) or hit the end of the buffer.
195 while (first_synch_byte >= 0) {
196 /* if this *is not* at the end of the buffer */
197 if (first_synch_byte < buffer_size - 1) {
198 if(_is_id3v2_second_synch_byte(buffer[first_synch_byte + 1]))
199 /* We've found the frame synch pattern. */
202 /* We found 11111111 at the end of the current buffer
203 * indicating a partial match of the synch pattern.
204 * The find() below should return -1 and break out of
207 previous_partial_synch_match = 1;
210 /* Check in the rest of the buffer. */
211 p = memchr(p + 1, 255, buffer_size - (p + 1 - buffer));
213 first_synch_byte = p - buffer;
215 first_synch_byte = -1;
218 /* (3) partial match */
/* NOTE(review): the last byte is compared with pattern[1] ('D'), although a
 * one-byte partial match of "ID3" would start with pattern[0] ('I') - confirm
 * which is intended. The nread - 2 index below also assumes nread >= 2. */
219 if (buffer[nread - 1] == pattern[1])
220 previous_partial_match = nread - 1;
221 else if (memcmp(&buffer[nread - 2], pattern, 2) == 0)
222 previous_partial_match = nread - 2;
/* The offset advances by the full buffer size regardless of nread. */
223 buffer_offset += buffer_size;
225 if ((nread = read(fd, &buffer, sizeof(buffer))) == -1)
/* Maps an ID3v2 major version to the size in bytes of one frame header;
 * _parse_id3v2() reads that many bytes for every frame header. */
233 _get_id3v2_frame_header_size(unsigned int version)
/*
 * Decodes a raw frame header from data into fh. Three layouts are handled,
 * presumably selected by version: a 3-byte id with a 3-byte size, and two
 * 4-byte id / 4-byte size layouts that read different flag bits of data[9].
 * frame_id is copied without a terminator.
 * NOTE(review): sizes are decoded as plain big-endian integers through
 * _to_uint(); confirm whether any handled tag version stores them as
 * synchsafe integers instead.
 */
248 _parse_id3v2_frame_header(char *data, unsigned int version, struct id3v2_frame_header *fh)
/* Layout with a 3-byte frame id followed by a 3-byte size. */
254 memcpy(fh->frame_id, data, 3);
256 fh->frame_size = _to_uint(data + 3, 3);
258 fh->data_length_indicator = 0;
/* Layout with a 4-byte id and 4-byte size; compression is bit 0x40 of the
 * second flag byte and no data length indicator is reported. */
261 memcpy(fh->frame_id, data, 4);
262 fh->frame_size = _to_uint(data + 4, 4);
263 fh->compression = data[9] & 0x40;
264 fh->data_length_indicator = 0;
/* Same id/size layout; compression is bit 0x4 and the data length indicator
 * is bit 0x1 of the second flag byte. */
268 memcpy(fh->frame_id, data, 4);
269 fh->frame_size = _to_uint(data + 4, 4);
270 fh->compression = data[9] & 0x4;
271 fh->data_length_indicator = data[9] & 0x1;
/*
 * Copies frame_size bytes of frame payload into s->str, NUL-terminates the
 * copy and passes it through cs_conv. The buffer in s is grown only when
 * frame_size exceeds s->len.
 * NOTE(review): the realloc() result is assigned straight back and never
 * checked. When s is zero-initialised and frame_size is 0, no buffer is
 * allocated and the terminator below is written through a NULL s->str.
 */
277 _get_id3v2_frame_info(const char *frame_data, unsigned int frame_size, struct lms_string_size *s, lms_charset_conv_t *cs_conv)
279 if (frame_size > s->len)
/* One extra byte for the terminator written below. */
280 s->str = realloc(s->str, sizeof(char) * (frame_size + 1));
281 memcpy(s->str, frame_data, frame_size);
282 s->str[frame_size] = '\0';
285 lms_charset_conv(cs_conv, &s->str, &s->len);
/*
 * Resolves an ID3v1 genre number to its name and stores a heap-allocated
 * copy in out->str. Numbers at or above ID3V1_NUM_GENRES are not looked up.
 * NOTE(review): the malloc() result is used by memcpy() without a NULL check.
 */
289 _get_id3v1_genre(unsigned int genre, struct lms_string_size *out)
291 if (genre < ID3V1_NUM_GENRES) {
292 unsigned int size, base;
/* Each name occupies the span between two consecutive offsets in
 * id3v1_genres_mem. */
294 base = id3v1_genres_offsets[genre];
295 size = id3v1_genres_offsets[genre + 1] - base;
296 out->str = malloc(size);
298 memcpy(out->str, id3v1_genres_mem + base, size);
/* Parses a decimal genre number from str_genre with atoi() and resolves it
 * through _get_id3v1_genre(). A negative atoi() result converts to a large
 * unsigned value, which fails the range check there. */
306 _parse_id3v1_genre(const char *str_genre, struct lms_string_size *out)
308 return _get_id3v1_genre(atoi(str_genre), out);
/*
 * Extracts the genre from an ID3v2 content-type frame into out. The frame
 * text is first decoded into a temporary string. An all-digit value, or a
 * number enclosed in parentheses, is translated through the ID3v1 genre
 * table; text following a parenthesised number is copied as the genre.
 */
312 _get_id3v2_genre(const char *frame_data, unsigned int frame_size, struct lms_string_size *out, lms_charset_conv_t *cs_conv)
/* Temporary holding the decoded frame text. */
315 struct lms_string_size genre = {0};
317 _get_id3v2_frame_info(frame_data, frame_size, &genre, cs_conv);
/* Candidate for a bare genre number: non-empty and not starting with '('. */
325 is_number = (genre.len != 0 && genre.str[0] != '(');
327 for (i = 0; i < genre.len; ++i) {
/* NOTE(review): isdigit() receives a plain char; bytes >= 0x80 may be
 * negative here - consider casting to unsigned char. */
328 if (!isdigit(genre.str[i])) {
335 if (is_number && _parse_id3v1_genre(genre.str, out) == 0) {
336 /* id3v1 genre found */
341 /* ID3v2.3 "content type" can contain a ID3v1 genre number in parenthesis at
342 * the beginning of the field. If this is all that the field contains, do a
343 * translation from that number to the name and return that. If there is a
344 * string following the ID3v1 genre number, that is considered to be
345 * authoritative and we return that instead. Or finally, the field may
346 * simply be free text, in which case we just return the value. */
348 if (genre.len > 1 && genre.str[0] == '(') {
349 char *closing = NULL;
351 if (genre.str[genre.len - 1] == ')') {
352 closing = strchr(genre.str, ')');
353 if (closing == genre.str + genre.len - 1) {
354 /* ) is the last character and only appears once in the
355 * string get the id3v1 genre enclosed by parentheses
357 if (_parse_id3v1_genre(genre.str + 1, out) == 0) {
364 /* get the string followed by the id3v1 genre */
366 closing = strchr(genre.str, ')');
/* Copy the text after the closing parenthesis; its length is the remainder of
 * the decoded string. */
369 out->str = strdup(closing + 1);
370 out->len = genre.len - (closing + 1 - genre.str);
/*
 * Interprets one text frame and stores its value in info. The first payload
 * byte selects the text encoding and therefore the converter from cs_convs.
 * Frame ids are matched for title, artist, album, genre and track number,
 * in both 3-character and 4-character forms.
 */
381 _parse_id3v2_frame(struct id3v2_frame_header *fh, const char *frame_data, struct id3_info *info, lms_charset_conv_t **cs_convs)
383 lms_charset_conv_t *cs_conv = NULL;
384 unsigned int text_encoding, frame_size;
/* Priority per artist frame number 1..4 (index = number - 1); a higher value
 * wins, so number 2 outranks 1, which outranks 3 and then 4. */
385 static const int artist_priorities[] = { 3, 4, 2, 1 };
388 fprintf(stderr, "frame id = %.4s frame size = %d text encoding = %d\n",
389 fh->frame_id, fh->frame_size, frame_data[0]);
/* NOTE(review): frame_data is plain char, so a byte >= 0x80 may become a very
 * large unsigned value here; the range check below rejects it either way. */
398 text_encoding = frame_data[0];
400 /* skip first byte - text encoding */
/* NOTE(review): wraps around when fh->frame_size is 0. */
402 frame_size = fh->frame_size - 1;
/* text_encoding is unsigned, so only the upper bound is an effective check. */
404 if (text_encoding >= 0 && text_encoding < ID3_NUM_ENCODINGS) {
405 if (text_encoding == ID3_ENCODING_UTF16) {
/* A leading FE FF byte order mark selects big endian, otherwise little endian.
 * NOTE(review): reads 2 bytes without checking frame_size. */
406 if (memcmp(frame_data, "\xfe\xff", 2) == 0)
407 text_encoding = ID3_ENCODING_UTF16BE;
409 text_encoding = ID3_ENCODING_UTF16LE;
413 cs_conv = cs_convs[text_encoding];
416 /* ID3v2.2 used 3 bytes for the frame id, so let's check it */
417 if (memcmp(fh->frame_id, "TIT2", 4) == 0 ||
418 memcmp(fh->frame_id, "TT2", 3) == 0)
419 _get_id3v2_frame_info(frame_data, frame_size, &info->title, cs_conv);
420 else if (memcmp(fh->frame_id, "TP", 2) == 0) {
423 if (memcmp(fh->frame_id, "TPE", 3) == 0) {
424 /* this check shouldn't be needed, but let's make sure */
425 if (fh->frame_id[3] >= '1' && fh->frame_id[3] <= '4')
426 index = fh->frame_id[3] - '1';
429 /* ignore TPA, TPB */
430 if (fh->frame_id[2] >= '1' && fh->frame_id[2] <= '4')
431 index = fh->frame_id[2] - '1';
/* Replace the stored artist only when this frame has a higher priority. */
435 artist_priorities[index] > info->cur_artist_priority) {
436 struct lms_string_size artist = {0};
438 _get_id3v2_frame_info(frame_data, frame_size, &artist, cs_conv);
439 lms_string_size_strip_and_free(&artist);
441 if (info->artist.str)
442 free(info->artist.str);
443 info->artist = artist;
444 info->cur_artist_priority = artist_priorities[index];
/* Comparing only 3 bytes matches any frame id that starts with "TAL". */
449 else if (memcmp(fh->frame_id, "TAL", 3) == 0)
450 _get_id3v2_frame_info(frame_data, frame_size, &info->album, cs_conv);
/* Likewise, any frame id that starts with "TCO" is handled as the genre. */
452 else if (memcmp(fh->frame_id, "TCO", 3) == 0)
453 _get_id3v2_genre(frame_data, frame_size, &info->genre, cs_conv);
454 else if (memcmp(fh->frame_id, "TRCK", 4) == 0 ||
455 memcmp(fh->frame_id, "TRK", 3) == 0) {
456 struct lms_string_size trackno = {0};
457 _get_id3v2_frame_info(frame_data, frame_size, &trackno, cs_conv);
/* atoi() result is narrowed to the unsigned char field. */
458 info->trackno = atoi(trackno.str);
/*
 * Parses the ID3v2 tag located at id3v2_offset in fd: reads the 10-byte tag
 * header, skips an optional extended header, then walks the frames. Frames
 * that are uncompressed, have an id starting with 'T' and are not "TXXX"
 * are loaded and handed to _parse_id3v2_frame(); other frames are skipped
 * with lseek().
 */
464 _parse_id3v2(int fd, long id3v2_offset, struct id3_info *info, lms_charset_conv_t **cs_convs)
466 char header_data[10], frame_header_data[10];
467 unsigned int tag_size, major_version, frame_data_pos, frame_data_length, frame_header_size;
468 int extended_header, footer_present;
469 struct id3v2_frame_header fh;
472 lseek(fd, id3v2_offset, SEEK_SET);
475 if (read(fd, header_data, ID3V2_HEADER_SIZE) != ID3V2_HEADER_SIZE)
/* NOTE(review): the 4 size bytes are decoded as a plain big-endian integer;
 * confirm against the ID3v2 header definition, which uses a synchsafe size. */
478 tag_size = _to_uint(header_data + 6, 4);
483 major_version = header_data[3];
486 frame_data_length = tag_size;
488 /* check for extended header */
489 extended_header = header_data[5] & 0x20; /* bit 6 */
490 if (extended_header) {
491 /* skip extended header */
492 unsigned int extended_header_size;
493 char extended_header_data[4];
495 if (read(fd, extended_header_data, 4) != 4)
497 extended_header_size = _to_uint(extended_header_data, 4);
/* The 4 size bytes were already consumed by the read above.
 * NOTE(review): both subtractions are unsigned and wrap when the size taken
 * from the file is smaller than 4 or larger than frame_data_length. */
498 lseek(fd, extended_header_size - 4, SEEK_CUR);
499 frame_data_pos += extended_header_size;
500 frame_data_length -= extended_header_size;
503 footer_present = header_data[5] & 0x8; /* bit 4 */
504 if (footer_present && frame_data_length > ID3V2_FOOTER_SIZE)
505 frame_data_length -= ID3V2_FOOTER_SIZE;
507 frame_header_size = _get_id3v2_frame_header_size(major_version);
/* NOTE(review): unsigned subtraction; wraps to a huge bound when
 * frame_data_length is smaller than frame_header_size. */
508 while (frame_data_pos < frame_data_length - frame_header_size) {
509 nread = read(fd, frame_header_data, frame_header_size);
513 if (nread != frame_header_size)
/* A zero first byte in the frame header is tested separately, before the
 * header is decoded. */
516 if (frame_header_data[0] == 0)
519 _parse_id3v2_frame_header(frame_header_data, major_version, &fh);
523 if (!fh.compression &&
524 fh.frame_id[0] == 'T' &&
525 memcmp(fh.frame_id, "TXXX", 4) != 0) {
/* Skip the 4-byte data length indicator that precedes the payload. */
528 if (fh.data_length_indicator)
529 lseek(fd, 4, SEEK_CUR);
/* NOTE(review): the size comes from the file and the malloc() result is not
 * checked before read() writes into it. */
531 frame_data = malloc(sizeof(char) * fh.frame_size);
532 if (read(fd, frame_data, fh.frame_size) != fh.frame_size) {
537 _parse_id3v2_frame(&fh, frame_data, info, cs_convs);
/* Frame not of interest: seek past its payload (plus the indicator). */
541 if (fh.data_length_indicator)
542 lseek(fd, fh.frame_size + 4, SEEK_CUR);
544 lseek(fd, fh.frame_size, SEEK_CUR);
/* Account for the frame just consumed. */
547 frame_data_pos += fh.frame_size + frame_header_size;
/*
 * Reads an ID3v1 tag body from the current position of fd and copies title,
 * artist, album, genre and track number into info. Each text field is
 * limited to 30 bytes and converted with cs_conv.
 * NOTE(review): read() is only checked against -1, so a short read leaves
 * part of tag uninitialised; strndup() results are passed to strlen()
 * without a NULL check.
 */
554 _parse_id3v1(int fd, struct id3_info *info, lms_charset_conv_t *cs_conv)
556 struct id3v1_tag tag;
557 if (read(fd, &tag, sizeof(struct id3v1_tag)) == -1)
/* strndup() guarantees termination even when all 30 bytes are used. */
560 info->title.str = strndup(tag.title, 30);
561 info->title.len = strlen(info->title.str);
562 lms_charset_conv(cs_conv, &info->title.str, &info->title.len);
563 info->artist.str = strndup(tag.artist, 30);
564 info->artist.len = strlen(info->artist.str);
565 lms_charset_conv(cs_conv, &info->artist.str, &info->artist.len);
566 info->album.str = strndup(tag.album, 30);
567 info->album.len = strlen(info->album.str);
568 lms_charset_conv(cs_conv, &info->album.str, &info->album.len);
569 _get_id3v1_genre(tag.genre, &info->genre);
/* When comment byte 28 is zero and byte 29 is not, byte 29 is taken as the
 * track number. */
570 if (tag.comments[28] == 0 && tag.comments[29] != 0)
571 info->trackno = (unsigned char) tag.comments[29];
/* Checks whether path ends with one of the extensions in _exts. The matched
 * index is returned as (index + 1) cast to a pointer; _parse() subtracts 1
 * to recover it. Presumably the offset keeps index 0 distinct from NULL. */
577 _match(struct plugin *p, const char *path, int len, int base)
581 i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
585 return (void*)(i + 1);
/*
 * Parses one file: looks for an ID3v2 tag first and, when none is found,
 * for an ID3v1 tag in the last 128 bytes of the file. The collected strings
 * are stripped, the title falls back to the file name without its
 * extension, and the result is stored through lms_db_audio_add().
 */
589 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
/* cur_artist_priority starts at -1, below every artist frame priority. */
591 struct id3_info info = {{0}, {0}, {0}, {0}, 0, -1};
592 struct lms_audio_info audio_info = {0, {0}, {0}, {0}, {0}, 0, 0, 0};
596 fd = open(finfo->path, O_RDONLY);
602 id3v2_offset = _find_id3v2(fd);
603 if (id3v2_offset >= 0) {
605 fprintf(stderr, "id3v2 tag found in file %s with offset %ld\n",
606 finfo->path, id3v2_offset);
608 if (_parse_id3v2(fd, id3v2_offset, &info, plugin->cs_convs) != 0) {
616 fprintf(stderr, "id3v2 tag not found in file %s. trying id3v1\n",
619 /* check for id3v1 tag */
620 if (lseek(fd, -128, SEEK_END) == -1) {
/* NOTE(review): only -1 is treated as failure; a short read leaves tag
 * partly unset before the comparison below. */
625 if (read(fd, &tag, 3) == -1) {
630 if (memcmp(tag, "TAG", 3) == 0) {
632 fprintf(stderr, "id3v1 tag found in file %s\n", finfo->path);
/* The ID3v1 text is converted with the context-wide converter. */
634 if (_parse_id3v1(fd, &info, ctxt->cs_conv) != 0) {
641 lms_string_size_strip_and_free(&info.title);
642 lms_string_size_strip_and_free(&info.artist);
643 lms_string_size_strip_and_free(&info.album);
644 lms_string_size_strip_and_free(&info.genre);
/* No usable title: fall back to the file name without its extension. */
646 if (!info.title.str) {
/* Undo the +1 applied by _match().
 * NOTE(review): casting a pointer to int may truncate where pointers are
 * wider than int. */
648 ext_idx = ((int)match) - 1;
649 info.title.len = finfo->path_len - finfo->base - _exts[ext_idx].len;
/* NOTE(review): malloc() result is not checked before memcpy(). */
650 info.title.str = malloc((info.title.len + 1) * sizeof(char));
651 memcpy(info.title.str, finfo->path + finfo->base, info.title.len);
652 info.title.str[info.title.len] = '\0';
653 lms_charset_conv(ctxt->cs_conv, &info.title.str, &info.title.len);
/* NOTE(review): artist, album and genre may still be NULL when passed to %s. */
657 fprintf(stderr, "file %s info\n", finfo->path);
658 fprintf(stderr, "\ttitle='%s'\n", info.title.str);
659 fprintf(stderr, "\tartist='%s'\n", info.artist.str);
660 fprintf(stderr, "\talbum='%s'\n", info.album.str);
661 fprintf(stderr, "\tgenre='%s'\n", info.genre.str);
662 fprintf(stderr, "\ttrack number='%d'\n", info.trackno);
/* The string members are copied by value, so audio_info shares the buffers
 * freed at the end of this function. */
665 audio_info.id = finfo->id;
666 audio_info.title = info.title;
667 audio_info.artist = info.artist;
668 audio_info.album = info.album;
669 audio_info.genre = info.genre;
670 audio_info.trackno = info.trackno;
671 r = lms_db_audio_add(plugin->audio_db, &audio_info);
/* Tell the kernel the file's cached pages are no longer needed. */
674 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
678 free(info.title.str);
680 free(info.artist.str);
682 free(info.album.str);
684 free(info.genre.str);
/*
 * Prepares the plugin for use: creates the audio database handle and one
 * charset converter for every ID3 text encoding that has a charset name in
 * id3_encodings.
 */
690 _setup(struct plugin *plugin, struct lms_context *ctxt)
/* Charset name per encoding id; a NULL entry means no converter is needed. */
693 const char *id3_encodings[ID3_NUM_ENCODINGS] = {
701 plugin->audio_db = lms_db_audio_new(ctxt->db);
702 if (!plugin->audio_db)
705 for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
706 /* do not create charset conv for UTF-8 encoding */
707 if (!id3_encodings[i]) {
708 plugin->cs_convs[i] = NULL;
711 plugin->cs_convs[i] = lms_charset_conv_new_full(0, 0);
712 if (!plugin->cs_convs[i])
/* Register the source charset for this encoding with the new converter. */
714 lms_charset_conv_add(plugin->cs_convs[i], id3_encodings[i]);
/* Start callback: forwards to lms_db_audio_start() on the plugin's audio
 * database handle and returns its result. ctxt is not used. */
721 _start(struct plugin *plugin, struct lms_context *ctxt)
723 return lms_db_audio_start(plugin->audio_db);
/* Finish callback: releases the audio database handle and every charset
 * converter created in _setup(). NULL entries are skipped. */
727 _finish(struct plugin *plugin, struct lms_context *ctxt)
731 if (plugin->audio_db)
732 lms_db_audio_free(plugin->audio_db);
734 for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
735 if (plugin->cs_convs[i])
736 lms_charset_conv_free(plugin->cs_convs[i]);
/* Close callback installed as lms_plugin.close by lms_plugin_open().
 * NOTE(review): presumably releases the instance allocated there - confirm. */
743 _close(struct plugin *plugin)
749 API struct lms_plugin *
750 lms_plugin_open(void)
752 struct plugin *plugin;
754 plugin = (struct plugin *)malloc(sizeof(*plugin));
755 plugin->plugin.name = _name;
756 plugin->plugin.match = (lms_plugin_match_fn_t)_match;
757 plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
758 plugin->plugin.close = (lms_plugin_close_fn_t)_close;
759 plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
760 plugin->plugin.start = (lms_plugin_start_fn_t)_start;
761 plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
763 return (struct lms_plugin *)plugin;