src/plugins/id3/id3.c

   1 /**
   2  * Copyright (C) 2008-2011 by ProFUSION embedded systems
   3  * Copyright (C) 2008 by INdT
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public License
   7  * as published by the Free Software Foundation; either version 2.1 of
   8  * the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  18  * 02110-1301 USA
  19  *
  20  * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
  21  * @author Gustavo Sverzut Barbieri <barbieri@profusion.mobi>
  22  */
  23
  24 /**
  25  * @brief
  26  *
  27  * id3 file parser.
  28  *
  29  * Reference:
  30  *   http://www.mp3-tech.org/programmer/frame_header.html
  31  *   http://www.mpgedit.org/mpgedit/mpeg_format/MP3Format.html
  32  */
  33
  34 #include <lightmediascanner_plugin.h>
  35 #include <lightmediascanner_db.h>
  36 #include <lightmediascanner_charset_conv.h>
  37 #include <shared/util.h>
  38
  39 #include <sys/types.h>
  40 #include <sys/stat.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45 #include <unistd.h>
  46 #include <ctype.h>
  47
  48 #define ID3V2_HEADER_SIZE       10
  49 #define ID3V2_FOOTER_SIZE       10
  50
  51 /* We parse only the first 4 bytes, which are the interesting ones */
  52 #define MPEG_HEADER_SIZE 4
  53
  54 enum mpeg_audio_version {
  55     MPEG_AUDIO_VERSION_1,
  56     MPEG_AUDIO_VERSION_2,
  57     MPEG_AUDIO_VERSION_2_5,
  58     MPEG_AUDIO_VERSION_4,
  59 };
  60
  61 enum mpeg_audio_layer {
  62     MPEG_AUDIO_LAYER_1,
  63     MPEG_AUDIO_LAYER_2,
  64     MPEG_AUDIO_LAYER_3,
  65     MPEG_AUDIO_LAYER_AAC,
  66 };
  67
  68 struct mpeg_header {
  69     enum mpeg_audio_version version;
  70     enum mpeg_audio_layer layer;
  71
  72     uint8_t channels;
  73     uint8_t sampling_rate_idx;
  74     uint8_t codec_idx;
  75 };
  76
  77 static const struct lms_string_size _codecs[] = {
  78     /* mp3 */
  79     [0] = LMS_STATIC_STRING_SIZE("mpeg1layer1"),
  80     [1] = LMS_STATIC_STRING_SIZE("mpeg1layer2"),
  81     [2] = LMS_STATIC_STRING_SIZE("mpeg1layer3"),
  82     [3] = LMS_STATIC_STRING_SIZE("mpeg2layer1"),
  83     [4] = LMS_STATIC_STRING_SIZE("mpeg2layer2"),
  84     [5] = LMS_STATIC_STRING_SIZE("mpeg2layer3"),
  85     [6] = LMS_STATIC_STRING_SIZE("mpeg2.5layer1"),
  86     [7] = LMS_STATIC_STRING_SIZE("mpeg2.5layer2"),
  87     [8] = LMS_STATIC_STRING_SIZE("mpeg2.5layer3"),
  88
  89     /* aac */
  90 #define MPEG_CODEC_AAC_START 9
  91     [9] = LMS_STATIC_STRING_SIZE("mpeg2aac-main"),
  92     [10] = LMS_STATIC_STRING_SIZE("mpeg2aac-lc"),
  93     [11] = LMS_STATIC_STRING_SIZE("mpeg2aac-ssr"),
  94     [12] = LMS_STATIC_STRING_SIZE("mpeg2aac-ltp"),
  95
  96     [13] = LMS_STATIC_STRING_SIZE("mpeg4aac-main"),
  97     [14] = LMS_STATIC_STRING_SIZE("mpeg4aac-lc"),
  98     [15] = LMS_STATIC_STRING_SIZE("mpeg4aac-ssr"),
  99     [16] = LMS_STATIC_STRING_SIZE("mpeg4aac-ltp"),
 100     { }
 101 };
 102
 103 /* Ordered according to AAC index, take care with mp3 */
 104 static int _sample_rates[16] = {
 105     96000, 88200, 64000,
 106
 107     /* Frequencies available on mp3, */
 108     48000, 44100, 32000,
 109     24000, 22050, 16000,
 110     12000, 11025, 8000,
 111
 112     7350, /* reserved, zeroed */
 113 };
 114
 115 enum ID3Encodings {
 116     ID3_ENCODING_LATIN1 = 0,
 117     ID3_ENCODING_UTF16,
 118     ID3_ENCODING_UTF16BE,
 119     ID3_ENCODING_UTF8,
 120     ID3_ENCODING_UTF16LE,
 121     ID3_ENCODING_LAST
 122 };
 123 #define ID3_NUM_ENCODINGS ID3_ENCODING_LAST
 124
 125
 126 #include "id3v1_genres.c"
 127
 128 struct id3_info {
 129     struct lms_string_size title;
 130     struct lms_string_size artist;
 131     struct lms_string_size album;
 132     struct lms_string_size genre;
 133     int trackno;
 134     int cur_artist_priority;
 135 };
 136
 137 struct id3v2_frame_header {
 138     char frame_id[4];
 139     unsigned int frame_size;
 140     int compression;
 141     int data_length_indicator;
 142 };
 143
 144 struct id3v1_tag {
 145     char title[30];
 146     char artist[30];
 147     char album[30];
 148     char year[4];
 149     char comments[30];
 150     char genre;
 151 } __attribute__((packed));
 152
 153 struct plugin {
 154     struct lms_plugin plugin;
 155     lms_db_audio_t *audio_db;
 156     lms_charset_conv_t *cs_convs[ID3_NUM_ENCODINGS];
 157 };
 158
 159 static const char _name[] = "id3";
 160 static const struct lms_string_size _exts[] = {
 161     LMS_STATIC_STRING_SIZE(".mp3"),
 162     LMS_STATIC_STRING_SIZE(".aac"),
 163     LMS_STATIC_STRING_SIZE(".adts"),
 164 };
 165 static const char *_cats[] = {
 166     "multimedia",
 167     "audio",
 168     NULL
 169 };
 170 static const char *_authors[] = {
 171     "Andre Moreira Magalhaes",
 172     "Gustavo Sverzut Barbieri",
 173     NULL
 174 };
 175
 176 static unsigned int
 177 _to_uint(const char *data, int data_size)
 178 {
 179     unsigned int sum = 0;
 180     unsigned int last, i;
 181
 182     last = data_size > 4 ? 3 : data_size - 1;
 183
 184     for (i = 0; i <= last; i++)
 185         sum |= ((unsigned char) data[i]) << ((last - i) * 8);
 186
 187     return sum;
 188 }
 189
 190 static inline int
 191 _is_id3v2_second_synch_byte(unsigned char byte)
 192 {
 193     if (byte == 0xff)
 194         return 0;
 195     if ((byte & 0xE0) == 0xE0)
 196         return 1;
 197     return 0;
 198 }
 199
 200 static inline int
 201 _fill_mp3_header(struct mpeg_header *hdr, const uint8_t b[4])
 202 {
 203     hdr->sampling_rate_idx = (b[2] & 0x0C) >> 2;
 204     if (hdr->sampling_rate_idx == 0x3)
 205         return -1;
 206     /*
 207      * Sampling rate frequency index
 208      * bits     MPEG1           MPEG2           MPEG2.5
 209      * 00       44100 Hz        22050 Hz        11025 Hz
 210      * 01       48000 Hz        24000 Hz        12000 Hz
 211      * 10       32000 Hz        16000 Hz        8000 Hz
 212      * 11       reserv.         reserv.         reserv.
 213      */
 214
 215     /* swap 0x1 and 0x0 */
 216     if (hdr->sampling_rate_idx < 0x2)
 217         hdr->sampling_rate_idx = !hdr->sampling_rate_idx;
 218     hdr->sampling_rate_idx += 3 * hdr->version + 3;
 219
 220     hdr->codec_idx = hdr->version * 3 + hdr->layer;
 221
 222     hdr->channels = (b[3] & 0xC0) >> 6;
 223     hdr->channels = hdr->channels == 0x3 ? 1 : 2;
 224     return 0;
 225 }
 226
 227 static inline int
 228 _fill_aac_header(struct mpeg_header *hdr, const uint8_t b[4])
 229 {
 230     unsigned int profile;
 231
 232     hdr->sampling_rate_idx = (b[2] & 0x3C) >> 2;
 233
 234     profile = (b[2] & 0xC0) >> 6;
 235     hdr->codec_idx = MPEG_CODEC_AAC_START + profile;
 236     if (hdr->version == MPEG_AUDIO_VERSION_4)
 237         hdr->codec_idx += 4;
 238
 239     hdr->channels = ((b[2] & 0x1) << 2) | ((b[3] & 0xC0) >> 6);
 240     return 0;
 241 }
 242
 243 static inline int
 244 _fill_mpeg_header(struct mpeg_header *hdr, const uint8_t b[4])
 245 {
 246     unsigned int version = (b[1] & 0x18) >>  3;
 247     unsigned int layer = (b[1] & 0x06) >> 1;
 248
 249     switch (layer) {
 250     case 0x0:
 251         if (version == 0x2 || version == 0x3)
 252             hdr->layer = MPEG_AUDIO_LAYER_AAC;
 253         else
 254             return -1;
 255         break;
 256     case 0x1:
 257         hdr->layer = MPEG_AUDIO_LAYER_3;
 258         break;
 259     case 0x2:
 260         hdr->layer = MPEG_AUDIO_LAYER_2;
 261         break;
 262     case 0x3:
 263         hdr->layer = MPEG_AUDIO_LAYER_1;
 264         break;
 265     }
 266
 267     switch (version) {
 268     case 0x0:
 269         hdr->version = MPEG_AUDIO_VERSION_2_5;
 270         break;
 271     case 0x1:
 272         return -1;
 273     case 0x2:
 274         if (layer == 0x0)
 275             hdr->version = MPEG_AUDIO_VERSION_4;
 276         else
 277             hdr->version = MPEG_AUDIO_VERSION_2;
 278         break;
 279     case 0x3:
 280         if (layer == 0x0)
 281             hdr->version = MPEG_AUDIO_VERSION_2;
 282         else
 283             hdr->version = MPEG_AUDIO_VERSION_1;
 284     }
 285
 286     if (hdr->layer == MPEG_AUDIO_LAYER_AAC)
 287         return _fill_aac_header(hdr, b);
 288     else
 289         return _fill_mp3_header(hdr, b);
 290
 291     return 0;
 292 }
 293
 294 static int
 295 _parse_mpeg_header(int fd, off_t off, struct lms_audio_info *audio_info)
 296 {
 297     uint8_t buffer[32];
 298     const uint8_t *p, *p_end;
 299     unsigned int prev_read;
 300     struct mpeg_header hdr;
 301
 302     lseek(fd, off, SEEK_SET);
 303
 304     /* Find sync word */
 305     prev_read = 0;
 306     do {
 307         int nread = read(fd, buffer + prev_read, sizeof(buffer) - prev_read);
 308         if (nread < MPEG_HEADER_SIZE)
 309             return -1;
 310
 311         p = buffer;
 312         p_end = buffer + nread;
 313         while (p < p_end && (p = memchr(p, 0xFF, p_end - p))) {
 314             /* poor man's ring buffer since the needle is small (4 bytes) */
 315             if (p > p_end - MPEG_HEADER_SIZE) {
 316                 memcpy(buffer, p, p_end - p);
 317                 break;
 318             }
 319
 320             if (_is_id3v2_second_synch_byte(*(p + 1)))
 321                 goto found;
 322
 323             p++;
 324         }
 325         prev_read = p ? p_end - p : 0;
 326     } while(1);
 327
 328 found:
 329     if (_fill_mpeg_header(&hdr, p) < 0) {
 330         fprintf(stderr, "Invalid field in file, ignoring.\n");
 331         return 0;
 332     }
 333
 334     audio_info->codec = _codecs[hdr.codec_idx];
 335     audio_info->sampling_rate = _sample_rates[hdr.sampling_rate_idx];
 336     audio_info->channels = hdr.channels;
 337
 338     return 0;
 339 }
 340
 341 /* Returns the offset in fd to the position after the ID3 tag, iff it occurs
 342  * *before* a sync word. Otherwise < 0 is returned and if we gave up looking
 343  * after ID3 because of a sync value, @syncframe_offset is set to its
 344  * correspondent offset */
 345 static long
 346 _find_id3v2(int fd, off_t *syncframe_offset)
 347 {
 348     static const char pattern[3] = "ID3";
 349     char buffer[3];
 350     unsigned int prev_part_match, prev_part_match_sync = 0;
 351     long buffer_offset;
 352
 353     if (read(fd, buffer, sizeof(buffer)) != sizeof(buffer))
 354         return -1;
 355
 356     if (memcmp(buffer, pattern, sizeof(pattern)) == 0)
 357         return 0;
 358
 359     /* This loop is the crux of the find method.  There are three cases that we
 360      * want to account for:
 361      * (1) The previously searched buffer contained a partial match of the
 362      * search pattern and we want to see if the next one starts with the
 363      * remainder of that pattern.
 364      *
 365      * (2) The search pattern is wholly contained within the current buffer.
 366      *
 367      * (3) The current buffer ends with a partial match of the pattern.  We will
 368      * note this for use in the next iteration, where we will check for the rest
 369      * of the pattern.
 370      */
 371     buffer_offset = 0;
 372     prev_part_match_sync = 0;
 373     prev_part_match = 0;
 374     while (1) {
 375         const char *p, *p_end;
 376
 377         /* (1) previous partial match */
 378         if (prev_part_match_sync) {
 379             if (_is_id3v2_second_synch_byte(buffer[0])) {
 380                 *syncframe_offset = buffer_offset - 1;
 381                 return -1;
 382             }
 383             prev_part_match_sync = 0;
 384         }
 385
 386         if (prev_part_match) {
 387             const int size = sizeof(buffer) - prev_part_match;
 388             const char *part_pattern = pattern + prev_part_match;
 389
 390             if (memcmp(buffer, part_pattern, size) == 0)
 391                 return buffer_offset - prev_part_match;
 392
 393             prev_part_match = 0;
 394         }
 395
 396         p_end = buffer + sizeof(buffer);
 397         for (p = buffer; p < p_end; p++) {
 398             if (*p == pattern[0]) {
 399                 /* Try to match pattern, possible partial contents */
 400                 const char *q;
 401                 int todo;
 402
 403                 q = p + 1;
 404                 todo = p_end - q;
 405                 if (todo == 0 || memcmp(q, pattern + 1, todo) == 0) {
 406                     todo++;
 407                     if (todo == sizeof(buffer))
 408                         /* (2) pattern contained in current buffer */
 409                         return buffer_offset;
 410
 411                     /* (3) partial match */
 412                     prev_part_match = todo;
 413                     break;
 414                 }
 415             } else if ((unsigned char)*p == 0xff) {
 416                 /* Try to match synch pattern, possible partial contents */
 417                 const char *q;
 418
 419                 q = p + 1;
 420                 if (q < p_end) {
 421                     if (_is_id3v2_second_synch_byte(*q)) {
 422                         /* (2) synch pattern contained in current buffer */
 423                         *syncframe_offset = buffer_offset + (p - buffer);
 424                         return -1;
 425                     }
 426                 } else
 427                     /* (3) partial match */
 428                     prev_part_match_sync = 1;
 429             }
 430         }
 431
 432         if (read(fd, buffer, sizeof(buffer)) != sizeof(buffer))
 433             return -1;
 434         buffer_offset += sizeof(buffer);
 435     }
 436
 437     return -1;
 438 }
 439
 440 static unsigned int
 441 _get_id3v2_frame_header_size(unsigned int version)
 442 {
 443     switch (version) {
 444     case 0:
 445     case 1:
 446     case 2:
 447         return 6;
 448     case 3:
 449     case 4:
 450     default:
 451         return 10;
 452     }
 453 }
 454
 455 static void
 456 _parse_id3v2_frame_header(char *data, unsigned int version, struct id3v2_frame_header *fh)
 457 {
 458     switch (version) {
 459     case 0:
 460     case 1:
 461     case 2:
 462         memcpy(fh->frame_id, data, 3);
 463         fh->frame_id[3] = 0;
 464         fh->frame_size = _to_uint(data + 3, 3);
 465         fh->compression = 0;
 466         fh->data_length_indicator = 0;
 467         break;
 468     case 3:
 469         memcpy(fh->frame_id, data, 4);
 470         fh->frame_size = _to_uint(data + 4, 4);
 471         fh->compression = data[9] & 0x40;
 472         fh->data_length_indicator = 0;
 473         break;
 474     case 4:
 475     default:
 476         memcpy(fh->frame_id, data, 4);
 477         fh->frame_size = _to_uint(data + 4, 4);
 478         fh->compression = data[9] & 0x4;
 479         fh->data_length_indicator = data[9] & 0x1;
 480         break;
 481     }
 482 }
 483
 484 static inline void
 485 _get_id3v2_frame_info(const char *frame_data, unsigned int frame_size, struct lms_string_size *s, lms_charset_conv_t *cs_conv, int strip)
 486 {
 487     if (frame_size == 0)
 488         return;
 489     if (frame_size > s->len) {
 490         char *tmp;
 491
 492         tmp = realloc(s->str, sizeof(char) * (frame_size + 1));
 493         if (!tmp)
 494             return;
 495         s->str = tmp;
 496     }
 497     memcpy(s->str, frame_data, frame_size);
 498     s->str[frame_size] = '\0';
 499     s->len = frame_size;
 500     if (cs_conv)
 501         lms_charset_conv(cs_conv, &s->str, &s->len);
 502     if (strip)
 503         lms_string_size_strip_and_free(s);
 504 }
 505
 506 static int
 507 _get_id3v2_artist(unsigned int index, const char *frame_data, unsigned int frame_size, struct id3_info *info, lms_charset_conv_t *cs_conv)
 508 {
 509     static const unsigned char artist_priorities[] = {3, 4, 2, 1};
 510     const unsigned int index_max = sizeof(artist_priorities) / sizeof(*artist_priorities);
 511
 512     if (index >= index_max)
 513         return 1;
 514
 515     if (artist_priorities[index] > info->cur_artist_priority) {
 516         struct lms_string_size artist = { };
 517
 518         _get_id3v2_frame_info(frame_data, frame_size, &artist, cs_conv, 1);
 519         if (artist.str) {
 520             free(info->artist.str);
 521             info->artist = artist;
 522             info->cur_artist_priority = artist_priorities[index];
 523         }
 524     }
 525     return 0;
 526 }
 527
 528 static int
 529 _get_id3v1_genre(unsigned int genre, struct lms_string_size *out)
 530 {
 531     if (genre < ID3V1_NUM_GENRES) {
 532         unsigned int size, base, len;
 533
 534         base = id3v1_genres_offsets[genre];
 535         size = id3v1_genres_offsets[genre + 1] - base;
 536         len = size - 1;
 537
 538         if (len > out->len) {
 539             char *p = realloc(out->str, size);
 540             if (!p)
 541                 return -2;
 542             out->str = p;
 543         }
 544
 545         out->len = len;
 546         memcpy(out->str, id3v1_genres_mem + base, size);
 547
 548         return 0;
 549     }
 550     return -1;
 551 }
 552
 553 static inline int
 554 _parse_id3v1_genre(const char *str_genre, struct lms_string_size *out)
 555 {
 556     return _get_id3v1_genre(atoi(str_genre), out);
 557 }
 558
 559 static void
 560 _get_id3v2_genre(const char *frame_data, unsigned int frame_size, struct lms_string_size *out, lms_charset_conv_t *cs_conv)
 561 {
 562     unsigned int i, is_number;
 563     struct lms_string_size genre = { };
 564
 565     _get_id3v2_frame_info(frame_data, frame_size, &genre, cs_conv, 1);
 566     if (!genre.str)
 567         return;
 568
 569     is_number = (genre.len != 0 && genre.str[0] != '(');
 570     if (is_number) {
 571         for (i = 0; i < genre.len; ++i) {
 572             if (!isdigit(genre.str[i])) {
 573                 is_number = 0;
 574                 break;
 575             }
 576         }
 577     }
 578
 579     if (is_number && _parse_id3v1_genre(genre.str, out) == 0) {
 580         /* id3v1 genre found */
 581         free(genre.str);
 582         return;
 583     }
 584
 585     /* ID3v2.3 "content type" can contain a ID3v1 genre number in parenthesis at
 586      * the beginning of the field. If this is all that the field contains, do a
 587      * translation from that number to the name and return that.  If there is a
 588      * string folloing the ID3v1 genre number, that is considered to be
 589      * authoritative and we return that instead. Or finally, the field may
 590      * simply be free text, in which case we just return the value. */
 591
 592     if (genre.len > 1 && genre.str[0] == '(') {
 593         char *closing = NULL;
 594
 595         if (genre.str[genre.len - 1] == ')') {
 596             closing = strchr(genre.str, ')');
 597             if (closing == genre.str + genre.len - 1) {
 598                 /* ) is the last character and only appears once in the
 599                  * string get the id3v1 genre enclosed by parentheses
 600                  */
 601                 if (_parse_id3v1_genre(genre.str + 1, out) == 0) {
 602                     free(genre.str);
 603                     return;
 604                 }
 605             }
 606         }
 607
 608         /* get the string followed by the id3v1 genre */
 609         if (!closing)
 610             closing = strchr(genre.str, ')');
 611
 612         if (closing) {
 613             closing++;
 614             out->len = genre.len - (closing - genre.str);
 615             out->str = genre.str;
 616             memmove(out->str, closing, out->len + 1); /* includes '\0' */
 617             lms_string_size_strip_and_free(out);
 618             return;
 619         }
 620     }
 621
 622     /* pure text */
 623     *out = genre;
 624 }
 625
 626 static void
 627 _get_id3v2_trackno(const char *frame_data, unsigned int frame_size, struct id3_info *info, lms_charset_conv_t *cs_conv)
 628 {
 629     struct lms_string_size trackno = { };
 630
 631     _get_id3v2_frame_info(frame_data, frame_size, &trackno, cs_conv, 0);
 632     if (!trackno.str)
 633         return;
 634     info->trackno = atoi(trackno.str);
 635     free(trackno.str);
 636 }
 637
 638 static void
 639 _parse_id3v2_frame(struct id3v2_frame_header *fh, const char *frame_data, struct id3_info *info, lms_charset_conv_t **cs_convs)
 640 {
 641     lms_charset_conv_t *cs_conv;
 642     unsigned int text_encoding, frame_size;
 643     const char *fid;
 644
 645     /* ignore frames which contains just the encoding */
 646     if (fh->frame_size <= 1)
 647         return;
 648
 649 #if 0
 650     fprintf(stderr, "frame id = %.4s frame size = %d text encoding = %d\n",
 651             fh->frame_id, fh->frame_size, frame_data[0]);
 652 #endif
 653
 654     /* All used frames start with 'T' */
 655     fid = fh->frame_id;
 656     if (fid[0] != 'T')
 657         return;
 658
 659     /* Latin1  = 0
 660      * UTF16   = 1
 661      * UTF16BE = 2
 662      * UTF8    = 3
 663      * UTF16LE = 4
 664      */
 665     text_encoding = frame_data[0];
 666
 667     /* skip first byte - text encoding */
 668     frame_data += 1;
 669     frame_size = fh->frame_size - 1;
 670
 671     if (text_encoding < ID3_NUM_ENCODINGS) {
 672         if (text_encoding == ID3_ENCODING_UTF16) {
 673             /* ignore frames which contains just the encoding */
 674             if (frame_size <= 2)
 675                 return;
 676
 677             if (memcmp(frame_data, "\xfe\xff", 2) == 0)
 678                 text_encoding = ID3_ENCODING_UTF16BE;
 679             else
 680                 text_encoding = ID3_ENCODING_UTF16LE;
 681             frame_data += 2;
 682             frame_size -= 2;
 683         }
 684         cs_conv = cs_convs[text_encoding];
 685     } else
 686         cs_conv = NULL;
 687
 688     /* ID3v2.2 used 3 bytes for the frame id, so let's check it */
 689     if ((fid[1] == 'T' && fid[2] == '2') ||
 690         (fid[1] == 'I' && fid[2] == 'T' && fid[3] == '2'))
 691         _get_id3v2_frame_info(frame_data, frame_size, &info->title, cs_conv, 1);
 692     else if (fid[1] == 'P') {
 693         if (fid[2] == 'E')
 694             _get_id3v2_artist(fid[3] - '1', frame_data, frame_size,
 695                               info, cs_conv);
 696         else if (fid[2] >= '1' && fid[2] <= '4')
 697             _get_id3v2_artist(fid[2] - '1', frame_data, frame_size,
 698                               info, cs_conv);
 699     }
 700     /* TALB, TAL */
 701     else if (fid[1] == 'A' && fid[2] == 'L')
 702         _get_id3v2_frame_info(frame_data, frame_size, &info->album, cs_conv, 1);
 703     /* TCON (Content/Genre) */
 704     else if (fid[1] == 'C' && fid[2] == 'O' && fid[3] == 'N')
 705         _get_id3v2_genre(frame_data, frame_size, &info->genre, cs_conv);
 706     else if (fid[1] == 'R' && (fid[2] == 'K' ||
 707                                (fid[2] == 'C' && fid[3] == 'K')))
 708         _get_id3v2_trackno(frame_data, frame_size, info, cs_conv);
 709 }
 710
 711 static int
 712 _parse_id3v2(int fd, long id3v2_offset, struct id3_info *info, lms_charset_conv_t **cs_convs)
 713 {
 714     char header_data[10], frame_header_data[10];
 715     unsigned int tag_size, major_version, frame_data_pos, frame_data_length, frame_header_size;
 716     int extended_header, footer_present;
 717     struct id3v2_frame_header fh;
 718     size_t nread;
 719
 720     lseek(fd, id3v2_offset, SEEK_SET);
 721
 722     /* parse header */
 723     if (read(fd, header_data, ID3V2_HEADER_SIZE) != ID3V2_HEADER_SIZE)
 724         return -1;
 725
 726     tag_size = _to_uint(header_data + 6, 4);
 727     if (tag_size == 0)
 728         return -1;
 729
 730     /* parse frames */
 731     major_version = header_data[3];
 732
 733     frame_data_pos = 0;
 734     frame_data_length = tag_size;
 735
 736     /* check for extended header */
 737     extended_header = header_data[5] & 0x20; /* bit 6 */
 738     if (extended_header) {
 739         /* skip extended header */
 740         unsigned int extended_header_size;
 741         char extended_header_data[4];
 742
 743         if (read(fd, extended_header_data, 4) != 4)
 744             return -1;
 745         extended_header_size = _to_uint(extended_header_data, 4);
 746         lseek(fd, extended_header_size - 4, SEEK_CUR);
 747         frame_data_pos += extended_header_size;
 748         frame_data_length -= extended_header_size;
 749     }
 750
 751     footer_present = header_data[5] & 0x8;   /* bit 4 */
 752     if (footer_present && frame_data_length > ID3V2_FOOTER_SIZE)
 753         frame_data_length -= ID3V2_FOOTER_SIZE;
 754
 755     frame_header_size = _get_id3v2_frame_header_size(major_version);
 756     while (frame_data_pos < frame_data_length - frame_header_size) {
 757         nread = read(fd, frame_header_data, frame_header_size);
 758         if (nread == 0)
 759             break;
 760
 761         if (nread != frame_header_size)
 762             return -1;
 763
 764         if (frame_header_data[0] == 0)
 765             break;
 766
 767         _parse_id3v2_frame_header(frame_header_data, major_version, &fh);
 768
 769         if (fh.frame_size > 0 &&
 770             !fh.compression &&
 771             fh.frame_id[0] == 'T' &&
 772             memcmp(fh.frame_id, "TXXX", 4) != 0) {
 773             char *frame_data;
 774
 775             if (fh.data_length_indicator)
 776                 lseek(fd, 4, SEEK_CUR);
 777
 778             frame_data = malloc(sizeof(char) * fh.frame_size);
 779             if (read(fd, frame_data, fh.frame_size) != fh.frame_size) {
 780                 free(frame_data);
 781                 return -1;
 782             }
 783
 784             _parse_id3v2_frame(&fh, frame_data, info, cs_convs);
 785             free(frame_data);
 786         }
 787         else {
 788             if (fh.data_length_indicator)
 789                 lseek(fd, fh.frame_size + 4, SEEK_CUR);
 790             else
 791                 lseek(fd, fh.frame_size, SEEK_CUR);
 792         }
 793
 794         frame_data_pos += fh.frame_size + frame_header_size;
 795     }
 796
 797     return 0;
 798 }
 799
 800 static inline void
 801 _id3v1_str_get(struct lms_string_size *s, const char *buf, int maxlen, lms_charset_conv_t *cs_conv)
 802 {
 803     int start, len;
 804     const char *p, *p_end, *p_last;
 805
 806     start = 0;
 807     p_last = NULL;
 808     p_end = buf + maxlen;
 809     for (p = buf; *p != '\0' && p < p_end; p++) {
 810         if (!isspace(*p))
 811             p_last = p;
 812         else if (!p_last)
 813             start++;
 814     }
 815
 816     if (!p_last)
 817         return;
 818
 819     len = (p_last - buf) - start;
 820     if (len < 1)
 821         return;
 822
 823     len++; /* p_last is not included yet */
 824     if ((unsigned)len > s->len) {
 825         char *tmp;
 826
 827         tmp = realloc(s->str, sizeof(char) * (len + 1));
 828         if (!tmp)
 829             return;
 830         s->str = tmp;
 831     }
 832
 833     s->len = len;
 834     memcpy(s->str, buf + start, len);
 835     s->str[len] = '\0';
 836
 837     if (cs_conv)
 838         lms_charset_conv(cs_conv, &s->str, &s->len);
 839 }
 840
 841 static int
 842 _parse_id3v1(int fd, struct id3_info *info, lms_charset_conv_t *cs_conv)
 843 {
 844     struct id3v1_tag tag;
 845     if (read(fd, &tag, sizeof(struct id3v1_tag)) == -1)
 846         return -1;
 847
 848     if (!info->title.str)
 849         _id3v1_str_get(&info->title, tag.title, sizeof(tag.title), cs_conv);
 850     if (!info->artist.str)
 851         _id3v1_str_get(&info->artist, tag.artist, sizeof(tag.artist), cs_conv);
 852     if (!info->album.str)
 853         _id3v1_str_get(&info->album, tag.album, sizeof(tag.album), cs_conv);
 854     if (!info->genre.str)
 855         _get_id3v1_genre(tag.genre, &info->genre);
 856     if (info->trackno == -1 &&
 857         tag.comments[28] == 0 && tag.comments[29] != 0)
 858         info->trackno = (unsigned char) tag.comments[29];
 859
 860     return 0;
 861 }
 862
 863 static void *
 864 _match(struct plugin *p, const char *path, int len, int base)
 865 {
 866     long i;
 867
 868     i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
 869     if (i < 0)
 870       return NULL;
 871     else
 872       return (void*)(i + 1);
 873 }
 874
 875 static int
 876 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
 877 {
 878     struct id3_info info = {
 879         .trackno = -1,
 880         .cur_artist_priority = -1,
 881     };
 882     struct lms_audio_info audio_info = { };
 883     int r, fd;
 884     long id3v2_offset;
 885     off_t syncframe_offset = 0;
 886
 887     fd = open(finfo->path, O_RDONLY);
 888     if (fd < 0) {
 889         perror("open");
 890         return -1;
 891     }
 892
 893     id3v2_offset = _find_id3v2(fd, &syncframe_offset);
 894     if (id3v2_offset >= 0) {
 895 #if 0
 896         fprintf(stderr, "id3v2 tag found in file %s with offset %ld\n",
 897                 finfo->path, id3v2_offset);
 898 #endif
 899         if (_parse_id3v2(fd, id3v2_offset, &info, plugin->cs_convs) != 0 ||
 900             !info.title.str || !info.artist.str ||
 901             !info.album.str || !info.genre.str ||
 902             info.trackno == -1) {
 903 #if 0
 904             fprintf(stderr, "id3v2 invalid in file %s\n", finfo->path);
 905 #endif
 906             id3v2_offset = -1;
 907         }
 908
 909         /* Even if later we failed to parse the ID3, we want to look for sync
 910          * frame only where we were left */
 911         syncframe_offset = lseek(fd, 0, SEEK_CUR);
 912     }
 913
 914     if (id3v2_offset < 0) {
 915         char tag[3];
 916 #if 0
 917         fprintf(stderr, "id3v2 tag not found in file %s. trying id3v1\n",
 918                 finfo->path);
 919 #endif
 920         /* check for id3v1 tag */
 921         if (lseek(fd, -128, SEEK_END) == -1) {
 922             r = -3;
 923             goto done;
 924         }
 925
 926         if (read(fd, &tag, 3) == -1) {
 927             r = -4;
 928             goto done;
 929         }
 930
 931         if (memcmp(tag, "TAG", 3) == 0) {
 932 #if 0
 933             fprintf(stderr, "id3v1 tag found in file %s\n", finfo->path);
 934 #endif
 935             if (_parse_id3v1(fd, &info, ctxt->cs_conv) != 0) {
 936                 r = -5;
 937                 goto done;
 938             }
 939         }
 940     }
 941
 942     if (!info.title.str)
 943         info.title = str_extract_name_from_path(finfo->path, finfo->path_len,
 944                                                 finfo->base,
 945                                                 &_exts[((long) match) - 1],
 946                                                 ctxt->cs_conv);
 947
 948     if (info.trackno == -1)
 949         info.trackno = 0;
 950
 951 #if 0
 952     fprintf(stderr, "file %s info\n", finfo->path);
 953     fprintf(stderr, "\ttitle='%s'\n", info.title.str);
 954     fprintf(stderr, "\tartist='%s'\n", info.artist.str);
 955     fprintf(stderr, "\talbum='%s'\n", info.album.str);
 956     fprintf(stderr, "\tgenre='%s'\n", info.genre.str);
 957     fprintf(stderr, "\ttrack number='%d'\n", info.trackno);
 958 #endif
 959
 960     audio_info.id = finfo->id;
 961     audio_info.title = info.title;
 962     audio_info.artist = info.artist;
 963     audio_info.album = info.album;
 964     audio_info.genre = info.genre;
 965     audio_info.trackno = info.trackno;
 966
 967     _parse_mpeg_header(fd, syncframe_offset, &audio_info);
 968
 969     r = lms_db_audio_add(plugin->audio_db, &audio_info);
 970
 971   done:
 972     posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
 973     close(fd);
 974
 975     free(info.title.str);
 976     free(info.artist.str);
 977     free(info.album.str);
 978     free(info.genre.str);
 979
 980     return r;
 981 }
 982
 983 static int
 984 _setup(struct plugin *plugin, struct lms_context *ctxt)
 985 {
 986     int i;
 987     const char *id3_encodings[ID3_NUM_ENCODINGS] = {
 988         "Latin1",
 989         NULL, /* UTF-16 */
 990         "UTF-16BE",
 991         NULL, /* UTF-8 */
 992         "UTF-16LE",
 993     };
 994
 995     plugin->audio_db = lms_db_audio_new(ctxt->db);
 996     if (!plugin->audio_db)
 997         return -1;
 998
 999     for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
1000         /* do not create charset conv for UTF-8 encoding */
1001         if (!id3_encodings[i]) {
1002             plugin->cs_convs[i] = NULL;
1003             continue;
1004         }
1005         plugin->cs_convs[i] = lms_charset_conv_new_full(0, 0);
1006         if (!plugin->cs_convs[i])
1007             return -1;
1008         lms_charset_conv_add(plugin->cs_convs[i], id3_encodings[i]);
1009     }
1010
1011     return 0;
1012 }
1013
1014 static int
1015 _start(struct plugin *plugin, struct lms_context *ctxt)
1016 {
1017     return lms_db_audio_start(plugin->audio_db);
1018 }
1019
1020 static int
1021 _finish(struct plugin *plugin, struct lms_context *ctxt)
1022 {
1023     int i;
1024
1025     if (plugin->audio_db)
1026         lms_db_audio_free(plugin->audio_db);
1027
1028     for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
1029         if (plugin->cs_convs[i])
1030             lms_charset_conv_free(plugin->cs_convs[i]);
1031     }
1032
1033     return 0;
1034 }
1035
/* Close callback: releases the plugin instance itself. Always returns 0. */
static int
_close(struct plugin *plugin)
{
    /* the instance was allocated from the heap when the plugin was opened */
    free(plugin);

    return 0;
}
1042
1043 API struct lms_plugin *
1044 lms_plugin_open(void)
1045 {
1046     struct plugin *plugin;
1047
1048     plugin = (struct plugin *)malloc(sizeof(*plugin));
1049     plugin->plugin.name = _name;
1050     plugin->plugin.match = (lms_plugin_match_fn_t)_match;
1051     plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
1052     plugin->plugin.close = (lms_plugin_close_fn_t)_close;
1053     plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
1054     plugin->plugin.start = (lms_plugin_start_fn_t)_start;
1055     plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
1056
1057     return (struct lms_plugin *)plugin;
1058 }
1059
1060 API const struct lms_plugin_info *
1061 lms_plugin_info(void)
1062 {
1063     static struct lms_plugin_info info = {
1064         _name,
1065         _cats,
1066         "ID3 v1 and v2 for mp3 files",
1067         PACKAGE_VERSION,
1068         _authors,
1069         "http://lms.garage.maemo.org"
1070     };
1071
1072     return &info;
1073 }