src/plugins/id3/id3.c

   1 /**
   2  * Copyright (C) 2008-2011 by ProFUSION embedded systems
   3  * Copyright (C) 2008 by INdT
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public License
   7  * as published by the Free Software Foundation; either version 2.1 of
   8  * the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  18  * 02110-1301 USA
  19  *
  20  * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
  21  * @author Gustavo Sverzut Barbieri <barbieri@profusion.mobi>
  22  */
  23
  24 /**
  25  * @brief
  26  *
  27  * id3 file parser.
  28  */
  29
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33
  34 #define _GNU_SOURCE
  35 #define _XOPEN_SOURCE 600
  36 #include <lightmediascanner_plugin.h>
  37 #include <lightmediascanner_db.h>
  38 #include <lightmediascanner_charset_conv.h>
  39 #include <sys/types.h>
  40 #include <sys/stat.h>
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <string.h>
  45 #include <unistd.h>
  46 #include <ctype.h>
  47
  48 #define ID3V2_HEADER_SIZE       10
  49 #define ID3V2_FOOTER_SIZE       10
  50
  51 /* We parse only the first 4 bytes, which are the interesting ones */
  52 #define MPEG_HEADER_SIZE 4
  53
  54 enum mpeg_audio_version {
  55     MPEG_AUDIO_VERSION_1,
  56     MPEG_AUDIO_VERSION_2,
  57     MPEG_AUDIO_VERSION_2_5,
  58     MPEG_AUDIO_VERSION_4,
  59 };
  60
  61 enum mpeg_audio_layer {
  62     MPEG_AUDIO_LAYER_1,
  63     MPEG_AUDIO_LAYER_2,
  64     MPEG_AUDIO_LAYER_3,
  65     MPEG_AUDIO_LAYER_AAC,
  66 };
  67
/* Fields decoded from the first MPEG_HEADER_SIZE bytes of an audio frame. */
struct mpeg_header {
    enum mpeg_audio_version version;
    enum mpeg_audio_layer layer;

    uint8_t channels;           /* channel count (mp3) or channel configuration bits (AAC) */
    uint8_t sampling_rate_idx;  /* index into _sample_rates[] */
    uint8_t codec_idx;          /* index into _codecs[] */
};
  76
/* Codec names, indexed by mpeg_header.codec_idx.
 *
 * mp3 entries sit at (version * 3 + layer), see _fill_mp3_header().
 * AAC entries sit at MPEG_CODEC_AAC_START + the 2-bit profile field, plus 4
 * for MPEG-4 streams, see _fill_aac_header().
 * The table ends with an empty sentinel entry. */
static const struct lms_string_size _codecs[] = {
    /* mp3 */
    [0] = LMS_STATIC_STRING_SIZE("mpeg1layer1"),
    [1] = LMS_STATIC_STRING_SIZE("mpeg1layer2"),
    [2] = LMS_STATIC_STRING_SIZE("mpeg1layer3"),
    [3] = LMS_STATIC_STRING_SIZE("mpeg2layer1"),
    [4] = LMS_STATIC_STRING_SIZE("mpeg2layer2"),
    [5] = LMS_STATIC_STRING_SIZE("mpeg2layer3"),
    [6] = LMS_STATIC_STRING_SIZE("mpeg2.5layer1"),
    [7] = LMS_STATIC_STRING_SIZE("mpeg2.5layer2"),
    [8] = LMS_STATIC_STRING_SIZE("mpeg2.5layer3"),

    /* aac */
#define MPEG_CODEC_AAC_START 9
    [9] = LMS_STATIC_STRING_SIZE("mpeg2aac-main"),
    [10] = LMS_STATIC_STRING_SIZE("mpeg2aac-lc"),
    [11] = LMS_STATIC_STRING_SIZE("mpeg2aac-ssr"),
    [12] = LMS_STATIC_STRING_SIZE("mpeg2aac-ltp"),

    [13] = LMS_STATIC_STRING_SIZE("mpeg4aac-main"),
    [14] = LMS_STATIC_STRING_SIZE("mpeg4aac-lc"),
    [15] = LMS_STATIC_STRING_SIZE("mpeg4aac-ssr"),
    [16] = LMS_STATIC_STRING_SIZE("mpeg4aac-ltp"),
    { }
};
 102
 103 /* Ordered according to AAC index, take care with mp3 */
 104 static int _sample_rates[16] = {
 105     96000, 88200, 64000,
 106
 107     /* Frequencies available on mp3, */
 108     48000, 44100, 32000,
 109     24000, 22050, 16000,
 110     12000, 11025, 8000,
 111
 112     7350, /* reserved, zeroed */
 113 };
 114
 115 enum ID3Encodings {
 116     ID3_ENCODING_LATIN1 = 0,
 117     ID3_ENCODING_UTF16,
 118     ID3_ENCODING_UTF16BE,
 119     ID3_ENCODING_UTF8,
 120     ID3_ENCODING_UTF16LE,
 121     ID3_ENCODING_LAST
 122 };
 123 #define ID3_NUM_ENCODINGS ID3_ENCODING_LAST
 124
 125
 126 #include "id3v1_genres.c"
 127
/* Metadata collected from the ID3v2 and/or ID3v1 tags of one file.
 * The strings are heap-allocated and released by _parse(). */
struct id3_info {
    struct lms_string_size title;
    struct lms_string_size artist;
    struct lms_string_size album;
    struct lms_string_size genre;
    int trackno;              /* -1 while no track number has been found */
    int cur_artist_priority;  /* priority of the frame that supplied artist; -1 if none */
};
 136
/* Decoded ID3v2 frame header, filled by _parse_id3v2_frame_header(). */
struct id3v2_frame_header {
    char frame_id[4];            /* 3-character ids (ID3v2.2) are NUL padded */
    unsigned int frame_size;     /* payload size in bytes, header excluded */
    int compression;             /* non-zero: frame is skipped by _parse_id3v2() */
    int data_length_indicator;   /* non-zero: 4 extra length bytes accompany the payload */
};
 143
/* On-disk layout of an ID3v1 tag, without its leading 3-byte "TAG" marker
 * (which _parse() reads and checks separately). Packed so that it can be
 * filled by a single read(). */
struct id3v1_tag {
    char title[30];
    char artist[30];
    char album[30];
    char year[4];
    char comments[30];  /* _parse_id3v1() reads a track number from byte 29 when byte 28 is 0 */
    char genre;         /* index into the ID3v1 genre table */
} __attribute__((packed));
 152
/* Plugin instance. The generic lms_plugin must stay the first member because
 * lms_plugin_open() returns this structure cast to struct lms_plugin *. */
struct plugin {
    struct lms_plugin plugin;
    lms_db_audio_t *audio_db;
    lms_charset_conv_t *cs_convs[ID3_NUM_ENCODINGS];  /* one converter per ID3 encoding, may be NULL */
};
 158
/* Plugin name, reported through lms_plugin_info() and the plugin structure. */
static const char _name[] = "id3";
/* File extensions handled by this plugin; _match() returns the matching
 * index plus one, and _parse() uses it to strip the extension from the
 * file name when no title tag exists. */
static const struct lms_string_size _exts[] = {
    LMS_STATIC_STRING_SIZE(".mp3"),
    LMS_STATIC_STRING_SIZE(".aac")
};
/* NULL-terminated list of plugin categories. */
static const char *_cats[] = {
    "multimedia",
    "audio",
    NULL
};
/* NULL-terminated list of plugin authors. */
static const char *_authors[] = {
    "Andre Moreira Magalhaes",
    "Gustavo Sverzut Barbieri",
    NULL
};
 174
 175 static unsigned int
 176 _to_uint(const char *data, int data_size)
 177 {
 178     unsigned int sum = 0;
 179     unsigned int last, i;
 180
 181     last = data_size > 4 ? 3 : data_size - 1;
 182
 183     for (i = 0; i <= last; i++)
 184         sum |= ((unsigned char) data[i]) << ((last - i) * 8);
 185
 186     return sum;
 187 }
 188
 189 static inline int
 190 _is_id3v2_second_synch_byte(unsigned char byte)
 191 {
 192     if (byte == 0xff)
 193         return 0;
 194     if ((byte & 0xE0) == 0xE0)
 195         return 1;
 196     return 0;
 197 }
 198
 199 static inline int
 200 _fill_mp3_header(struct mpeg_header *hdr, const uint8_t b[4])
 201 {
 202     hdr->sampling_rate_idx = (b[2] & 0x0C) >> 2;
 203     if (hdr->sampling_rate_idx == 0x3)
 204         return -1;
 205     /*
 206      * Sampling rate frequency index
 207      * bits     MPEG1           MPEG2           MPEG2.5
 208      * 00       44100 Hz        22050 Hz        11025 Hz
 209      * 01       48000 Hz        24000 Hz        12000 Hz
 210      * 10       32000 Hz        16000 Hz        8000 Hz
 211      * 11       reserv.         reserv.         reserv.
 212      */
 213
 214     /* swap 0x1 and 0x0 */
 215     if (hdr->sampling_rate_idx < 0x2)
 216         hdr->sampling_rate_idx = !hdr->sampling_rate_idx;
 217     hdr->sampling_rate_idx += 3 * hdr->version + 3;
 218
 219     hdr->codec_idx = hdr->version * 3 + hdr->layer;
 220
 221     hdr->channels = (b[3] & 0xC0) >> 6;
 222     hdr->channels = hdr->channels == 0x3 ? 1 : 2;
 223     return 0;
 224 }
 225
 226 static inline int
 227 _fill_aac_header(struct mpeg_header *hdr, const uint8_t b[4])
 228 {
 229     unsigned int profile;
 230
 231     hdr->sampling_rate_idx = (b[2] & 0x3C) >> 2;
 232
 233     profile = (b[2] & 0xC0) >> 6;
 234     hdr->codec_idx = MPEG_CODEC_AAC_START + profile;
 235     if (hdr->version == MPEG_AUDIO_VERSION_4)
 236         hdr->codec_idx += 4;
 237
 238     hdr->channels = ((b[2] & 0x1) << 2) | ((b[3] & 0xC0) >> 6);
 239     return 0;
 240 }
 241
 242 static inline int
 243 _fill_mpeg_header(struct mpeg_header *hdr, const uint8_t b[4])
 244 {
 245     unsigned int version = (b[1] & 0x18) >>  3;
 246     unsigned int layer = (b[1] & 0x06) >> 1;
 247
 248     switch (layer) {
 249     case 0x0:
 250         if (version == 0x2 || version == 0x3)
 251             hdr->layer = MPEG_AUDIO_LAYER_AAC;
 252         else
 253             return -1;
 254         break;
 255     case 0x1:
 256         hdr->layer = MPEG_AUDIO_LAYER_3;
 257         break;
 258     case 0x2:
 259         hdr->layer = MPEG_AUDIO_LAYER_2;
 260         break;
 261     case 0x3:
 262         hdr->layer = MPEG_AUDIO_LAYER_1;
 263         break;
 264     }
 265
 266     switch (version) {
 267     case 0x0:
 268         hdr->version = MPEG_AUDIO_VERSION_2_5;
 269         break;
 270     case 0x1:
 271         return -1;
 272     case 0x2:
 273         if (layer == 0x0)
 274             hdr->version = MPEG_AUDIO_VERSION_4;
 275         else
 276             hdr->version = MPEG_AUDIO_VERSION_2;
 277         break;
 278     case 0x3:
 279         if (layer == 0x0)
 280             hdr->version = MPEG_AUDIO_VERSION_2;
 281         else
 282             hdr->version = MPEG_AUDIO_VERSION_1;
 283     }
 284
 285     if (hdr->layer == MPEG_AUDIO_LAYER_AAC)
 286         return _fill_aac_header(hdr, b);
 287     else
 288         return _fill_mp3_header(hdr, b);
 289
 290     return 0;
 291 }
 292
 293 static int
 294 _parse_mpeg_header(int fd, off_t off, struct lms_audio_info *audio_info)
 295 {
 296     uint8_t buffer[32];
 297     const uint8_t *p, *p_end;
 298     unsigned int prev_read;
 299     struct mpeg_header hdr;
 300
 301     lseek(fd, off, SEEK_SET);
 302
 303     /* Find sync word */
 304     prev_read = 0;
 305     do {
 306         int nread = read(fd, buffer + prev_read, sizeof(buffer) - prev_read);
 307         if (nread < MPEG_HEADER_SIZE)
 308             return -1;
 309
 310         p = buffer;
 311         p_end = buffer + nread;
 312         while (p < p_end && (p = memchr(p, 0xFF, p_end - p))) {
 313             /* poor man's ring buffer since the the needle is small (4 bytes) */
 314             if (p > p_end - MPEG_HEADER_SIZE) {
 315                 memcpy(buffer, p, p_end - p);
 316                 break;
 317             }
 318
 319             if (_is_id3v2_second_synch_byte(*(p + 1)))
 320                 goto found;
 321
 322             p++;
 323         }
 324         prev_read = p ? p_end - p : 0;
 325     } while(1);
 326
 327 found:
 328
 329     if (_fill_mpeg_header(&hdr, p) < 0) {
 330         fprintf(stderr, "Invalid field in file, ignoring.\n");
 331         return 0;
 332     }
 333
 334     audio_info->codec = _codecs[hdr.codec_idx];
 335     audio_info->sampling_rate = _sample_rates[hdr.sampling_rate_idx];
 336     audio_info->channels = hdr.channels;
 337
 338     return 0;
 339 }
 340
 341 /* Returns the offset in fd to the position after the ID3 tag, iff it occurs
 342  * *before* a sync word. Otherwise < 0 is returned and if we gave up looking
 343  * after ID3 because of a sync value, @syncframe_offset is set to its
 344  * correspondent offset */
 345 static long
 346 _find_id3v2(int fd, off_t *syncframe_offset)
 347 {
 348     static const char pattern[3] = "ID3";
 349     char buffer[3];
 350     unsigned int prev_part_match, prev_part_match_sync = 0;
 351     long buffer_offset;
 352
 353     if (read(fd, buffer, sizeof(buffer)) != sizeof(buffer))
 354         return -1;
 355
 356     if (memcmp(buffer, pattern, sizeof(pattern)) == 0)
 357         return 0;
 358
 359     /* This loop is the crux of the find method.  There are three cases that we
 360      * want to account for:
 361      * (1) The previously searched buffer contained a partial match of the
 362      * search pattern and we want to see if the next one starts with the
 363      * remainder of that pattern.
 364      *
 365      * (2) The search pattern is wholly contained within the current buffer.
 366      *
 367      * (3) The current buffer ends with a partial match of the pattern.  We will
 368      * note this for use in the next iteration, where we will check for the rest
 369      * of the pattern.
 370      */
 371     buffer_offset = 0;
 372     prev_part_match_sync = 0;
 373     prev_part_match = 0;
 374     while (1) {
 375         const char *p, *p_end;
 376
 377         /* (1) previous partial match */
 378         if (prev_part_match_sync) {
 379             if (_is_id3v2_second_synch_byte(buffer[0])) {
 380                 *syncframe_offset = buffer_offset - 1;
 381                 return -1;
 382             }
 383             prev_part_match_sync = 0;
 384         }
 385
 386         if (prev_part_match) {
 387             const int size = sizeof(buffer) - prev_part_match;
 388             const char *part_pattern = pattern + prev_part_match;
 389
 390             if (memcmp(buffer, part_pattern, size) == 0)
 391                 return buffer_offset - prev_part_match;
 392
 393             prev_part_match = 0;
 394         }
 395
 396         p_end = buffer + sizeof(buffer);
 397         for (p = buffer; p < p_end; p++) {
 398             if (*p == pattern[0]) {
 399                 /* Try to match pattern, possible partial contents */
 400                 const char *q;
 401                 int todo;
 402
 403                 q = p + 1;
 404                 todo = p_end - q;
 405                 if (todo == 0 || memcmp(q, pattern + 1, todo) == 0) {
 406                     todo++;
 407                     if (todo == sizeof(buffer))
 408                         /* (2) pattern contained in current buffer */
 409                         return buffer_offset;
 410
 411                     /* (3) partial match */
 412                     prev_part_match = todo;
 413                     break;
 414                 }
 415             } else if ((unsigned char)*p == 0xff) {
 416                 /* Try to match synch pattern, possible partial contents */
 417                 const char *q;
 418
 419                 q = p + 1;
 420                 if (q < p_end) {
 421                     if (_is_id3v2_second_synch_byte(*q)) {
 422                         /* (2) synch pattern contained in current buffer */
 423                         *syncframe_offset = buffer_offset + (p - buffer);
 424                         return -1;
 425                     }
 426                 } else
 427                     /* (3) partial match */
 428                     prev_part_match_sync = 1;
 429             }
 430         }
 431
 432         if (read(fd, buffer, sizeof(buffer)) != sizeof(buffer))
 433             return -1;
 434         buffer_offset += sizeof(buffer);
 435     }
 436
 437     return -1;
 438 }
 439
 440 static unsigned int
 441 _get_id3v2_frame_header_size(unsigned int version)
 442 {
 443     switch (version) {
 444     case 0:
 445     case 1:
 446     case 2:
 447         return 6;
 448     case 3:
 449     case 4:
 450     default:
 451         return 10;
 452     }
 453 }
 454
 455 static void
 456 _parse_id3v2_frame_header(char *data, unsigned int version, struct id3v2_frame_header *fh)
 457 {
 458     switch (version) {
 459     case 0:
 460     case 1:
 461     case 2:
 462         memcpy(fh->frame_id, data, 3);
 463         fh->frame_id[3] = 0;
 464         fh->frame_size = _to_uint(data + 3, 3);
 465         fh->compression = 0;
 466         fh->data_length_indicator = 0;
 467         break;
 468     case 3:
 469         memcpy(fh->frame_id, data, 4);
 470         fh->frame_size = _to_uint(data + 4, 4);
 471         fh->compression = data[9] & 0x40;
 472         fh->data_length_indicator = 0;
 473         break;
 474     case 4:
 475     default:
 476         memcpy(fh->frame_id, data, 4);
 477         fh->frame_size = _to_uint(data + 4, 4);
 478         fh->compression = data[9] & 0x4;
 479         fh->data_length_indicator = data[9] & 0x1;
 480         break;
 481     }
 482 }
 483
 484 static inline void
 485 _get_id3v2_frame_info(const char *frame_data, unsigned int frame_size, struct lms_string_size *s, lms_charset_conv_t *cs_conv, int strip)
 486 {
 487     if (frame_size == 0)
 488         return;
 489     if (frame_size > s->len) {
 490         char *tmp;
 491
 492         tmp = realloc(s->str, sizeof(char) * (frame_size + 1));
 493         if (!tmp)
 494             return;
 495         s->str = tmp;
 496     }
 497     memcpy(s->str, frame_data, frame_size);
 498     s->str[frame_size] = '\0';
 499     s->len = frame_size;
 500     if (cs_conv)
 501         lms_charset_conv(cs_conv, &s->str, &s->len);
 502     if (strip)
 503         lms_string_size_strip_and_free(s);
 504 }
 505
 506 static int
 507 _get_id3v2_artist(unsigned int index, const char *frame_data, unsigned int frame_size, struct id3_info *info, lms_charset_conv_t *cs_conv)
 508 {
 509     static const unsigned char artist_priorities[] = {3, 4, 2, 1};
 510     const unsigned int index_max = sizeof(artist_priorities) / sizeof(*artist_priorities);
 511
 512     if (index >= index_max)
 513         return 1;
 514
 515     if (artist_priorities[index] > info->cur_artist_priority) {
 516         struct lms_string_size artist = {0};
 517
 518         _get_id3v2_frame_info(frame_data, frame_size, &artist, cs_conv, 1);
 519         if (artist.str) {
 520             if (info->artist.str)
 521                 free(info->artist.str);
 522             info->artist = artist;
 523             info->cur_artist_priority = artist_priorities[index];
 524         }
 525     }
 526     return 0;
 527 }
 528
 529 static int
 530 _get_id3v1_genre(unsigned int genre, struct lms_string_size *out)
 531 {
 532     if (genre < ID3V1_NUM_GENRES) {
 533         unsigned int size, base, len;
 534
 535         base = id3v1_genres_offsets[genre];
 536         size = id3v1_genres_offsets[genre + 1] - base;
 537         len = size - 1;
 538
 539         if (len > out->len) {
 540             char *p = realloc(out->str, size);
 541             if (!p)
 542                 return -2;
 543             out->str = p;
 544         }
 545
 546         out->len = len;
 547         memcpy(out->str, id3v1_genres_mem + base, size);
 548
 549         return 0;
 550     }
 551     return -1;
 552 }
 553
 554 static inline int
 555 _parse_id3v1_genre(const char *str_genre, struct lms_string_size *out)
 556 {
 557     return _get_id3v1_genre(atoi(str_genre), out);
 558 }
 559
 560 static void
 561 _get_id3v2_genre(const char *frame_data, unsigned int frame_size, struct lms_string_size *out, lms_charset_conv_t *cs_conv)
 562 {
 563     int i, is_number;
 564     struct lms_string_size genre = {0};
 565
 566     _get_id3v2_frame_info(frame_data, frame_size, &genre, cs_conv, 1);
 567     if (!genre.str)
 568         return;
 569
 570     is_number = (genre.len != 0 && genre.str[0] != '(');
 571     if (is_number) {
 572         for (i = 0; i < genre.len; ++i) {
 573             if (!isdigit(genre.str[i])) {
 574                 is_number = 0;
 575                 break;
 576             }
 577         }
 578     }
 579
 580     if (is_number && _parse_id3v1_genre(genre.str, out) == 0) {
 581         /* id3v1 genre found */
 582         free(genre.str);
 583         return;
 584     }
 585
 586     /* ID3v2.3 "content type" can contain a ID3v1 genre number in parenthesis at
 587      * the beginning of the field. If this is all that the field contains, do a
 588      * translation from that number to the name and return that.  If there is a
 589      * string folloing the ID3v1 genre number, that is considered to be
 590      * authoritative and we return that instead. Or finally, the field may
 591      * simply be free text, in which case we just return the value. */
 592
 593     if (genre.len > 1 && genre.str[0] == '(') {
 594         char *closing = NULL;
 595
 596         if (genre.str[genre.len - 1] == ')') {
 597             closing = strchr(genre.str, ')');
 598             if (closing == genre.str + genre.len - 1) {
 599                 /* ) is the last character and only appears once in the
 600                  * string get the id3v1 genre enclosed by parentheses
 601                  */
 602                 if (_parse_id3v1_genre(genre.str + 1, out) == 0) {
 603                     free(genre.str);
 604                     return;
 605                 }
 606             }
 607         }
 608
 609         /* get the string followed by the id3v1 genre */
 610         if (!closing)
 611             closing = strchr(genre.str, ')');
 612
 613         if (closing) {
 614             closing++;
 615             out->len = genre.len - (closing - genre.str);
 616             out->str = genre.str;
 617             memmove(out->str, closing, out->len + 1); /* includes '\0' */
 618             lms_string_size_strip_and_free(out);
 619             return;
 620         }
 621     }
 622
 623     /* pure text */
 624     *out = genre;
 625 }
 626
 627 static void
 628 _get_id3v2_trackno(const char *frame_data, unsigned int frame_size, struct id3_info *info, lms_charset_conv_t *cs_conv)
 629 {
 630     struct lms_string_size trackno = {0};
 631
 632     _get_id3v2_frame_info(frame_data, frame_size, &trackno, cs_conv, 0);
 633     if (!trackno.str)
 634         return;
 635     info->trackno = atoi(trackno.str);
 636     free(trackno.str);
 637 }
 638
 639 static void
 640 _parse_id3v2_frame(struct id3v2_frame_header *fh, const char *frame_data, struct id3_info *info, lms_charset_conv_t **cs_convs)
 641 {
 642     lms_charset_conv_t *cs_conv;
 643     unsigned int text_encoding, frame_size;
 644     const char *fid;
 645
 646     /* ignore frames which contains just the encoding */
 647     if (fh->frame_size <= 1)
 648         return;
 649
 650 #if 0
 651     fprintf(stderr, "frame id = %.4s frame size = %d text encoding = %d\n",
 652             fh->frame_id, fh->frame_size, frame_data[0]);
 653 #endif
 654
 655     /* All used frames start with 'T' */
 656     fid = fh->frame_id;
 657     if (fid[0] != 'T')
 658         return;
 659
 660     /* Latin1  = 0
 661      * UTF16   = 1
 662      * UTF16BE = 2
 663      * UTF8    = 3
 664      * UTF16LE = 4
 665      */
 666     text_encoding = frame_data[0];
 667
 668     /* skip first byte - text encoding */
 669     frame_data += 1;
 670     frame_size = fh->frame_size - 1;
 671
 672     if (text_encoding < ID3_NUM_ENCODINGS) {
 673         if (text_encoding == ID3_ENCODING_UTF16) {
 674             /* ignore frames which contains just the encoding */
 675             if (frame_size <= 2)
 676                 return;
 677
 678             if (memcmp(frame_data, "\xfe\xff", 2) == 0)
 679                 text_encoding = ID3_ENCODING_UTF16BE;
 680             else
 681                 text_encoding = ID3_ENCODING_UTF16LE;
 682             frame_data += 2;
 683             frame_size -= 2;
 684         }
 685         cs_conv = cs_convs[text_encoding];
 686     } else
 687         cs_conv = NULL;
 688
 689     /* ID3v2.2 used 3 bytes for the frame id, so let's check it */
 690     if ((fid[1] == 'T' && fid[2] == '2') ||
 691         (fid[1] == 'I' && fid[2] == 'T' && fid[3] == '2'))
 692         _get_id3v2_frame_info(frame_data, frame_size, &info->title, cs_conv, 1);
 693     else if (fid[1] == 'P') {
 694         if (fid[2] == 'E')
 695             _get_id3v2_artist(fid[3] - '1', frame_data, frame_size,
 696                               info, cs_conv);
 697         else if (fid[2] >= '1' && fid[2] <= '4')
 698             _get_id3v2_artist(fid[2] - '1', frame_data, frame_size,
 699                               info, cs_conv);
 700     }
 701     /* TALB, TAL */
 702     else if (fid[1] == 'A' && fid[2] == 'L')
 703         _get_id3v2_frame_info(frame_data, frame_size, &info->album, cs_conv, 1);
 704     /* TCON (Content/Genre) */
 705     else if (fid[1] == 'C' && fid[2] == 'O' && fid[3] == 'N')
 706         _get_id3v2_genre(frame_data, frame_size, &info->genre, cs_conv);
 707     else if (fid[1] == 'R' && (fid[2] == 'K' ||
 708                                (fid[2] == 'C' && fid[3] == 'K')))
 709         _get_id3v2_trackno(frame_data, frame_size, info, cs_conv);
 710 }
 711
 712 static int
 713 _parse_id3v2(int fd, long id3v2_offset, struct id3_info *info, lms_charset_conv_t **cs_convs)
 714 {
 715     char header_data[10], frame_header_data[10];
 716     unsigned int tag_size, major_version, frame_data_pos, frame_data_length, frame_header_size;
 717     int extended_header, footer_present;
 718     struct id3v2_frame_header fh;
 719     size_t nread;
 720
 721     lseek(fd, id3v2_offset, SEEK_SET);
 722
 723     /* parse header */
 724     if (read(fd, header_data, ID3V2_HEADER_SIZE) != ID3V2_HEADER_SIZE)
 725         return -1;
 726
 727     tag_size = _to_uint(header_data + 6, 4);
 728     if (tag_size == 0)
 729         return -1;
 730
 731     /* parse frames */
 732     major_version = header_data[3];
 733
 734     frame_data_pos = 0;
 735     frame_data_length = tag_size;
 736
 737     /* check for extended header */
 738     extended_header = header_data[5] & 0x20; /* bit 6 */
 739     if (extended_header) {
 740         /* skip extended header */
 741         unsigned int extended_header_size;
 742         char extended_header_data[4];
 743
 744         if (read(fd, extended_header_data, 4) != 4)
 745             return -1;
 746         extended_header_size = _to_uint(extended_header_data, 4);
 747         lseek(fd, extended_header_size - 4, SEEK_CUR);
 748         frame_data_pos += extended_header_size;
 749         frame_data_length -= extended_header_size;
 750     }
 751
 752     footer_present = header_data[5] & 0x8;   /* bit 4 */
 753     if (footer_present && frame_data_length > ID3V2_FOOTER_SIZE)
 754         frame_data_length -= ID3V2_FOOTER_SIZE;
 755
 756     frame_header_size = _get_id3v2_frame_header_size(major_version);
 757     while (frame_data_pos < frame_data_length - frame_header_size) {
 758         nread = read(fd, frame_header_data, frame_header_size);
 759         if (nread == 0)
 760             break;
 761
 762         if (nread != frame_header_size)
 763             return -1;
 764
 765         if (frame_header_data[0] == 0)
 766             break;
 767
 768         _parse_id3v2_frame_header(frame_header_data, major_version, &fh);
 769
 770         if (fh.frame_size > 0 &&
 771             !fh.compression &&
 772             fh.frame_id[0] == 'T' &&
 773             memcmp(fh.frame_id, "TXXX", 4) != 0) {
 774             char *frame_data;
 775
 776             if (fh.data_length_indicator)
 777                 lseek(fd, 4, SEEK_CUR);
 778
 779             frame_data = malloc(sizeof(char) * fh.frame_size);
 780             if (read(fd, frame_data, fh.frame_size) != fh.frame_size) {
 781                 free(frame_data);
 782                 return -1;
 783             }
 784
 785             _parse_id3v2_frame(&fh, frame_data, info, cs_convs);
 786             free(frame_data);
 787         }
 788         else {
 789             if (fh.data_length_indicator)
 790                 lseek(fd, fh.frame_size + 4, SEEK_CUR);
 791             else
 792                 lseek(fd, fh.frame_size, SEEK_CUR);
 793         }
 794
 795         frame_data_pos += fh.frame_size + frame_header_size;
 796     }
 797
 798     return 0;
 799 }
 800
 801 static inline void
 802 _id3v1_str_get(struct lms_string_size *s, const char *buf, int maxlen, lms_charset_conv_t *cs_conv)
 803 {
 804     int start, len;
 805     const char *p, *p_end, *p_last;
 806
 807     start = 0;
 808     p_last = NULL;
 809     p_end = buf + maxlen;
 810     for (p = buf; *p != '\0' && p < p_end; p++) {
 811         if (!isspace(*p))
 812             p_last = p;
 813         else if (!p_last)
 814             start++;
 815     }
 816
 817     if (!p_last)
 818         return;
 819
 820     len = (p_last - buf) - start;
 821     if (len < 1)
 822         return;
 823
 824     len++; /* p_last is not included yet */
 825     if (len > s->len) {
 826         char *tmp;
 827
 828         tmp = realloc(s->str, sizeof(char) * (len + 1));
 829         if (!tmp)
 830             return;
 831         s->str = tmp;
 832     }
 833
 834     s->len = len;
 835     memcpy(s->str, buf + start, len);
 836     s->str[len] = '\0';
 837
 838     if (cs_conv)
 839         lms_charset_conv(cs_conv, &s->str, &s->len);
 840 }
 841
 842 static int
 843 _parse_id3v1(int fd, struct id3_info *info, lms_charset_conv_t *cs_conv)
 844 {
 845     struct id3v1_tag tag;
 846     if (read(fd, &tag, sizeof(struct id3v1_tag)) == -1)
 847         return -1;
 848
 849     if (!info->title.str)
 850         _id3v1_str_get(&info->title, tag.title, sizeof(tag.title), cs_conv);
 851     if (!info->artist.str)
 852         _id3v1_str_get(&info->artist, tag.artist, sizeof(tag.artist), cs_conv);
 853     if (!info->album.str)
 854         _id3v1_str_get(&info->album, tag.album, sizeof(tag.album), cs_conv);
 855     if (!info->genre.str)
 856         _get_id3v1_genre(tag.genre, &info->genre);
 857     if (info->trackno == -1 &&
 858         tag.comments[28] == 0 && tag.comments[29] != 0)
 859         info->trackno = (unsigned char) tag.comments[29];
 860
 861     return 0;
 862 }
 863
 864 static void *
 865 _match(struct plugin *p, const char *path, int len, int base)
 866 {
 867     long i;
 868
 869     i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
 870     if (i < 0)
 871       return NULL;
 872     else
 873       return (void*)(i + 1);
 874 }
 875
 876 static int
 877 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
 878 {
 879     struct id3_info info = {
 880         .trackno = -1,
 881         .cur_artist_priority = -1,
 882     };
 883     struct lms_audio_info audio_info = { };
 884     int r, fd;
 885     long id3v2_offset;
 886     off_t syncframe_offset = 0;
 887
 888     fd = open(finfo->path, O_RDONLY);
 889     if (fd < 0) {
 890         perror("open");
 891         return -1;
 892     }
 893
 894     id3v2_offset = _find_id3v2(fd, &syncframe_offset);
 895     if (id3v2_offset >= 0) {
 896 #if 0
 897         fprintf(stderr, "id3v2 tag found in file %s with offset %ld\n",
 898                 finfo->path, id3v2_offset);
 899 #endif
 900         if (_parse_id3v2(fd, id3v2_offset, &info, plugin->cs_convs) != 0 ||
 901             !info.title.str || !info.artist.str ||
 902             !info.album.str || !info.genre.str ||
 903             info.trackno == -1) {
 904 #if 0
 905             fprintf(stderr, "id3v2 invalid in file %s\n", finfo->path);
 906 #endif
 907             id3v2_offset = -1;
 908         }
 909
 910         /* Even if later we failed to parse the ID3, we want to look for sync
 911          * frame only where we were left */
 912         syncframe_offset = lseek(fd, 0, SEEK_CUR);
 913     }
 914
 915     if (id3v2_offset < 0) {
 916         char tag[3];
 917 #if 0
 918         fprintf(stderr, "id3v2 tag not found in file %s. trying id3v1\n",
 919                 finfo->path);
 920 #endif
 921         /* check for id3v1 tag */
 922         if (lseek(fd, -128, SEEK_END) == -1) {
 923             r = -3;
 924             goto done;
 925         }
 926
 927         if (read(fd, &tag, 3) == -1) {
 928             r = -4;
 929             goto done;
 930         }
 931
 932         if (memcmp(tag, "TAG", 3) == 0) {
 933 #if 0
 934             fprintf(stderr, "id3v1 tag found in file %s\n", finfo->path);
 935 #endif
 936             if (_parse_id3v1(fd, &info, ctxt->cs_conv) != 0) {
 937                 r = -5;
 938                 goto done;
 939             }
 940         }
 941     }
 942
 943     if (!info.title.str) {
 944         long ext_idx;
 945         ext_idx = ((long)match) - 1;
 946         info.title.len = finfo->path_len - finfo->base - _exts[ext_idx].len;
 947         info.title.str = malloc((info.title.len + 1) * sizeof(char));
 948         memcpy(info.title.str, finfo->path + finfo->base, info.title.len);
 949         info.title.str[info.title.len] = '\0';
 950         lms_charset_conv(ctxt->cs_conv, &info.title.str, &info.title.len);
 951     }
 952
 953     if (info.trackno == -1)
 954         info.trackno = 0;
 955
 956 #if 0
 957     fprintf(stderr, "file %s info\n", finfo->path);
 958     fprintf(stderr, "\ttitle='%s'\n", info.title.str);
 959     fprintf(stderr, "\tartist='%s'\n", info.artist.str);
 960     fprintf(stderr, "\talbum='%s'\n", info.album.str);
 961     fprintf(stderr, "\tgenre='%s'\n", info.genre.str);
 962     fprintf(stderr, "\ttrack number='%d'\n", info.trackno);
 963 #endif
 964
 965     audio_info.id = finfo->id;
 966     audio_info.title = info.title;
 967     audio_info.artist = info.artist;
 968     audio_info.album = info.album;
 969     audio_info.genre = info.genre;
 970     audio_info.trackno = info.trackno;
 971
 972     _parse_mpeg_header(fd, syncframe_offset, &audio_info);
 973
 974     r = lms_db_audio_add(plugin->audio_db, &audio_info);
 975
 976   done:
 977     posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
 978     close(fd);
 979
 980     if (info.title.str)
 981         free(info.title.str);
 982     if (info.artist.str)
 983         free(info.artist.str);
 984     if (info.album.str)
 985         free(info.album.str);
 986     if (info.genre.str)
 987         free(info.genre.str);
 988
 989     return r;
 990 }
 991
 992 static int
 993 _setup(struct plugin *plugin, struct lms_context *ctxt)
 994 {
 995     int i;
 996     const char *id3_encodings[ID3_NUM_ENCODINGS] = {
 997         "Latin1",
 998         NULL, /* UTF-16 */
 999         "UTF-16BE",
1000         NULL, /* UTF-8 */
1001         "UTF-16LE",
1002     };
1003
1004     plugin->audio_db = lms_db_audio_new(ctxt->db);
1005     if (!plugin->audio_db)
1006         return -1;
1007
1008     for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
1009         /* do not create charset conv for UTF-8 encoding */
1010         if (!id3_encodings[i]) {
1011             plugin->cs_convs[i] = NULL;
1012             continue;
1013         }
1014         plugin->cs_convs[i] = lms_charset_conv_new_full(0, 0);
1015         if (!plugin->cs_convs[i])
1016             return -1;
1017         lms_charset_conv_add(plugin->cs_convs[i], id3_encodings[i]);
1018     }
1019
1020     return 0;
1021 }
1022
1023 static int
1024 _start(struct plugin *plugin, struct lms_context *ctxt)
1025 {
1026     return lms_db_audio_start(plugin->audio_db);
1027 }
1028
1029 static int
1030 _finish(struct plugin *plugin, struct lms_context *ctxt)
1031 {
1032     int i;
1033
1034     if (plugin->audio_db)
1035         lms_db_audio_free(plugin->audio_db);
1036
1037     for (i = 0; i < ID3_NUM_ENCODINGS; ++i) {
1038         if (plugin->cs_convs[i])
1039             lms_charset_conv_free(plugin->cs_convs[i]);
1040     }
1041
1042     return 0;
1043 }
1044
/* Plugin close callback: release the instance allocated by
 * lms_plugin_open(). Always returns 0. */
static int
_close(struct plugin *plugin)
{
    void *instance = plugin;

    free(instance);
    return 0;
}
1051
1052 API struct lms_plugin *
1053 lms_plugin_open(void)
1054 {
1055     struct plugin *plugin;
1056
1057     plugin = (struct plugin *)malloc(sizeof(*plugin));
1058     plugin->plugin.name = _name;
1059     plugin->plugin.match = (lms_plugin_match_fn_t)_match;
1060     plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
1061     plugin->plugin.close = (lms_plugin_close_fn_t)_close;
1062     plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
1063     plugin->plugin.start = (lms_plugin_start_fn_t)_start;
1064     plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
1065
1066     return (struct lms_plugin *)plugin;
1067 }
1068
1069 API struct lms_plugin_info *
1070 lms_plugin_info(void)
1071 {
1072     static struct lms_plugin_info info = {
1073         _name,
1074         _cats,
1075         "ID3 v1 and v2 for mp3 files",
1076         PACKAGE_VERSION,
1077         _authors,
1078         "http://lms.garage.maemo.org"
1079     };
1080
1081     return &info;
1082 }