2 * Copyright (C) 2008 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
24 * asf/wma file parser.
27 * http://www.microsoft.com/en-us/download/details.aspx?id=14995
30 #include <lightmediascanner_plugin.h>
31 #include <lightmediascanner_db.h>
32 #include <shared/util.h>
36 #include <sys/types.h>
/* Attribute data type code 0: the attribute value is a string. It is the only
 * type whose data _parse_extended_content_description_object() reads; it is
 * decoded with the plugin's charset converter. */
45 ATTR_TYPE_UNICODE = 0,
/* Generic stream description (type, stream_id, codec, audio/video details and
 * the `next` link); accessed as s->base throughout this file. */
55 struct lms_stream base;
/* Audio sampling rate read in _parse_stream_properties() (accessed as
 * s->priv.sampling_rate) and copied into lms_audio_info by _parse(). */
57 unsigned int sampling_rate;
/* Tag strings collected by the content-description parsers; their buffers are
 * released with free() at the end of _parse(). */
62 struct lms_string_size title;
63 struct lms_string_size artist;
64 struct lms_string_size album;
65 struct lms_string_size genre;
/* Overall media type: taken from the parsed streams in _parse(), with a
 * fallback based on the file extension when it is still unknown. */
66 enum lms_stream_type type;
/* Track number, filled from the WM/TrackNumber attribute via atoi(). */
68 unsigned char trackno;
/* Head of the list of parsed streams (linked through base.next); released
 * with streams_free(). */
70 struct stream *streams;
/* Generic plugin header: name and callback table, filled in by
 * lms_plugin_open(), which also returns this object cast to struct lms_plugin *. */
74 struct lms_plugin plugin;
/* Database handles created in _setup(), started in _start() and released in
 * _finish(). */
75 lms_db_audio_t *audio_db;
76 lms_db_video_t *video_db;
/* Charset converter created in _setup() with "UTF-16LE" registered; used to
 * convert the strings read from the file. */
77 lms_charset_conv_t *cs_conv;
/* Plugin name; assigned to plugin->plugin.name in lms_plugin_open(). */
80 static const char _name[] = "asf";
/* Container name stored in lms_audio_info.container by _parse(). */
81 static const struct lms_string_size _container = LMS_STATIC_STRING_SIZE("asf");
/* File extensions handled by this plugin. _match() returns the matching index
 * plus one, and _parse() maps that value back to an entry of this table. */
82 static const struct lms_string_size _exts[] = {
83 LMS_STATIC_STRING_SIZE(".wma"),
84 LMS_STATIC_STRING_SIZE(".wmv"),
85 LMS_STATIC_STRING_SIZE(".asf")
/* Category list for the plugin description. */
87 static const char *_cats[] = {
/* Author list for the plugin description. */
92 static const char *_authors[] = {
93 "Andre Moreira Magalhaes",
/* Lookup table from the 16-bit audio codec id read in
 * _parse_stream_properties() to the codec name. It is scanned linearly by
 * _audio_codec_id_to_str(), which stops at the first entry whose name.str is
 * NULL. */
97 /* TODO: Add the gazillion of possible codecs -- possibly a task to gperf */
100 struct lms_string_size name;
101 } _audio_codecs[] = {
102 /* id == 0 is special, check callers if it's needed */
103 { 0x0160, LMS_STATIC_STRING_SIZE("wmav1") },
104 { 0x0161, LMS_STATIC_STRING_SIZE("wmav2") },
105 { 0x0162, LMS_STATIC_STRING_SIZE("wmavpro") },
106 { 0x0163, LMS_STATIC_STRING_SIZE("wmavlossless") },
107 { 0x1600, LMS_STATIC_STRING_SIZE("aac") },
108 { 0x706d, LMS_STATIC_STRING_SIZE("aac") },
109 { 0x4143, LMS_STATIC_STRING_SIZE("aac") },
110 { 0xA106, LMS_STATIC_STRING_SIZE("aac") },
111 { 0xF1AC, LMS_STATIC_STRING_SIZE("flac") },
112 { 0x0055, LMS_STATIC_STRING_SIZE("mp3") },
/* Lookup table from the 4-byte compression id of a video stream to the codec
 * name. _video_codec_id_to_str() compares the first 4 bytes of each id with
 * memcmp() and stops at the first entry whose name.str is NULL. */
116 /* TODO: Add the gazillion of possible codecs -- possibly a task to gperf */
117 static const struct {
119 struct lms_string_size name;
120 } _video_codecs[] = {
/* NOTE(review): the ids in this table are 4-character strings, not integers,
 * so the "id == 0" remark below looks copied from the audio table -- confirm. */
121 /* id == 0 is special, check callers if it's needed */
122 { "WMV1", LMS_STATIC_STRING_SIZE("wmv1") },
123 { "WMV2", LMS_STATIC_STRING_SIZE("wmv2") },
124 { "WMV3", LMS_STATIC_STRING_SIZE("wmv3") },
131 * Microsoft defines these 16-byte (128-bit) GUIDs as:
132 * first 8 bytes are in little-endian order
133 * next 8 bytes are in big-endian order
135 * Eg.: AaBbCcDd-EeFf-GgHh-IiJj-KkLlMmNnOoPp:
137 * to convert to byte string do as follow:
139 * $Dd $Cc $Bb $Aa $Ff $Ee $Hh $Gg $Ii $Jj $Kk $Ll $Mm $Nn $Oo $Pp
141 * See http://www.microsoft.com/windows/windowsmedia/forpros/format/asfspec.aspx
/*
 * ASF object GUIDs, stored in the byte order in which they appear in the
 * file, so that they can be compared with memcmp() against the 16 bytes read
 * from an object header.
 *
 * Every literal holds exactly 16 bytes; the arrays are deliberately not
 * NUL-terminated.
 */
static const char header_guid[16] = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C";
/* 5FBF03B5-A92E-11CF-8EE3-00C00C205365: the "\x8E\xE3" pair must be present,
 * otherwise the array is zero-padded and never matches a real object. */
static const char header_extension_guid[16] = "\xB5\x03\xBF\x5F\x2E\xA9\xCF\x11\x8E\xE3\x00\xC0\x0C\x20\x53\x65";
static const char file_properties_guid[16] = "\xA1\xDC\xAB\x8C\x47\xA9\xCF\x11\x8E\xE4\x00\xC0\x0C\x20\x53\x65";
static const char stream_properties_guid[16] = "\x91\x07\xDC\xB7\xB7\xA9\xCF\x11\x8E\xE6\x00\xC0\x0C\x20\x53\x65";
static const char stream_type_audio_guid[16] = "\x40\x9E\x69\xF8\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B";
static const char stream_type_video_guid[16] = "\xC0\xEF\x19\xBC\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B";
static const char content_description_guid[16] = "\x33\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C";
static const char extended_content_description_guid[16] = "\x40\xA4\xD0\xD2\x07\xE3\xD2\x11\x97\xF0\x00\xA0\xC9\x5E\xA8\x50";
/* Same bytes as before, spelled as hex escapes like the other entries. */
static const char metadata_guid[16] = "\xEA\xCB\xF8\xC5\xAF\x5B\x77\x48\x84\x67\xAA\x8C\x44\xFA\x4C\xCA";
static const char metadata_library_guid[16] = "\x94\x1C\x23\x44\x98\x94\xD1\x49\xA1\x41\x1D\x13\x4E\x45\x70\x54";
static const char content_encryption_object_guid[16] = "\xFB\xB3\x11\x22\x23\xBD\xD2\x11\xB4\xB7\x00\xA0\xC9\x55\xFC\x6E";
static const char extended_content_encryption_object_guid[16] = "\x14\xE6\x8A\x29\x22\x26\x17\x4C\xB9\x35\xDA\xE0\x7E\xE9\x28\x9C";
/* Attribute names as raw UTF-16LE bytes without a terminator (two bytes per
 * character, so each array size is twice the character count). They are
 * compared with memcmp() against the names read from the file. */
/* "WM/AlbumArtist" */
156 static const char attr_name_wm_album_artist[28] = "\x57\x00\x4d\x00\x2f\x00\x41\x00\x6c\x00\x62\x00\x75\x00\x6d\x00\x41\x00\x72\x00\x74\x00\x69\x00\x73\x00\x74\x00";
/* "WM/AlbumTitle" */
157 static const char attr_name_wm_album_title[26] = "\x57\x00\x4d\x00\x2f\x00\x41\x00\x6c\x00\x62\x00\x75\x00\x6d\x00\x54\x00\x69\x00\x74\x00\x6c\x00\x65\x00";
/* "WM/Genre" */
158 static const char attr_name_wm_genre[16] = "\x57\x00\x4d\x00\x2f\x00\x47\x00\x65\x00\x6e\x00\x72\x00\x65\x00";
/* "WM/TrackNumber" */
159 static const char attr_name_wm_track_number[28] = "\x57\x00\x4d\x00\x2f\x00\x54\x00\x72\x00\x61\x00\x63\x00\x6b\x00\x4e\x00\x75\x00\x6d\x00\x62\x00\x65\x00\x72\x00";
/*
 * Assemble a little-endian integer from raw bytes.
 *
 * @param data       bytes to decode, least significant byte first
 * @param type_size  size in bytes of the integer type the caller wants
 * @param data_size  number of bytes available in data
 * @return the decoded value; only min(type_size, data_size) bytes (and never
 *         more than 8) are consumed, missing high bytes are treated as zero.
 */
static long long
_to_number(const char *data, unsigned int type_size, unsigned int data_size)
{
    unsigned long long sum = 0;
    unsigned int last, i;

    last = data_size > type_size ? type_size : data_size;
    /* never shift past the width of the accumulator */
    if (last > sizeof(sum))
        last = sizeof(sum);

    /* Widen each byte to 64 bits *before* shifting: shifting the promoted
     * int by 32 or more is undefined, and a byte with the high bit set at
     * position 3 would otherwise sign-extend into the upper half. */
    for (i = 0; i < last; i++)
        sum |= (unsigned long long) (unsigned char) data[i] << (i * 8);

    return (long long) sum;
}
/* Read a 2-byte little-endian value from fd; the statement taken when the
 * read is short is the one following this test. */
179 if (read(fd, &v, 2) != 2)
/* NOTE(review): the (short) cast makes values >= 0x8000 negative; callers use
 * the result as lengths and counts -- confirm this is intended. */
181 return (short) _to_number(v, sizeof(unsigned short), 2);
/* Read a 4-byte little-endian value from fd; anything other than a full
 * 4-byte read takes the branch below. */
188 if (read(fd, &v, 4) != 4)
/* Decode the bytes and truncate the result to unsigned int. */
190 return (unsigned int) _to_number(v, sizeof(unsigned int), 4);
/* Read an 8-byte little-endian value from fd; anything other than a full
 * 8-byte read takes the branch below. */
197 if (read(fd, &v, 8) != 8)
/* Decode all 8 bytes; the value is returned as produced by _to_number(). */
199 return _to_number(v, sizeof(unsigned long long), 8);
/*
 * Read up to count raw bytes from fd into a freshly allocated buffer and
 * drop trailing pairs of zero bytes (UTF-16 terminators/padding).
 *
 * @param fd     file descriptor positioned at the string
 * @param count  number of bytes to read
 * @param str    receives the malloc()ed buffer; the caller must free() it.
 *               The buffer is NOT NUL-terminated.
 * @param len    receives the number of meaningful bytes in *str
 * @return 0 on success, -1 if the allocation or the read fails. On failure
 *         *str is set to NULL and *len to 0, so callers that ignore the
 *         return value never see an indeterminate pointer.
 */
static int
_read_string(int fd, size_t count, char **str, unsigned int *len)
{
    char *data;
    ssize_t data_size, size;

    /* malloc(0) may legitimately return NULL; always ask for one byte so a
     * NULL result unambiguously means "out of memory". */
    data = malloc(count ? count : 1);
    if (!data) {
        *str = NULL;
        *len = 0;
        return -1;
    }

    data_size = read(fd, data, count);
    if (data_size == -1) {
        free(data);
        *str = NULL;
        *len = 0;
        return -1;
    }

    /* strip trailing 16-bit zero code units */
    size = data_size;
    while (size >= 2) {
        if (data[size - 1] != '\0' || data[size - 2] != '\0')
            break;
        size -= 2;
    }

    *str = data;
    *len = size;

    return 0;
}
/* Parse the body of a "File Properties Object" and derive info->length from
 * it. The caller (_parse) uses the return value as the number of bytes
 * consumed when skipping the rest of the object. */
229 _parse_file_properties(int fd, struct asf_info *info)
/* On-disk layout of the object body; it is read in a single read() below, so
 * the struct is packed and multi-byte fields stay little-endian until
 * converted with le32toh()/le64toh(). */
234 uint64_t creation_date;
235 uint64_t data_packets_count;
236 uint64_t play_duration;
237 uint64_t send_duration;
240 uint32_t min_data_packet_size;
241 uint32_t max_data_packet_size;
242 uint32_t max_bitrate;
243 } __attribute__((packed)) props;
246 r = read(fd, &props, sizeof(props));
/* a short read means the object is truncated */
247 if (r != sizeof(props))
/* NOTE(review): bit 0 of flags is presumably the ASF "broadcast" flag, for
 * which the duration fields are not meaningful -- confirm against the spec. */
251 if (le32toh(props.flags) & 0x1)
254 /* ASF spec 01.20.06 sec. 3.2: we need to subtract the preroll value from
255 * the duration in order to obtain the real duration */
/* play_duration is divided by NSEC100_PER_SEC and preroll by MSEC_PER_SEC
 * before subtracting.
 * NOTE(review): the subtraction is unsigned; a preroll larger than the play
 * duration would wrap -- confirm whether that can happen. */
256 info->length = (unsigned int)(
257 (le64toh(props.play_duration) / NSEC100_PER_SEC) -
258 le64toh(props.preroll) / MSEC_PER_SEC);
263 static struct lms_string_size
264 _audio_codec_id_to_str(uint16_t id)
268 for (i = 0; _audio_codecs[i].name.str != NULL; i++)
269 if (_audio_codecs[i].id == id)
270 return _audio_codecs[i].name;
272 return _audio_codecs[i].name;
275 static struct lms_string_size
276 _video_codec_id_to_str(uint8_t id[4])
280 for (i = 0; _video_codecs[i].name.str != NULL; i++)
281 if (memcmp(id, _video_codecs[i].id, 4) == 0)
282 return _video_codecs[i].name;
284 return _video_codecs[i].name;
/* Parse the body of a "Stream Properties Object" into a newly allocated
 * struct stream. The caller (_parse) passes &s as pstream and uses the return
 * value as the number of bytes consumed when skipping the rest of the
 * object. */
288 _parse_stream_properties(int fd, struct stream **pstream)
/* On-disk layout of the fixed part of the object; read with one read(), hence
 * packed, and converted with le16toh()/le32toh() on use. */
291 char stream_type[16];
292 char error_correction_type[16];
293 uint64_t time_offset;
294 uint32_t type_specific_len;
295 uint32_t error_correction_data_len;
297 uint32_t reserved; /* don't use, unaligned */
298 } __attribute__((packed)) props;
/* zero-initialised so every field not set below starts as 0/NULL */
305 s = calloc(1, sizeof(struct stream));
309 r = read(fd, &props, sizeof(props));
310 if (r != sizeof(props))
/* classify the stream by comparing its type GUID with the known ones */
313 if (memcmp(props.stream_type, stream_type_audio_guid, 16) == 0)
314 s->base.type = LMS_STREAM_TYPE_AUDIO;
315 else if (memcmp(props.stream_type, stream_type_video_guid, 16) == 0)
316 s->base.type = LMS_STREAM_TYPE_VIDEO;
/* the stream number is the low 7 bits of the flags field */
322 s->base.stream_id = le16toh(props.flags) & 0x7F;
323 /* Not a valid stream */
324 if (!s->base.stream_id)
327 if (s->base.type == LMS_STREAM_TYPE_AUDIO) {
/* the type-specific data must hold at least 18 bytes before the audio fields
 * below are read */
328 if (le32toh(props.type_specific_len) < 18)
/* read in file order: codec id (word), channel count (word), sampling rate
 * (dword), then a dword that is multiplied by 8 to give the bitrate */
331 s->base.codec = _audio_codec_id_to_str(_read_word(fd));
332 s->base.audio.channels = _read_word(fd);
333 s->priv.sampling_rate = _read_dword(fd);
334 s->base.audio.bitrate = _read_dword(fd) * 8;
/* On-disk layout of the video type-specific data; only width, height and
 * compression_id are used below. */
338 uint32_t width_unused;
339 uint32_t height_unused;
341 uint16_t data_size_unused;
347 uint16_t bits_per_pixel;
348 uint8_t compression_id[4];
351 /* other fields are ignored */
352 } __attribute__((packed)) video;
353 int r2 = read(fd, &video, sizeof(video));
354 unsigned int num, den;
/* NOTE(review): the right-hand side is an unsigned subtraction; a video.size
 * smaller than the subtracted constant wraps to a huge value -- confirm the
 * intended behaviour for such files. */
360 if ((unsigned int) r2 < get_le32(&video.size) -
361 (sizeof(video) - offsetof(typeof(video), width)))
364 s->base.codec = _video_codec_id_to_str(video.compression_id);
365 s->base.video.width = get_le32(&video.width);
366 s->base.video.height = get_le32(&video.height);
/* aspect ratio is stored as the string "num:den" built from the values
 * produced by reduce_gcd() for width and height */
368 reduce_gcd(s->base.video.width, s->base.video.height, &num, &den);
/* NOTE(review): the asprintf() result is not checked; the code below relies
 * on aspect_ratio.str being NULL on failure -- confirm for the target libc. */
369 asprintf(&s->base.video.aspect_ratio.str, "%u:%u", num, den);
370 s->base.video.aspect_ratio.len = s->base.video.aspect_ratio.str ?
371 strlen(s->base.video.aspect_ratio.str) : 0;
/* Parse a "Content Description Object": read title and artist into info and
 * skip the remaining fields. */
384 _parse_content_description(lms_charset_conv_t *cs_conv, int fd, struct asf_info *info)
/* the object starts with five 16-bit byte lengths, one per field, followed
 * by the field data in the same order */
386 int title_length = _read_word(fd);
387 int artist_length = _read_word(fd);
388 int copyright_length = _read_word(fd);
389 int comment_length = _read_word(fd);
390 int rating_length = _read_word(fd);
/* NOTE(review): the _read_string() results are not checked before the
 * strings are handed to the charset converter. */
392 _read_string(fd, title_length, &info->title.str, &info->title.len);
393 lms_charset_conv_force(cs_conv, &info->title.str, &info->title.len);
394 _read_string(fd, artist_length, &info->artist.str, &info->artist.len);
395 lms_charset_conv_force(cs_conv, &info->artist.str, &info->artist.len);
396 /* ignore copyright, comment and rating */
397 lseek(fd, copyright_length + comment_length + rating_length, SEEK_CUR);
/* Read the header of one attribute record: its name (raw bytes, allocated by
 * _read_string(), to be freed by the caller), its data type and the size of
 * its data. The data itself is left unread. */
401 _parse_attribute_name(int fd,
403 unsigned int *attr_name_len,
407 int attr_name_length;
/* record layout: 16-bit name length, name bytes, 16-bit type, 16-bit size */
409 attr_name_length = _read_word(fd);
410 _read_string(fd, attr_name_length, attr_name, attr_name_len);
411 *attr_type = _read_word(fd);
412 *attr_size = _read_word(fd);
/* Read attr_size bytes of string attribute data into a newly allocated
 * buffer (*attr_data, *attr_data_len) and run it through the charset
 * converter. */
416 _parse_attribute_string_data(lms_charset_conv_t *cs_conv,
420 unsigned int *attr_data_len)
/* NOTE(review): the _read_string() result is ignored here. */
422 _read_string(fd, attr_size, attr_data, attr_data_len);
423 lms_charset_conv_force(cs_conv, attr_data, attr_data_len);
/* Advance fd past the data of an attribute that is not going to be parsed.
 * The number of bytes skipped depends on attr_type (and on kind for some
 * types); variable-length types use attr_size. */
427 _skip_attribute_data(int fd, int kind, int attr_type, int attr_size)
431 lseek(fd, 2, SEEK_CUR);
436 lseek(fd, 4, SEEK_CUR);
438 lseek(fd, 2, SEEK_CUR);
/* fixed-size integer types: 4 bytes for DWORD, 8 bytes for QWORD */
441 case ATTR_TYPE_DWORD:
442 lseek(fd, 4, SEEK_CUR);
445 case ATTR_TYPE_QWORD:
446 lseek(fd, 8, SEEK_CUR);
/* strings and byte blobs: the size comes from the attribute header */
449 case ATTR_TYPE_UNICODE:
450 case ATTR_TYPE_BYTES:
452 lseek(fd, attr_size, SEEK_CUR);
461 _parse_extended_content_description_object(lms_charset_conv_t *cs_conv, int fd, struct asf_info *info)
463 int count = _read_word(fd);
465 unsigned int attr_name_len;
466 int attr_type, attr_size;
469 _parse_attribute_name(fd,
470 &attr_name, &attr_name_len,
471 &attr_type, &attr_size);
472 if (attr_type == ATTR_TYPE_UNICODE) {
473 if (memcmp(attr_name, attr_name_wm_album_title, attr_name_len) == 0)
474 _parse_attribute_string_data(cs_conv,
478 else if (memcmp(attr_name, attr_name_wm_genre, attr_name_len) == 0)
479 _parse_attribute_string_data(cs_conv,
483 else if (memcmp(attr_name, attr_name_wm_album_artist, attr_name_len) == 0)
484 _parse_attribute_string_data(cs_conv,
488 else if (memcmp(attr_name, attr_name_wm_track_number, attr_name_len) == 0) {
490 unsigned int trackno_len;
491 _parse_attribute_string_data(cs_conv,
496 info->trackno = atoi(trackno);
501 _skip_attribute_data(fd, 0, attr_type, attr_size);
504 _skip_attribute_data(fd, 0, attr_type, attr_size);
/* Decide from the file extension whether this plugin handles path. */
510 _match(struct plugin *p, const char *path, int len, int base)
/* look the extension up in _exts; i is the index of the matching entry */
514 i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
/* Return index + 1 disguised as a pointer, so that a match on entry 0 is not
 * a NULL pointer; _parse() subtracts 1 to get the index back. */
518 return (void*)(i + 1);
/* Release a list of streams linked through base.next, together with the
 * per-type strings they own. */
521 static void streams_free(struct stream *streams)
/* remember the current node and advance first, so the node can be released
 * without losing the rest of the list */
524 struct stream *s = streams;
525 streams = (struct stream *) s->base.next;
527 switch (s->base.type) {
528 case LMS_STREAM_TYPE_VIDEO:
/* the aspect ratio string is allocated by asprintf() in
 * _parse_stream_properties() */
529 free(s->base.video.aspect_ratio.str);
540 * Parse the "Extended Stream Properties Object" (sec. 4.1). It contains some
541 * missing fields: bitrate and language
543 * It may also contain the "Stream Properties Object" embedded in it.
544 * For language we also need to parse "Language List Object" (sec 4.6) which
545 * contains an array with all the languages used (they are in UTF-16, so they
546 * need to be properly converted).
548 * Oh, well... there's also the optional "Stream Bitrate Properties Object"
549 * which also may contain the bitrate. This property must be very important so
550 * they duplicated it everywhere.
552 * Apparently the length can also be obtained from the "Extended Stream
553 * Properties Object": start_time, end_time (paying attention to the preroll
554 * field in the header).
556 * Knowing the length, frame rate can be calculated with the "AverageTime Per
557 * Frame" field of the "Extended Stream Properties Object"
/* Parse one ASF/WMA/WMV file and store its metadata in the audio or video
 * database. match is the value returned by _match() for this file
 * (extension index + 1). */
560 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
/* everything except type starts zeroed: NULL strings, no streams */
562 struct asf_info info = { .type = LMS_STREAM_TYPE_UNKNOWN };
563 int r, fd, num_objects, i;
567 fd = open(finfo->path, O_RDONLY);
/* the file must start with the 16-byte Header Object GUID */
573 if (read(fd, &guid, 16) != 16) {
578 if (memcmp(guid, header_guid, 16) != 0) {
579 fprintf(stderr, "ERROR: invalid header (%s).\n", finfo->path);
/* rest of the header object: 64-bit size, 32-bit number of contained
 * objects, then 2 bytes that are skipped */
584 size = _read_qword(fd);
585 num_objects = _read_dword(fd);
587 lseek(fd, 2, SEEK_CUR);
/* Walk the contained objects. Each one starts with a 16-byte GUID and a
 * 64-bit size; that is the 24 subtracted from size whenever the remainder of
 * an object is skipped below. */
589 for (i = 0; i < num_objects; ++i) {
591 size = _read_qword(fd);
593 if (memcmp(guid, file_properties_guid, 16) == 0) {
594 r = _parse_file_properties(fd, &info);
/* r bytes of the body were consumed by the parser; skip what is left */
597 lseek(fd, size - (24 + r), SEEK_CUR);
598 } else if (memcmp(guid, stream_properties_guid, 16) == 0) {
599 struct stream *s = NULL;
600 r = _parse_stream_properties(fd, &s);
604 lseek(fd, size - (24 + r), SEEK_CUR);
/* once a video stream has been seen the file stays classified as video;
 * otherwise the type follows the stream just parsed */
607 if (info.type != LMS_STREAM_TYPE_VIDEO)
608 info.type = s->base.type;
/* link the existing list behind the new stream */
610 s->base.next = (struct lms_stream *) info.streams;
613 } else if (memcmp(guid, content_description_guid, 16) == 0)
614 _parse_content_description(plugin->cs_conv, fd, &info);
615 else if (memcmp(guid, extended_content_description_guid, 16) == 0)
616 _parse_extended_content_description_object(plugin->cs_conv, fd,
618 else if (memcmp(guid, content_encryption_object_guid, 16) == 0 ||
619 memcmp(guid, extended_content_encryption_object_guid, 16) == 0) {
620 /* ignore DRM'd files */
621 fprintf(stderr, "ERROR: ignoring DRM'd file %s\n", finfo->path);
/* skip the whole body of an object (size minus its 24-byte header) */
625 lseek(fd, size - 24, SEEK_CUR);
628 /* try to define stream type by extension */
629 if (info.type == LMS_STREAM_TYPE_UNKNOWN) {
/* undo the +1 applied by _match() to get the index into _exts */
630 long ext_idx = ((long)match) - 1;
631 if (strcmp(_exts[ext_idx].str, ".wma") == 0)
632 info.type = LMS_STREAM_TYPE_AUDIO;
633 /* consider wmv and asf as video */
635 info.type = LMS_STREAM_TYPE_VIDEO;
/* normalise the tag strings before they are stored */
638 lms_string_size_strip_and_free(&info.title);
639 lms_string_size_strip_and_free(&info.artist);
640 lms_string_size_strip_and_free(&info.album);
641 lms_string_size_strip_and_free(&info.genre);
/* derive a title from the file name and its extension */
644 info.title = str_extract_name_from_path(finfo->path, finfo->path_len,
646 &_exts[((long) match) - 1],
649 if (info.type == LMS_STREAM_TYPE_AUDIO) {
650 struct lms_audio_info audio_info = { };
652 audio_info.id = finfo->id;
653 audio_info.title = info.title;
654 audio_info.artist = info.artist;
655 audio_info.album = info.album;
656 audio_info.genre = info.genre;
657 audio_info.trackno = info.trackno;
658 audio_info.length = info.length;
659 audio_info.container = _container;
661 /* ignore additional streams, use only the first one */
/* "first" here means the head of info.streams */
663 struct stream *s = info.streams;
664 audio_info.channels = s->base.audio.channels;
665 audio_info.bitrate = s->base.audio.bitrate;
666 audio_info.sampling_rate = s->priv.sampling_rate;
667 audio_info.codec = s->base.codec;
669 r = lms_db_audio_add(plugin->audio_db, &audio_info);
671 struct lms_video_info video_info = { };
673 video_info.id = finfo->id;
674 video_info.title = info.title;
675 video_info.artist = info.artist;
676 video_info.length = info.length;
/* the video record gets the whole stream list, not just one stream */
677 video_info.streams = (struct lms_stream *) info.streams;
678 r = lms_db_video_add(plugin->video_db, &video_info);
/* release everything collected for this file */
682 streams_free(info.streams);
684 free(info.title.str);
685 free(info.artist.str);
686 free(info.album.str);
687 free(info.genre.str);
/* advise the kernel that this file's data will not be needed again */
689 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
/* Create the per-plugin resources: audio and video database handles bound to
 * ctxt->db, and a charset converter with "UTF-16LE" registered. Each creation
 * is checked for failure before continuing. */
696 _setup(struct plugin *plugin, struct lms_context *ctxt)
698 plugin->audio_db = lms_db_audio_new(ctxt->db);
699 if (!plugin->audio_db)
701 plugin->video_db = lms_db_video_new(ctxt->db);
702 if (!plugin->video_db)
704 plugin->cs_conv = lms_charset_conv_new();
705 if (!plugin->cs_conv)
/* NOTE(review): the result of lms_charset_conv_add() is not checked. */
707 lms_charset_conv_add(plugin->cs_conv, "UTF-16LE");
/* Start both database handles. The two results are OR-ed together, so r is
 * non-zero if either call returned non-zero. */
713 _start(struct plugin *plugin, struct lms_context *ctxt)
716 r = lms_db_audio_start(plugin->audio_db);
717 r |= lms_db_video_start(plugin->video_db);
/* Release the resources created in _setup(). The database handles are only
 * freed when they are non-NULL. */
722 _finish(struct plugin *plugin, struct lms_context *ctxt)
724 if (plugin->audio_db)
725 lms_db_audio_free(plugin->audio_db);
726 if (plugin->video_db)
727 lms_db_video_free(plugin->video_db);
729 lms_charset_conv_free(plugin->cs_conv);
/* Close callback for the plugin object; installed as plugin->plugin.close by
 * lms_plugin_open(). */
735 _close(struct plugin *plugin)
741 API struct lms_plugin *
742 lms_plugin_open(void)
744 struct plugin *plugin;
746 plugin = (struct plugin *)malloc(sizeof(*plugin));
747 plugin->plugin.name = _name;
748 plugin->plugin.match = (lms_plugin_match_fn_t)_match;
749 plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
750 plugin->plugin.close = (lms_plugin_close_fn_t)_close;
751 plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
752 plugin->plugin.start = (lms_plugin_start_fn_t)_start;
753 plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
755 return (struct lms_plugin *)plugin;
/* Return the descriptor of this plugin. The descriptor is a function-local
 * static object. */
758 API const struct lms_plugin_info *
759 lms_plugin_info(void)
761 static struct lms_plugin_info info = {
/* human-readable description of the formats handled */
764 "Microsoft WMA, WMV and ASF",
/* project URL */
767 "http://lms.garage.maemo.org"