2 * Copyright (C) 2008 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
24 * asf/wma file parser.
27 * http://www.microsoft.com/en-us/download/details.aspx?id=14995
30 #include <lightmediascanner_plugin.h>
31 #include <lightmediascanner_db.h>
32 #include <shared/util.h>
36 #include <sys/types.h>
/* Attribute value-type tag for string data. It is compared against the type
 * word that _parse_attribute_name() reads from the file. */
45 ATTR_TYPE_UNICODE = 0,
/* Generic stream description handed to the LMS core; also carries the
 * stream id, type, codec and the link to the next stream in the list. */
55 struct lms_stream base;
/* Audio sampling rate read by _parse_stream_properties() and copied into the
 * audio record by _parse(). */
57 unsigned int sampling_rate;
/* File-level tags. The strings are heap-allocated while parsing and released
 * with free() at the end of _parse(). */
64 struct lms_string_size title;
65 struct lms_string_size artist;
66 struct lms_string_size album;
67 struct lms_string_size genre;
/* Audio or video: taken from the parsed streams, or from the file extension
 * when no stream set it. */
68 enum lms_stream_type type;
/* Track number, filled with atoi() of the track-number attribute. */
70 unsigned char trackno;
/* Head of the singly linked stream list (new streams are prepended). */
72 struct stream *streams;
/* Core plugin descriptor; lms_plugin_open() fills in its callbacks and
 * returns the whole object cast to struct lms_plugin *. */
76 struct lms_plugin plugin;
/* Database handles created in _setup() and released in _finish(). */
77 lms_db_audio_t *audio_db;
78 lms_db_video_t *video_db;
/* Charset converter created in _setup() with "UTF-16LE" registered. */
79 lms_charset_conv_t *cs_conv;
/* Plugin name installed into the plugin descriptor by lms_plugin_open(). */
82 static const char _name[] = "asf";
/* Container name stored in the audio record by _parse(). */
83 static const struct lms_string_size _container = LMS_STATIC_STRING_SIZE("asf");
/* File extensions claimed by this plugin. _match() returns the index of the
 * matching entry plus one, and _parse() maps that value back to an entry. */
84 static const struct lms_string_size _exts[] = {
85 LMS_STATIC_STRING_SIZE(".wma"),
86 LMS_STATIC_STRING_SIZE(".wmv"),
87 LMS_STATIC_STRING_SIZE(".asf")
/* Plugin categories. */
89 static const char *_cats[] = {
/* Plugin authors. */
94 static const char *_authors[] = {
95 "Andre Moreira Magalhaes",
99 /* TODO: Add the gazillion of possible codecs -- possibly a task to gperf */
/* Maps the 16-bit codec id read from an audio stream's type-specific data to
 * a codec name. _audio_codec_id_to_str() scans the table linearly and stops
 * at the first entry whose name.str is NULL.
 * NOTE(review): confirm the table ends with such a NULL-named sentinel. */
100 static const struct {
102 struct lms_string_size name;
103 } _audio_codecs[] = {
104 /* id == 0 is special, check callers if it's needed */
105 { 0x0160, LMS_STATIC_STRING_SIZE("wmav1") },
106 { 0x0161, LMS_STATIC_STRING_SIZE("wmav2") },
107 { 0x0162, LMS_STATIC_STRING_SIZE("wmavpro") },
108 { 0x0163, LMS_STATIC_STRING_SIZE("wmavlossless") },
109 { 0x1600, LMS_STATIC_STRING_SIZE("aac") },
110 { 0x706d, LMS_STATIC_STRING_SIZE("aac") },
111 { 0x4143, LMS_STATIC_STRING_SIZE("aac") },
112 { 0xA106, LMS_STATIC_STRING_SIZE("aac") },
113 { 0xF1AC, LMS_STATIC_STRING_SIZE("flac") },
114 { 0x0055, LMS_STATIC_STRING_SIZE("mp3") },
118 /* TODO: Add the gazillion of possible codecs -- possibly a task to gperf */
/* Maps the 4-byte compression id of a video stream to a codec name.
 * _video_codec_id_to_str() compares the first 4 bytes of each id with
 * memcmp() and stops at the first entry whose name.str is NULL.
 * NOTE(review): confirm the table ends with such a NULL-named sentinel. */
119 static const struct {
121 struct lms_string_size name;
122 } _video_codecs[] = {
123 /* id == 0 is special, check callers if it's needed */
124 { "WMV1", LMS_STATIC_STRING_SIZE("wmv1") },
125 { "WMV2", LMS_STATIC_STRING_SIZE("wmv2") },
126 { "WMV3", LMS_STATIC_STRING_SIZE("wmv3") },
133 * Microsoft defines these 16-byte (128-bit) GUIDs as:
134 * first 8 bytes are in little-endian order
135 * next 8 bytes are in big-endian order
137 * Eg.: AaBbCcDd-EeFf-GgHh-IiJj-KkLlMmNnOoPp:
139 * to convert it to a byte string, reorder the bytes as follows:
141 * $Dd $Cc $Bb $Aa $Ff $Ee $Hh $Gg $Ii $Jj $Kk $Ll $Mm $Nn $Oo $Pp
143 * See http://www.microsoft.com/windows/windowsmedia/forpros/format/asfspec.aspx
/* ASF object and stream-type GUIDs in their on-disk byte order. Each array
 * holds exactly 16 bytes with no terminating NUL, so they must only be used
 * with length-bounded comparisons such as memcmp(..., 16). */
145 static const char header_guid[16] = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C";
146 static const char header_extension_guid[16] = "\xB5\x03\xBF\x5F\x2E\xA9\xCF\x11\x8E\xE3\x00\xC0\x0C\x20\x53\x65";
147 static const char extended_stream_properties_guid[16] = "\xCB\xA5\xE6\x14\x72\xC6\x32\x43\x83\x99\xA9\x69\x52\x06\x5B\x5A";
148 static const char language_list_guid[16] = "\xA9\x46\x43\x7C\xE0\xEF\xFC\x4B\xB2\x29\x39\x3E\xDE\x41\x5C\x85";
149 static const char file_properties_guid[16] = "\xA1\xDC\xAB\x8C\x47\xA9\xCF\x11\x8E\xE4\x00\xC0\x0C\x20\x53\x65";
150 static const char stream_properties_guid[16] = "\x91\x07\xDC\xB7\xB7\xA9\xCF\x11\x8E\xE6\x00\xC0\x0C\x20\x53\x65";
151 static const char stream_type_audio_guid[16] = "\x40\x9E\x69\xF8\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B";
152 static const char stream_type_video_guid[16] = "\xC0\xEF\x19\xBC\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B";
153 static const char content_description_guid[16] = "\x33\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C";
154 static const char extended_content_description_guid[16] = "\x40\xA4\xD0\xD2\x07\xE3\xD2\x11\x97\xF0\x00\xA0\xC9\x5E\xA8\x50";
/* The next two literals mix hex, octal and plain-character escapes; they
 * still encode 16 bytes each:
 *   metadata_guid         = C5F8CBEA-5BAF-4877-8467-AA8C44FA4CCA
 *   metadata_library_guid = 44231C94-9498-49D1-A141-1D134E457054 */
155 static const char metadata_guid[16] = "\xEA\xCB\xF8\xC5\xAF[wH\204g\xAA\214D\xFAL\xCA";
156 static const char metadata_library_guid[16] = "\224\034#D\230\224\321I\241A\x1d\x13NEpT";
157 static const char content_encryption_object_guid[16] = "\xFB\xB3\x11\x22\x23\xBD\xD2\x11\xB4\xB7\x00\xA0\xC9\x55\xFC\x6E";
158 static const char extended_content_encryption_object_guid[16] = "\x14\xE6\x8A\x29\x22\x26\x17\x4C\xB9\x35\xDA\xE0\x7E\xE9\x28\x9C";
/* Attribute names as UTF-16LE byte strings without a terminating NUL; the
 * array size is exactly twice the number of characters. */
/* "WM/AlbumArtist" */
160 static const char attr_name_wm_album_artist[28] = "\x57\x00\x4d\x00\x2f\x00\x41\x00\x6c\x00\x62\x00\x75\x00\x6d\x00\x41\x00\x72\x00\x74\x00\x69\x00\x73\x00\x74\x00";
/* "WM/AlbumTitle" */
161 static const char attr_name_wm_album_title[26] = "\x57\x00\x4d\x00\x2f\x00\x41\x00\x6c\x00\x62\x00\x75\x00\x6d\x00\x54\x00\x69\x00\x74\x00\x6c\x00\x65\x00";
/* "WM/Genre" */
162 static const char attr_name_wm_genre[16] = "\x57\x00\x4d\x00\x2f\x00\x47\x00\x65\x00\x6e\x00\x72\x00\x65\x00";
/* "WM/TrackNumber" */
163 static const char attr_name_wm_track_number[28] = "\x57\x00\x4d\x00\x2f\x00\x54\x00\x72\x00\x61\x00\x63\x00\x6b\x00\x4e\x00\x75\x00\x6d\x00\x62\x00\x65\x00\x72\x00";
/*
 * Assemble a little-endian integer from the leading bytes of a buffer.
 *
 * data:      raw bytes, least-significant byte first.
 * type_size: width in bytes of the integer type the caller wants.
 * data_size: number of valid bytes available in data.
 *
 * Returns the decoded value. At most
 * min(type_size, data_size, sizeof(unsigned long long)) bytes are consumed.
 *
 * Each byte is widened to unsigned long long before it is shifted. Shifting
 * the promoted int instead is undefined for shift counts of 32 or more
 * (reached when decoding 8-byte values) and sign-extends 4-byte values
 * whose most significant byte is 0x80 or above.
 */
static long long
_to_number(const char *data, unsigned int type_size, unsigned int data_size)
{
    unsigned long long sum = 0;
    unsigned int last, i;

    last = data_size > type_size ? type_size : data_size;
    /* Never shift by the full width of the accumulator or more. */
    if (last > sizeof(sum))
        last = sizeof(sum);

    for (i = 0; i < last; i++)
        sum |= (unsigned long long) (unsigned char) data[i] << (i * 8);

    return (long long) sum;
}
/* Read 2 bytes from fd and decode them as a little-endian value through
 * _to_number(). A short read takes the early-exit branch below.
 * NOTE(review): the decoded value is narrowed with a (short) cast, so where
 * short is 16 bits wide, values of 0x8000 and above come out negative.
 * Callers store the result in int lengths and counts -- confirm that this
 * is intended. */
183 if (read(fd, &v, 2) != 2)
185 return (short) _to_number(v, sizeof(unsigned short), 2);
/* Read 4 bytes from fd and decode them as a little-endian value through
 * _to_number(); the result is narrowed to unsigned int. A short read takes
 * the early-exit branch below. */
192 if (read(fd, &v, 4) != 4)
194 return (unsigned int) _to_number(v, sizeof(unsigned int), 4);
/* Read 8 bytes from fd and decode them as a little-endian value through
 * _to_number(). A short read takes the early-exit branch below. */
201 if (read(fd, &v, 8) != 8)
203 return _to_number(v, sizeof(unsigned long long), 8);
/* Read up to count bytes from fd into a freshly allocated buffer and hand
 * the buffer and its length back through str and len.
 *
 * fd:    file descriptor positioned at the string data.
 * count: number of bytes to read.
 * str:   receives the allocated buffer (the caller frees it).
 * len:   receives the resulting length.
 */
207 _read_string(int fd, size_t count, char **str, unsigned int *len)
210 ssize_t data_size, size;
/* NOTE(review): the malloc() result is passed to read() without a NULL
 * check. */
212 data = malloc(sizeof(char) * count);
213 data_size = read(fd, data, count);
214 if (data_size == -1) {
/* Checks whether the last two bytes are both zero, i.e. whether the data
 * already ends in a 16-bit NUL.
 * NOTE(review): this indexes size - 1 and size - 2; confirm that size is
 * guaranteed to be at least 2 at this point. */
221 if (data[size - 1] != '\0' || data[size - 2] != '\0')
/* Parse the body of a File Properties Object: read its fixed-size packed
 * record in one go and derive the play length stored in info->length.
 *
 * fd:   file descriptor positioned at the start of the object body.
 * info: receives the computed length.
 */
233 _parse_file_properties(int fd, struct asf_info *info)
238 uint64_t creation_date;
239 uint64_t data_packets_count;
240 uint64_t play_duration;
241 uint64_t send_duration;
244 uint32_t min_data_packet_size;
245 uint32_t max_data_packet_size;
246 uint32_t max_bitrate;
247 } __attribute__((packed)) props;
250 r = read(fd, &props, sizeof(props));
/* Anything but a full record means the object is truncated or unreadable. */
251 if (r != sizeof(props))
/* Bit 0 of the little-endian flags field gets special treatment.
 * NOTE(review): presumably the "broadcast" flag, for which the duration
 * fields are not meaningful -- confirm against the ASF specification. */
255 if (le32toh(props.flags) & 0x1)
258 /* ASF spec 01.20.06 sec. 3.2: we need to subtract the preroll value from
259 * the duration in order to obtain the real duration */
/* Going by the constant names, play_duration is in 100 ns units and preroll
 * in milliseconds; both are converted with integer division before
 * subtracting. */
260 info->length = (unsigned int)(
261 (le64toh(props.play_duration) / NSEC100_PER_SEC) -
262 le64toh(props.preroll) / MSEC_PER_SEC);
/* Look up the codec name for a 16-bit audio codec id.
 *
 * Scans _audio_codecs until it finds a matching id or reaches the entry whose
 * name.str is NULL. When nothing matches, that terminating entry's name is
 * returned, so the caller gets a string whose str is NULL. */
267 static struct lms_string_size
268 _audio_codec_id_to_str(uint16_t id)
272 for (i = 0; _audio_codecs[i].name.str != NULL; i++)
273 if (_audio_codecs[i].id == id)
274 return _audio_codecs[i].name;
/* No match: i now indexes the terminating entry. */
276 return _audio_codecs[i].name;
/* Look up the codec name for a 4-byte video compression id.
 *
 * Scans _video_codecs, comparing exactly 4 bytes with memcmp(), until it
 * finds a match or reaches the entry whose name.str is NULL. When nothing
 * matches, that terminating entry's name is returned, so the caller gets a
 * string whose str is NULL. */
279 static struct lms_string_size
280 _video_codec_id_to_str(uint8_t id[4])
284 for (i = 0; _video_codecs[i].name.str != NULL; i++)
285 if (memcmp(id, _video_codecs[i].id, 4) == 0)
286 return _video_codecs[i].name;
/* No match: i now indexes the terminating entry. */
288 return _video_codecs[i].name;
/* Return the stream with the given id from info->streams, creating a
 * zero-initialised one and prepending it to the list when none exists yet.
 *
 * info:      parse state owning the stream list.
 * stream_id: stream number taken from the object being parsed.
 */
291 static struct stream * _stream_get_or_create(struct asf_info *info,
292 unsigned int stream_id)
/* First look for a stream an earlier object already created. */
296 for (s = info->streams; s; s = (struct stream *) s->base.next) {
297 if (s->base.stream_id == stream_id)
/* Not found: allocate a new, zeroed entry. */
301 s = calloc(1, sizeof(*s));
305 /* The Stream Properties Object can be anywhere inside the Header Object:
306 * before the Header Extension Object, after it or embedded into the
307 * Extended Stream Properties, inside the Header Extension Object.
309 * When parsing we either create a new stream and prepend it to the list or
310 * we return the one already created by a previous object (see the loop
313 * Note that the stream type is only available in the Stream Properties
314 * Object. A file with an Extended Stream Properties Object referring to a
315 * stream that doesn't have a corresponding Stream Properties is invalid. We
316 * let it into the list, but it won't have the stream_type set. In this case
317 * LMS will end up ignoring the stream when we try to add the file in the
318 * database -- this is why we set type to -1 here */
319 s->base.stream_id = stream_id;
/* Prepend: the new stream points at the current list head. */
321 s->base.next = (struct lms_stream *) info->streams;
/* Copy the values gathered from the Extended Stream Properties Object (held
 * in s->priv) into the type-specific part of s->base. Which fields are
 * copied depends on the stream type: bitrate for audio; bitrate and
 * framerate for video. */
327 static void _stream_copy_extension_properties(struct stream *s)
329 switch (s->base.type) {
330 case LMS_STREAM_TYPE_AUDIO:
331 s->base.audio.bitrate = s->priv.bitrate;
333 case LMS_STREAM_TYPE_VIDEO:
334 s->base.video.bitrate = s->priv.bitrate;
335 s->base.video.framerate = s->priv.framerate;
/* Parse the body of a Stream Properties Object.
 *
 * Reads the fixed-size record, determines the stream number and whether the
 * stream is audio or video, then reads the type-specific data (audio format
 * fields or the video format record) into the matching stream entry.
 *
 * fd:   file descriptor positioned at the start of the object body.
 * info: parse state; the stream list and info->type are updated.
 */
343 _parse_stream_properties(int fd, struct asf_info *info)
346 char stream_type[16];
347 char error_correction_type[16];
348 uint64_t time_offset;
349 uint32_t type_specific_len;
350 uint32_t error_correction_data_len;
352 uint32_t reserved; /* don't use, unaligned */
353 } __attribute__((packed)) props;
354 unsigned int stream_id;
358 r = read(fd, &props, sizeof(props));
359 if (r != sizeof(props))
/* The stream number is the low 7 bits of the little-endian flags word. */
362 stream_id = le16toh(props.flags) & 0x7F;
364 /* Not a valid stream */
/* Classify the stream by comparing its type GUID with the known ones. */
368 if (memcmp(props.stream_type, stream_type_audio_guid, 16) == 0)
369 type = LMS_STREAM_TYPE_AUDIO;
370 else if (memcmp(props.stream_type, stream_type_video_guid, 16) == 0)
371 type = LMS_STREAM_TYPE_VIDEO;
376 s = _stream_get_or_create(info, stream_id);
382 if (s->base.type == LMS_STREAM_TYPE_AUDIO) {
/* The audio fields read below need at least 18 bytes of type-specific
 * data. */
383 if (le32toh(props.type_specific_len) < 18)
/* Audio format fields, in file order: codec id, channel count, sampling
 * rate, then a rate that is multiplied by 8 (presumably bytes per second
 * converted to bits per second -- confirm against the spec). */
386 s->base.codec = _audio_codec_id_to_str(_read_word(fd));
387 s->base.audio.channels = _read_word(fd);
388 s->priv.sampling_rate = _read_dword(fd);
389 s->base.audio.bitrate = _read_dword(fd) * 8;
392 uint32_t width_unused;
393 uint32_t height_unused;
395 uint16_t data_size_unused;
401 uint16_t bits_per_pixel;
402 uint8_t compression_id[4];
405 /* other fields are ignored */
406 } __attribute__((packed)) video;
407 unsigned int num, den;
409 r = read(fd, &video, sizeof(video));
410 if (r != sizeof(video))
/* Sanity-check the size declared inside the record against the number of
 * bytes actually read. */
413 if ((unsigned int) r < get_le32(&video.size) -
414 (sizeof(video) - offsetof(typeof(video), width)))
417 s->base.codec = _video_codec_id_to_str(video.compression_id);
418 s->base.video.width = get_le32(&video.width);
419 s->base.video.height = get_le32(&video.height);
/* Build the "num:den" aspect-ratio string from the reduced width/height.
 * NOTE(review): the asprintf() return value is ignored; whether the
 * pointer is usable after a failure depends on the C library. */
421 reduce_gcd(s->base.video.width, s->base.video.height, &num, &den);
422 asprintf(&s->base.video.aspect_ratio.str, "%u:%u", num, den);
423 s->base.video.aspect_ratio.len = s->base.video.aspect_ratio.str ?
424 strlen(s->base.video.aspect_ratio.str) : 0;
/* Apply values an Extended Stream Properties Object may have stored for
 * this stream already. */
427 _stream_copy_extension_properties(s);
430 /* If there's any video stream, consider the file as video */
431 if (info->type != LMS_STREAM_TYPE_VIDEO)
432 info->type = s->base.type;
/* Parse the body of an Extended Stream Properties Object.
 *
 * Stores the bitrate and the frame rate in the private part of the matching
 * stream entry (creating the entry if needed), then seeks past the
 * variable-length stream-name and payload-extension-system lists.
 *
 * cs_conv: charset converter (not used by the visible lines).
 * fd:      file descriptor positioned at the start of the object body.
 * info:    parse state owning the stream list.
 */
437 static int _parse_extended_stream_properties(lms_charset_conv_t *cs_conv,
438 int fd, struct asf_info *info)
447 uint64_t avg_time_per_frame;
448 uint16_t stream_name_count;
449 uint16_t payload_extension_system_count;
450 } __attribute__((packed)) props;
452 unsigned int stream_id;
456 r = read(fd, &props, sizeof(props));
457 if (r != sizeof(props))
460 stream_id = get_le16(&props.stream_id);
461 s = _stream_get_or_create(info, stream_id);
463 s->priv.bitrate = get_le32(&props.bitrate);
/* Frame rate = units per second divided by the average frame time.
 * NOTE(review): avg_time_per_frame comes straight from the file; a zero
 * value makes this a floating-point division by zero. */
464 s->priv.framerate = (NSEC100_PER_SEC /
465 (double) get_le64(&props.avg_time_per_frame));
/* Skip every stream-name entry: a 2-byte field, then j more bytes. */
466 for (n = get_le16(&props.stream_name_count); n; n--) {
468 lseek(fd, 2, SEEK_CUR);
470 lseek(fd, j, SEEK_CUR);
/* Skip every payload-extension-system entry: 18 bytes, then j more bytes. */
472 for (n = get_le16(&props.payload_extension_system_count); n; n--) {
474 lseek(fd, 18, SEEK_CUR);
476 lseek(fd, j, SEEK_CUR);
482 /* Lazy implementation, let the parsing of subframes to the caller. Technically
483 * this is wrong, since it might parse objects in the extension header that
484 * should be in the header object, however this should parse ok all good files
485 * and eventually the bad ones. */
486 static int _parse_header_extension(lms_charset_conv_t *cs_conv, int fd,
487 struct asf_info *info)
/* Step over the 22 bytes that precede the embedded objects. */
489 lseek(fd, 22, SEEK_CUR);
/* Parse the body of a Content Description Object and fill info->title and
 * info->artist.
 *
 * cs_conv: charset converter applied to both strings.
 * fd:      file descriptor positioned at the start of the object body.
 * info:    receives the title and artist strings.
 */
494 _parse_content_description(lms_charset_conv_t *cs_conv, int fd,
495 struct asf_info *info)
/* The object starts with the byte lengths of its strings. */
497 int title_length = _read_word(fd);
498 int artist_length = _read_word(fd);
/* Skip 6 more bytes of the length table (presumably the three 16-bit lengths
 * of the fields ignored below). */
500 lseek(fd, 6, SEEK_CUR);
/* The string data follows in the same order as the lengths. */
502 _read_string(fd, title_length, &info->title.str, &info->title.len);
503 lms_charset_conv_force(cs_conv, &info->title.str, &info->title.len);
504 _read_string(fd, artist_length, &info->artist.str, &info->artist.len);
505 lms_charset_conv_force(cs_conv, &info->artist.str, &info->artist.len);
507 /* ignore copyright, comment and rating */
/* Read one attribute header: a 16-bit name length, the name bytes, then the
 * 16-bit value type and the 16-bit value size. The name buffer is allocated
 * by _read_string() and handed back to the caller. */
512 _parse_attribute_name(int fd,
514 unsigned int *attr_name_len,
518 int attr_name_length;
520 attr_name_length = _read_word(fd);
521 _read_string(fd, attr_name_length, attr_name, attr_name_len);
522 *attr_type = _read_word(fd);
523 *attr_size = _read_word(fd);
/* Read attr_size bytes of string attribute data into a newly allocated
 * buffer and run it through the charset converter. The buffer and its length
 * are returned through attr_data and attr_data_len. */
527 _parse_attribute_string_data(lms_charset_conv_t *cs_conv,
531 unsigned int *attr_data_len)
533 _read_string(fd, attr_size, attr_data, attr_data_len);
534 lms_charset_conv_force(cs_conv, attr_data, attr_data_len);
/* Seek past the value of an attribute that is not being parsed. The number
 * of bytes skipped depends on the attribute type (and on kind for some
 * cases): fixed widths for the numeric types, attr_size for strings and
 * byte arrays. */
538 _skip_attribute_data(int fd, int kind, int attr_type, int attr_size)
542 lseek(fd, 2, SEEK_CUR);
547 lseek(fd, 4, SEEK_CUR);
549 lseek(fd, 2, SEEK_CUR);
552 case ATTR_TYPE_DWORD:
553 lseek(fd, 4, SEEK_CUR);
556 case ATTR_TYPE_QWORD:
557 lseek(fd, 8, SEEK_CUR);
/* Variable-length values: skip the size given in the attribute header. */
560 case ATTR_TYPE_UNICODE:
561 case ATTR_TYPE_BYTES:
563 lseek(fd, attr_size, SEEK_CUR);
/* Parse the body of an Extended Content Description Object.
 *
 * Reads the attribute count, then walks the attributes. String attributes
 * whose name matches one of the known "WM/..." names are read and converted;
 * every other attribute value is skipped.
 *
 * cs_conv: charset converter applied to string values.
 * fd:      file descriptor positioned at the start of the object body.
 * info:    receives the parsed values (trackno is set via atoi()).
 */
572 _parse_extended_content_description_object(lms_charset_conv_t *cs_conv, int fd,
573 struct asf_info *info)
575 int count = _read_word(fd);
577 unsigned int attr_name_len;
578 int attr_type, attr_size;
582 _parse_attribute_name(fd,
583 &attr_name, &attr_name_len,
584 &attr_type, &attr_size);
585 if (attr_type == ATTR_TYPE_UNICODE) {
/* NOTE(review): every memcmp() below uses attr_name_len, which is derived
 * from data read from the file, not the size of the constant it compares
 * against. A longer name reads past the end of the constant, and a shorter
 * one (including length 0) matches as a prefix. Confirm the length is
 * validated elsewhere or compare lengths first. */
586 if (memcmp(attr_name, attr_name_wm_album_title, attr_name_len) == 0)
587 _parse_attribute_string_data(cs_conv,
591 else if (memcmp(attr_name, attr_name_wm_genre, attr_name_len) == 0)
592 _parse_attribute_string_data(cs_conv,
596 else if (memcmp(attr_name, attr_name_wm_album_artist, attr_name_len) == 0)
597 _parse_attribute_string_data(cs_conv,
601 else if (memcmp(attr_name, attr_name_wm_track_number, attr_name_len) == 0) {
603 unsigned int trackno_len;
604 _parse_attribute_string_data(cs_conv,
/* atoi() yields 0 for non-numeric text; the result is then narrowed to the
 * unsigned char field. */
609 info->trackno = atoi(trackno);
/* Unknown string attribute: skip its value. */
614 _skip_attribute_data(fd, 0, attr_type, attr_size);
/* Non-string attribute: skip its value. */
617 _skip_attribute_data(fd, 0, attr_type, attr_size);
/* Match callback: decide from the file extension whether this plugin
 * handles path.
 *
 * Returns the index of the matching _exts entry plus one, disguised as a
 * pointer (presumably so that index 0 stays distinguishable from a NULL
 * "no match" result). _parse() subtracts the one again. */
625 _match(struct plugin *p, const char *path, int len, int base)
629 i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
633 return (void*)(i + 1);
/* Release a stream list. For each entry, the next pointer is read before
 * the entry is handled; video streams also have their heap-allocated
 * aspect-ratio string freed. */
636 static void streams_free(struct stream *streams)
639 struct stream *s = streams;
/* Advance first so the link survives the release of s. */
640 streams = (struct stream *) s->base.next;
642 switch (s->base.type) {
643 case LMS_STREAM_TYPE_VIDEO:
644 free(s->base.video.aspect_ratio.str);
654 /* TODO: Parse "Language List Object" (sec 4.6) which contains an array with all
655 * the languages used (they are in UTF-16, so they need to be properly
/* Parse callback: extract metadata from one ASF/WMA/WMV file and add it to
 * the audio or video database.
 *
 * plugin: this plugin instance (database handles and charset converter).
 * ctxt:   LMS context (not used by the visible lines).
 * finfo:  path, path length and id of the file being scanned.
 * match:  value returned by _match(), i.e. the _exts index plus one.
 */
658 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
660 struct asf_info info = { .type = LMS_STREAM_TYPE_UNKNOWN };
664 unsigned long long hdrsize;
665 off_t pos_end, pos = 0;
667 fd = open(finfo->path, O_RDONLY);
/* The file must start with the 16-byte Header Object GUID. */
673 if (read(fd, &guid, 16) != 16) {
679 if (memcmp(guid, header_guid, 16) != 0) {
680 fprintf(stderr, "ERROR: invalid header (%s).\n", finfo->path);
/* Header size, then skip 6 more bytes of the header preamble; pos_end is
 * the limit used by the object loop below. */
685 hdrsize = _read_qword(fd);
686 pos_end = lseek(fd, 6, SEEK_CUR) - 24 + hdrsize;
/* Object loop: each object starts with a 16-byte GUID and a 64-bit size
 * (24 bytes), so stop when less than that remains before pos_end. */
690 pos = lseek(fd, 0, SEEK_CUR);
691 if (pos > pos_end - 24)
695 size = _read_qword(fd);
/* Dispatch on the object GUID. */
697 if (memcmp(guid, header_extension_guid, 16) == 0)
698 r = _parse_header_extension(plugin->cs_conv, fd, &info);
699 else if (memcmp(guid, extended_stream_properties_guid, 16) == 0)
700 r = _parse_extended_stream_properties(plugin->cs_conv, fd, &info);
701 else if (memcmp(guid, file_properties_guid, 16) == 0)
702 r = _parse_file_properties(fd, &info);
703 else if (memcmp(guid, stream_properties_guid, 16) == 0)
704 r = _parse_stream_properties(fd, &info);
705 else if (memcmp(guid, language_list_guid, 16) == 0)
707 else if (memcmp(guid, content_description_guid, 16) == 0)
708 r = _parse_content_description(plugin->cs_conv, fd, &info);
709 else if (memcmp(guid, extended_content_description_guid, 16) == 0)
710 r = _parse_extended_content_description_object(plugin->cs_conv, fd,
712 else if (memcmp(guid, content_encryption_object_guid, 16) == 0 ||
713 memcmp(guid, extended_content_encryption_object_guid, 16) == 0)
714 /* ignore DRM'd files */
/* Seek to the object start plus its declared size.
 * NOTE(review): size comes from the file; confirm a zero or bogus size
 * cannot make the loop stall or seek backwards. */
723 pos = lseek(fd, pos + size, SEEK_SET);
728 /* try to define stream type by extension */
729 if (info.type == LMS_STREAM_TYPE_UNKNOWN) {
/* Undo the +1 bias applied by _match(). */
730 long ext_idx = ((long)match) - 1;
731 if (strcmp(_exts[ext_idx].str, ".wma") == 0)
732 info.type = LMS_STREAM_TYPE_AUDIO;
733 /* consider wmv and asf as video */
735 info.type = LMS_STREAM_TYPE_VIDEO;
/* Trim the tag strings; emptied ones are freed by the helper, going by its
 * name. */
738 lms_string_size_strip_and_free(&info.title);
739 lms_string_size_strip_and_free(&info.artist);
740 lms_string_size_strip_and_free(&info.album);
741 lms_string_size_strip_and_free(&info.genre);
/* Derive a title from the file name. */
744 info.title = str_extract_name_from_path(finfo->path, finfo->path_len,
746 &_exts[((long) match) - 1],
749 if (info.type == LMS_STREAM_TYPE_AUDIO) {
750 struct lms_audio_info audio_info = { };
752 audio_info.id = finfo->id;
753 audio_info.title = info.title;
754 audio_info.artist = info.artist;
755 audio_info.album = info.album;
756 audio_info.genre = info.genre;
757 audio_info.trackno = info.trackno;
758 audio_info.length = info.length;
759 audio_info.container = _container;
761 /* ignore additional streams, use only the first one */
/* "First" means the head of the list, i.e. the most recently prepended
 * stream. */
763 struct stream *s = info.streams;
764 audio_info.channels = s->base.audio.channels;
765 audio_info.bitrate = s->base.audio.bitrate;
766 audio_info.sampling_rate = s->priv.sampling_rate;
767 audio_info.codec = s->base.codec;
769 r = lms_db_audio_add(plugin->audio_db, &audio_info);
771 struct lms_video_info video_info = { };
773 video_info.id = finfo->id;
774 video_info.title = info.title;
775 video_info.artist = info.artist;
776 video_info.length = info.length;
/* The video record receives the whole stream list. */
777 video_info.streams = (struct lms_stream *) info.streams;
778 r = lms_db_video_add(plugin->video_db, &video_info);
/* Cleanup: stream list, tag strings, then drop the file from the page
 * cache. */
782 streams_free(info.streams);
784 free(info.title.str);
785 free(info.artist.str);
786 free(info.album.str);
787 free(info.genre.str);
789 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
/* Setup callback: create the audio and video database handles and the
 * charset converter, checking each result, then register "UTF-16LE" with
 * the converter. */
796 _setup(struct plugin *plugin, struct lms_context *ctxt)
798 plugin->audio_db = lms_db_audio_new(ctxt->db);
799 if (!plugin->audio_db)
801 plugin->video_db = lms_db_video_new(ctxt->db);
802 if (!plugin->video_db)
804 plugin->cs_conv = lms_charset_conv_new();
805 if (!plugin->cs_conv)
/* The strings this plugin converts are UTF-16LE encoded. */
807 lms_charset_conv_add(plugin->cs_conv, "UTF-16LE");
/* Start callback: start both database handles. The two results are OR-ed
 * together, so a non-zero result from either start call makes r non-zero. */
813 _start(struct plugin *plugin, struct lms_context *ctxt)
816 r = lms_db_audio_start(plugin->audio_db);
817 r |= lms_db_video_start(plugin->video_db);
/* Finish callback: release what _setup() created -- the audio and video
 * database handles (each only if set) and the charset converter. */
822 _finish(struct plugin *plugin, struct lms_context *ctxt)
824 if (plugin->audio_db)
825 lms_db_audio_free(plugin->audio_db);
826 if (plugin->video_db)
827 lms_db_video_free(plugin->video_db);
829 lms_charset_conv_free(plugin->cs_conv);
/* Close callback, installed into the plugin descriptor by
 * lms_plugin_open(); receives the plugin object that function allocated. */
835 _close(struct plugin *plugin)
841 API struct lms_plugin *
842 lms_plugin_open(void)
844 struct plugin *plugin;
846 plugin = (struct plugin *)malloc(sizeof(*plugin));
847 plugin->plugin.name = _name;
848 plugin->plugin.match = (lms_plugin_match_fn_t)_match;
849 plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
850 plugin->plugin.close = (lms_plugin_close_fn_t)_close;
851 plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
852 plugin->plugin.start = (lms_plugin_start_fn_t)_start;
853 plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
855 return (struct lms_plugin *)plugin;
/* Plugin entry point: expose the static, human-readable description of this
 * plugin (description text and project URL among its fields). */
858 API const struct lms_plugin_info *
859 lms_plugin_info(void)
861 static struct lms_plugin_info info = {
864 "Microsoft WMA, WMV and ASF",
867 "http://lms.garage.maemo.org"