2 * Copyright (C) 2008 by INdT
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * @author Andre Moreira Magalhaes <andre.magalhaes@openbossa.org>
24 * asf/wma file parser.
31 #define _XOPEN_SOURCE 600
33 #include <lightmediascanner_plugin.h>
34 #include <lightmediascanner_db.h>
37 #include <sys/types.h>
/* Number of 100-nanosecond ticks in one second (divisor applied to play_duration). */
45 #define NSEC100_PER_SEC 10000000ULL
/* Number of milliseconds in one second (divisor applied to preroll). */
46 #define MSEC_PER_SEC 1000ULL
/* Initial stream kind in _parse(); kept until a stream-properties object or the file extension decides otherwise. */
49 STREAM_TYPE_UNKNOWN = 0,
/* Attribute value is a string; the extended-content-description parser only decodes attributes of this type. */
55 ATTR_TYPE_UNICODE = 0,
/* Tag fields gathered for one file (NOTE(review): the struct header is outside this view; presumably struct asf_info, as used by _parse). */
/* Each string is owned by this record and released with free() at the end of _parse(). */
65 struct lms_string_size title;
66 struct lms_string_size artist;
67 struct lms_string_size album;
68 struct lms_string_size genre;
/* Set from the WM/TrackNumber attribute through atoi(); values above UCHAR_MAX do not fit. */
69 unsigned char trackno;
/* Base descriptor; lms_plugin_open() returns the enclosing struct cast to struct lms_plugin *. */
73 struct lms_plugin plugin;
/* Database handles created in _setup() and released in _finish(). */
74 lms_db_audio_t *audio_db;
75 lms_db_video_t *video_db;
/* Charset converter created in _setup() with "UTF-16LE" registered; used on strings read from the file. */
76 lms_charset_conv_t *cs_conv;
/* Plugin name stored in plugin.name by lms_plugin_open(). */
79 static const char _name[] = "asf";
/* Container label copied into audio_info.container by _parse(). */
80 static const struct lms_string_size _container = LMS_STATIC_STRING_SIZE("asf");
/* Extensions handled by this plugin. _match() returns the matching index + 1 and _parse() uses that index to look the entry up again. */
81 static const struct lms_string_size _exts[] = {
82 LMS_STATIC_STRING_SIZE(".wma"),
83 LMS_STATIC_STRING_SIZE(".wmv"),
84 LMS_STATIC_STRING_SIZE(".asf")
/* Plugin categories (entries are outside this view). */
86 static const char *_cats[] = {
/* Plugin authors. */
91 static const char *_authors[] = {
92 "Andre Moreira Magalhaes",
/* Kind of stream, set by _parse_stream_properties() from the stream-type GUID. */
98 enum StreamTypes type;
/* Audio sampling rate, filled from a 32-bit field by _parse_stream_properties(). */
104 uint32_t sampling_rate;
112 * Microsoft defines these 16-byte (128-bit) GUIDs as:
113 * the first three fields (4, 2 and 2 bytes) are each stored in little-endian order
114 * the remaining 8 bytes are stored in the order they are written
116 * E.g.: AaBbCcDd-EeFf-GgHh-IiJj-KkLlMmNnOoPp:
118 * to convert to a byte string, do as follows:
120 * $Dd $Cc $Bb $Aa $Ff $Ee $Hh $Gg $Ii $Jj $Kk $Ll $Mm $Nn $Oo $Pp
122 * See http://www.microsoft.com/windows/windowsmedia/forpros/format/asfspec.aspx
/* On-disk byte form of the ASF object GUIDs this parser compares against with memcmp(..., 16). */
/* Each array is exactly 16 bytes, so the string literal's terminating NUL is not stored. */
/* 75B22630-668E-11CF-A6D9-00AA0062CE6C */
124 static const char header_guid[16] = "\x30\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C";
/* 8CABDCA1-A947-11CF-8EE4-00C00C205365 */
125 static const char file_properties_guid[16] = "\xA1\xDC\xAB\x8C\x47\xA9\xCF\x11\x8E\xE4\x00\xC0\x0C\x20\x53\x65";
/* B7DC0791-A9B7-11CF-8EE6-00C00C205365 */
126 static const char stream_properties_guid[16] = "\x91\x07\xDC\xB7\xB7\xA9\xCF\x11\x8E\xE6\x00\xC0\x0C\x20\x53\x65";
/* F8699E40-5B4D-11CF-A8FD-00805F5C442B */
127 static const char stream_type_audio_guid[16] = "\x40\x9E\x69\xF8\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B";
/* BC19EFC0-5B4D-11CF-A8FD-00805F5C442B */
128 static const char stream_type_video_guid[16] = "\xC0\xEF\x19\xBC\x4D\x5B\xCF\x11\xA8\xFD\x00\x80\x5F\x5C\x44\x2B";
/* 75B22633-668E-11CF-A6D9-00AA0062CE6C */
129 static const char content_description_guid[16] = "\x33\x26\xB2\x75\x8E\x66\xCF\x11\xA6\xD9\x00\xAA\x00\x62\xCE\x6C";
/* D2D0A440-E307-11D2-97F0-00A0C95EA850 */
130 static const char extended_content_description_guid[16] = "\x40\xA4\xD0\xD2\x07\xE3\xD2\x11\x97\xF0\x00\xA0\xC9\x5E\xA8\x50";
/* 5FBF03B5-A92E-11CF-8EE3-00C00C205365; bytes B5 03 BF 5F 2E A9 CF 11 8E E3 00 C0 0C 20 53 65 ('_' '.' ' ' 'S' 'e' are literal ASCII bytes) */
131 static const char header_extension_guid[16] = "\xb5\x03\xbf_.\xa9\xcf\x11\x8e\xe3\x00\xc0\x0c Se";
/* C5F8CBEA-5BAF-4877-8467-AA8C44FA4CCA; bytes EA CB F8 C5 AF 5B 77 48 84 67 AA 8C 44 FA 4C CA (mix of hex, octal and ASCII escapes) */
132 static const char metadata_guid[16] = "\xEA\xCB\xF8\xC5\xAF[wH\204g\xAA\214D\xFAL\xCA";
/* 44231C94-9498-49D1-A141-1D134E457054; bytes 94 1C 23 44 98 94 D1 49 A1 41 1D 13 4E 45 70 54 (mix of hex, octal and ASCII escapes) */
133 static const char metadata_library_guid[16] = "\224\034#D\230\224\321I\241A\x1d\x13NEpT";
/* 2211B3FB-BD23-11D2-B4B7-00A0C955FC6E */
134 static const char content_encryption_object_guid[16] = "\xFB\xB3\x11\x22\x23\xBD\xD2\x11\xB4\xB7\x00\xA0\xC9\x55\xFC\x6E";
/* 298AE614-2622-4C17-B935-DAE07EE9289C */
135 static const char extended_content_encryption_object_guid[16] = "\x14\xE6\x8A\x29\x22\x26\x17\x4C\xB9\x35\xDA\xE0\x7E\xE9\x28\x9C";
/* Attribute names as UTF-16LE byte strings without a terminating NUL (array sizes equal the byte counts). */
/* "WM/AlbumArtist" (14 code units, 28 bytes) */
137 static const char attr_name_wm_album_artist[28] = "\x57\x00\x4d\x00\x2f\x00\x41\x00\x6c\x00\x62\x00\x75\x00\x6d\x00\x41\x00\x72\x00\x74\x00\x69\x00\x73\x00\x74\x00";
/* "WM/AlbumTitle" (13 code units, 26 bytes) */
138 static const char attr_name_wm_album_title[26] = "\x57\x00\x4d\x00\x2f\x00\x41\x00\x6c\x00\x62\x00\x75\x00\x6d\x00\x54\x00\x69\x00\x74\x00\x6c\x00\x65\x00";
/* "WM/Genre" (8 code units, 16 bytes) */
139 static const char attr_name_wm_genre[16] = "\x57\x00\x4d\x00\x2f\x00\x47\x00\x65\x00\x6e\x00\x72\x00\x65\x00";
/* "WM/TrackNumber" (14 code units, 28 bytes) */
140 static const char attr_name_wm_track_number[28] = "\x57\x00\x4d\x00\x2f\x00\x54\x00\x72\x00\x61\x00\x63\x00\x6b\x00\x4e\x00\x75\x00\x6d\x00\x62\x00\x65\x00\x72\x00";
/**
 * Assemble a little-endian integer from raw bytes.
 *
 * Byte 0 of @p data becomes the least significant byte of the result.
 * At most min(type_size, data_size) bytes are consumed, and never more
 * than the width of the 64-bit accumulator.
 *
 * @param data      bytes as read from the file.
 * @param type_size size in bytes of the integer type being decoded.
 * @param data_size number of valid bytes available in @p data.
 * @return the decoded value; 0 when no bytes are consumed.
 */
static long long
_to_number(const char *data, unsigned int type_size, unsigned int data_size)
{
    unsigned long long sum = 0;
    unsigned int last, i;

    last = data_size > type_size ? type_size : data_size;
    /* Never shift by the full width of the accumulator or more. */
    if (last > sizeof(sum))
        last = sizeof(sum);

    /* Widen each byte before shifting: shifting in int is undefined for
     * shift counts >= 32 and overflows for a byte >= 0x80 shifted by 24. */
    for (i = 0; i < last; i++)
        sum |= (unsigned long long)(unsigned char)data[i] << (i * 8);

    /* Values above LLONG_MAX convert in an implementation-defined way. */
    return (long long)sum;
}
/* Read 2 bytes from fd and decode them with _to_number(), i.e. least significant byte first (only part of the body is visible here). */
160 if (read(fd, &v, 2) != 2)
/* NOTE(review): where short is 16 bits wide, the cast to short makes values with the top bit set negative; callers keep the result in int length fields. Confirm this is intended. */
162 return (short) _to_number(v, sizeof(unsigned short), 2);
/* Read 4 bytes from fd and decode them with _to_number(), least significant byte first (only part of the body is visible here). */
169 if (read(fd, &v, 4) != 4)
/* The cast keeps only the low bits that fit in unsigned int. */
171 return (unsigned int) _to_number(v, sizeof(unsigned int), 4);
/* Read 8 bytes from fd and decode them with _to_number(), least significant byte first (only part of the body is visible here). */
178 if (read(fd, &v, 8) != 8)
/* NOTE(review): the result is only correct if _to_number() performs its shifts in a 64-bit type. */
180 return _to_number(v, sizeof(unsigned long long), 8);
/* Read up to count bytes from fd into a freshly malloc'ed buffer; the result is presumably handed back through *str and *len (that part of the body is not visible here). */
184 _read_string(int fd, size_t count, char **str, unsigned int *len)
187 ssize_t data_size, size;
/* NOTE(review): the malloc result is passed straight to read() without a NULL check. */
189 data = malloc(sizeof(char) * count);
190 data_size = read(fd, data, count);
191 if (data_size == -1) {
/* True unless the last two bytes are both zero, i.e. unless the data ends in a 16-bit NUL. */
/* NOTE(review): indexing size - 2 needs size >= 2; confirm a guard exists in the lines not shown. */
198 if (data[size - 1] != '\0' || data[size - 2] != '\0')
/* Read the fixed-size File Properties payload from fd and derive info->length, in seconds, from it. */
/* cs_conv is not used in the lines visible here. */
210 _parse_file_properties(lms_charset_conv_t *cs_conv, int fd,
211 struct lms_audio_info *info)
216 uint64_t creation_date;
217 uint64_t data_packets_count;
218 uint64_t play_duration;
219 uint64_t send_duration;
222 uint32_t min_data_packet_size;
223 uint32_t max_data_packet_size;
224 uint32_t max_bitrate;
/* Packed so the struct has no padding and can be filled with a single read(). */
225 } __attribute__((packed)) props;
/* Anything other than a full-size read is treated specially (the branch body is not visible here). */
228 r = read(fd, &props, sizeof(props));
229 if (r != sizeof(props))
/* Bit 0 of flags gets special handling (branch body not visible). NOTE(review): presumably the ASF "broadcast" flag; confirm against the spec. */
233 if (le32toh(props.flags) & 0x1)
236 /* ASF spec 01.20.06 sec. 3.2: we need to subtract the preroll value from
237 * the duration in order to obtain the real duration */
/* play_duration is scaled from 100 ns ticks and preroll from milliseconds; each is truncated to whole seconds before the subtraction. */
238 info->length = (unsigned int)((le64toh(props.play_duration) / NSEC100_PER_SEC)
239 - le64toh(props.preroll) / MSEC_PER_SEC);
/* Read a Stream Properties object from fd into a newly allocated struct stream; the record is presumably returned through *pstream (not visible here). */
245 _parse_stream_properties(int fd, struct stream **pstream)
248 char stream_type[16];
249 char error_correction_type[16];
250 uint64_t time_offset;
251 uint32_t type_specific_len;
252 uint32_t error_correction_data_len;
254 uint32_t reserved; /* don't use, unaligned */
/* Packed so the struct has no padding and can be filled with a single read(). */
255 } __attribute__((packed)) props;
/* calloc zero-fills the record, so fields not assigned below remain 0 (type 0 is STREAM_TYPE_UNKNOWN). */
262 s = calloc(1, sizeof(struct stream));
266 r = read(fd, &props, sizeof(props));
267 if (r != sizeof(props))
/* Classify the stream by comparing its 16-byte type GUID with the known audio and video GUIDs. */
270 if (memcmp(props.stream_type, stream_type_audio_guid, 16) == 0)
271 s->type = STREAM_TYPE_AUDIO;
272 else if (memcmp(props.stream_type, stream_type_video_guid, 16) == 0)
273 s->type = STREAM_TYPE_VIDEO;
/* The stream id is the low 7 bits of the 16-bit flags field. */
279 s->id = le16toh(props.flags) & 0x7F;
280 /* Not a valid stream */
/* Audio streams: the type-specific data must be at least 18 bytes long; its leading fields are then read in file order. */
284 if (s->type == STREAM_TYPE_AUDIO) {
285 if (le32toh(props.type_specific_len) < 18)
288 s->audio.codec_id = _read_word(fd);
289 s->audio.channels = _read_word(fd);
290 s->audio.sampling_rate = _read_dword(fd);
291 s->audio.byterate = _read_dword(fd);
/* Parse a Content Description object: five 16-bit length fields, then the strings in the same order. */
/* Title and artist are read and converted into info; copyright, comment and rating are skipped. */
305 _parse_content_description(lms_charset_conv_t *cs_conv, int fd, struct asf_info *info)
307 int title_length = _read_word(fd);
308 int artist_length = _read_word(fd);
309 int copyright_length = _read_word(fd);
310 int comment_length = _read_word(fd);
311 int rating_length = _read_word(fd);
/* NOTE(review): the results of _read_string() and lms_charset_conv_force() are not checked in these lines. */
313 _read_string(fd, title_length, &info->title.str, &info->title.len);
314 lms_charset_conv_force(cs_conv, &info->title.str, &info->title.len);
315 _read_string(fd, artist_length, &info->artist.str, &info->artist.len);
316 lms_charset_conv_force(cs_conv, &info->artist.str, &info->artist.len);
317 /* ignore copyright, comment and rating */
318 lseek(fd, copyright_length + comment_length + rating_length, SEEK_CUR);
/* Read one attribute header from fd: 16-bit name length, the name bytes, then the 16-bit value type and 16-bit value size. */
/* The name buffer comes from _read_string(); part of the parameter list is outside this view. */
322 _parse_attribute_name(int fd,
324 unsigned int *attr_name_len,
328 int attr_name_length;
330 attr_name_length = _read_word(fd);
331 _read_string(fd, attr_name_length, attr_name, attr_name_len);
332 *attr_type = _read_word(fd);
333 *attr_size = _read_word(fd);
/* Read attr_size bytes of string data from fd into *attr_data, then convert it in place with cs_conv. */
/* Part of the parameter list is outside this view. */
337 _parse_attribute_string_data(lms_charset_conv_t *cs_conv,
341 unsigned int *attr_data_len)
343 _read_string(fd, attr_size, attr_data, attr_data_len);
344 lms_charset_conv_force(cs_conv, attr_data, attr_data_len);
/* Advance fd past an attribute value that is not being decoded. */
/* The conditions selecting the first three seeks (presumably on kind and attr_type) are not visible here. */
348 _skip_attribute_data(int fd, int kind, int attr_type, int attr_size)
352 lseek(fd, 2, SEEK_CUR);
357 lseek(fd, 4, SEEK_CUR);
359 lseek(fd, 2, SEEK_CUR);
/* Fixed-width values: 4 bytes for a DWORD, 8 bytes for a QWORD. */
362 case ATTR_TYPE_DWORD:
363 lseek(fd, 4, SEEK_CUR);
366 case ATTR_TYPE_QWORD:
367 lseek(fd, 8, SEEK_CUR);
/* Variable-length values are skipped using the size taken from the attribute header. */
370 case ATTR_TYPE_UNICODE:
371 case ATTR_TYPE_BYTES:
373 lseek(fd, attr_size, SEEK_CUR);
/* Walk the attributes of an Extended Content Description object. */
/* String attributes named WM/AlbumTitle, WM/Genre, WM/AlbumArtist and WM/TrackNumber are decoded; other values are skipped. */
382 _parse_extended_content_description_object(lms_charset_conv_t *cs_conv, int fd, struct asf_info *info)
/* 16-bit attribute count read from the object. */
384 int count = _read_word(fd);
386 unsigned int attr_name_len;
387 int attr_type, attr_size;
390 _parse_attribute_name(fd,
391 &attr_name, &attr_name_len,
392 &attr_type, &attr_size);
393 if (attr_type == ATTR_TYPE_UNICODE) {
/* NOTE(review): each memcmp compares attr_name_len bytes, a length taken from the file rather than from the constant. */
/* A shorter name can match as a prefix and a longer one reads past the constant array; confirm whether a length check is needed. */
394 if (memcmp(attr_name, attr_name_wm_album_title, attr_name_len) == 0)
395 _parse_attribute_string_data(cs_conv,
399 else if (memcmp(attr_name, attr_name_wm_genre, attr_name_len) == 0)
400 _parse_attribute_string_data(cs_conv,
404 else if (memcmp(attr_name, attr_name_wm_album_artist, attr_name_len) == 0)
405 _parse_attribute_string_data(cs_conv,
409 else if (memcmp(attr_name, attr_name_wm_track_number, attr_name_len) == 0) {
411 unsigned int trackno_len;
412 _parse_attribute_string_data(cs_conv,
/* atoi() gives no error indication, and the result is narrowed to the unsigned char trackno field. */
417 info->trackno = atoi(trackno);
/* Attributes that are not decoded are skipped so the file offset reaches the next attribute. */
422 _skip_attribute_data(fd, 0, attr_type, attr_size);
425 _skip_attribute_data(fd, 0, attr_type, attr_size);
/* Match callback: looks up the extension of path in _exts (only part of the body is visible here). */
432 _match(struct plugin *p, const char *path, int len, int base)
436 i = lms_which_extension(path, len, _exts, LMS_ARRAY_SIZE(_exts));
/* The match token is the extension index plus one, so index 0 is not returned as a null pointer; _parse() subtracts one to recover the index. */
440 return (void*)(i + 1);
/* Parse callback: reads the ASF header objects of finfo->path, collects tags and stream data, and stores the result in the audio or video database. */
/* match is the token produced by _match(). Many lines of the body, including error paths, are outside this view. */
444 _parse(struct plugin *plugin, struct lms_context *ctxt, const struct lms_file_info *finfo, void *match)
446 struct asf_info info = { };
447 struct lms_audio_info audio_info = { };
448 struct lms_video_info video_info = { };
449 int r, fd, num_objects, i;
452 int stream_type = STREAM_TYPE_UNKNOWN;
453 struct stream *streams = NULL;
455 fd = open(finfo->path, O_RDONLY);
/* The file must start with the 16-byte ASF header GUID. */
461 if (read(fd, &guid, 16) != 16) {
466 if (memcmp(guid, header_guid, 16) != 0) {
467 fprintf(stderr, "ERROR: invalid header (%s).\n", finfo->path);
/* Header object: 64-bit size, then the 32-bit count of contained objects. */
472 size = _read_qword(fd);
473 num_objects = _read_dword(fd);
/* Two bytes after the count are skipped (presumably the reserved fields of the header object). */
475 lseek(fd, 2, SEEK_CUR);
477 for (i = 0; i < num_objects; ++i) {
479 size = _read_qword(fd);
481 if (memcmp(guid, file_properties_guid, 16) == 0) {
482 r = _parse_file_properties(plugin->cs_conv, fd, &audio_info);
/* size includes the 24-byte object header (16-byte GUID + 8-byte size); r is passed as the payload bytes already consumed, so this skips the remainder. */
485 lseek(fd, size - (24 + r), SEEK_CUR);
486 } else if (memcmp(guid, stream_properties_guid, 16) == 0) {
488 r = _parse_stream_properties(fd, &s);
492 lseek(fd, size - (24 + r), SEEK_CUR);
/* Video takes precedence: once a video stream has been seen the file stays classified as video. */
496 if (stream_type != STREAM_TYPE_VIDEO)
497 stream_type = s->type;
501 } else if (memcmp(guid, content_description_guid, 16) == 0)
502 _parse_content_description(plugin->cs_conv, fd, &info);
503 else if (memcmp(guid, extended_content_description_guid, 16) == 0)
504 _parse_extended_content_description_object(plugin->cs_conv, fd, &info);
505 else if (memcmp(guid, content_encryption_object_guid, 16) == 0 ||
506 memcmp(guid, extended_content_encryption_object_guid, 16) == 0) {
507 /* ignore DRM'd files */
508 fprintf(stderr, "ERROR: ignoring DRM'd file %s\n", finfo->path);
/* Skip an object's payload using its declared size (the condition selecting this branch is not visible). */
512 lseek(fd, size - 24, SEEK_CUR);
515 /* try to define stream type by extension */
516 if (stream_type == STREAM_TYPE_UNKNOWN) {
517 long ext_idx = ((long)match) - 1;
518 if (strcmp(_exts[ext_idx].str, ".wma") == 0)
519 stream_type = STREAM_TYPE_AUDIO;
520 /* consider wmv and asf as video */
522 stream_type = STREAM_TYPE_VIDEO;
525 lms_string_size_strip_and_free(&info.title);
526 lms_string_size_strip_and_free(&info.artist);
527 lms_string_size_strip_and_free(&info.album);
528 lms_string_size_strip_and_free(&info.genre);
/* No title tag: fall back to the file name without directory and extension. */
530 if (!info.title.str) {
532 ext_idx = ((long)match) - 1;
533 info.title.len = finfo->path_len - finfo->base - _exts[ext_idx].len;
/* NOTE(review): the malloc result is used by memcpy on the next visible line without a NULL check. */
534 info.title.str = malloc((info.title.len + 1) * sizeof(char));
535 memcpy(info.title.str, finfo->path + finfo->base, info.title.len);
536 info.title.str[info.title.len] = '\0';
/* The file-name fallback is converted with the context's converter, not the plugin's UTF-16LE one. */
537 lms_charset_conv(ctxt->cs_conv, &info.title.str, &info.title.len);
/* Diagnostic dump to stderr (any conditional compilation around it is not visible here). */
541 fprintf(stderr, "file %s info\n", finfo->path);
542 fprintf(stderr, "\ttitle='%s'\n", info.title.str);
543 fprintf(stderr, "\tartist='%s'\n", info.artist.str);
544 fprintf(stderr, "\talbum='%s'\n", info.album.str);
545 fprintf(stderr, "\tgenre='%s'\n", info.genre.str);
546 fprintf(stderr, "\ttrackno=%d\n", info.trackno);
549 audio_info.container = _container;
551 if (stream_type == STREAM_TYPE_AUDIO) {
552 audio_info.id = finfo->id;
553 audio_info.title = info.title;
554 audio_info.artist = info.artist;
555 audio_info.album = info.album;
556 audio_info.genre = info.genre;
557 audio_info.trackno = info.trackno;
/* NOTE(review): streams starts as NULL, and stream_type can become AUDIO from the extension alone; confirm a NULL guard exists in the lines not shown. */
559 audio_info.channels = streams->audio.channels;
/* The stream stores a byte rate; the database field is filled with bits per second. */
560 audio_info.bitrate = streams->audio.byterate * 8;
561 audio_info.sampling_rate = streams->audio.sampling_rate;
563 r = lms_db_audio_add(plugin->audio_db, &audio_info);
/* Non-audio files are stored in the video database with id, title and artist. */
565 video_info.id = finfo->id;
566 video_info.title = info.title;
567 video_info.artist = info.artist;
568 r = lms_db_video_add(plugin->video_db, &video_info);
/* Cleanup: starts from the head of the streams list (the loop itself is not visible), then frees the tag strings. */
573 struct stream *s = streams;
578 free(info.title.str);
579 free(info.artist.str);
580 free(info.album.str);
581 free(info.genre.str);
/* Advise the kernel that this file's data will not be needed again soon (offset 0, length 0). */
583 posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
/* Setup callback: creates the audio and video database handles and the charset converter, in that order. */
/* Each result is tested right after creation (the failure branches are not visible here). */
590 _setup(struct plugin *plugin, struct lms_context *ctxt)
592 plugin->audio_db = lms_db_audio_new(ctxt->db);
593 if (!plugin->audio_db)
595 plugin->video_db = lms_db_video_new(ctxt->db);
596 if (!plugin->video_db)
598 plugin->cs_conv = lms_charset_conv_new();
599 if (!plugin->cs_conv)
/* The converter is registered with UTF-16LE as its source charset. */
601 lms_charset_conv_add(plugin->cs_conv, "UTF-16LE");
/* Start callback: starts both database handles. ctxt is not used in the lines visible here. */
607 _start(struct plugin *plugin, struct lms_context *ctxt)
/* The two results are OR-ed together, so a non-zero result from either call leaves r non-zero. */
610 r = lms_db_audio_start(plugin->audio_db);
611 r |= lms_db_video_start(plugin->video_db);
/* Finish callback: releases what _setup() created. The database handles are freed only when non-NULL. */
616 _finish(struct plugin *plugin, struct lms_context *ctxt)
618 if (plugin->audio_db)
619 lms_db_audio_free(plugin->audio_db);
620 if (plugin->video_db)
621 lms_db_video_free(plugin->video_db);
/* NOTE(review): any NULL guard for cs_conv would be on a line not visible here. */
623 lms_charset_conv_free(plugin->cs_conv);
/* Close callback installed by lms_plugin_open(); its body is not visible here. */
629 _close(struct plugin *plugin)
635 API struct lms_plugin *
636 lms_plugin_open(void)
638 struct plugin *plugin;
640 plugin = (struct plugin *)malloc(sizeof(*plugin));
641 plugin->plugin.name = _name;
642 plugin->plugin.match = (lms_plugin_match_fn_t)_match;
643 plugin->plugin.parse = (lms_plugin_parse_fn_t)_parse;
644 plugin->plugin.close = (lms_plugin_close_fn_t)_close;
645 plugin->plugin.setup = (lms_plugin_setup_fn_t)_setup;
646 plugin->plugin.start = (lms_plugin_start_fn_t)_start;
647 plugin->plugin.finish = (lms_plugin_finish_fn_t)_finish;
649 return (struct lms_plugin *)plugin;
/* Plugin descriptor entry point; presumably returns the address of the static info record below (the return statement is not visible here). */
652 API const struct lms_plugin_info *
653 lms_plugin_info(void)
/* Static storage, so the record outlives the call. Only some initialisers are visible here. */
655 static struct lms_plugin_info info = {
658 "Microsoft WMA, WMV and ASF",
661 "http://lms.garage.maemo.org"