1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: t; c-basic-offset: 2 -*- */
2 /* Copyright 2006-2008 Tim-Philipp Müller <tim centricular net>
3 * Copyright 2005 Jan Schmidt <thaytan@mad.scientist.com>
4 * Copyright 2002,2003 Scott Wheeler <wheeler@kde.org> (portions from taglib)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
29 #include <gst/tag/tag.h>
30 #include <gst/base/gsttypefindhelper.h>
38 #ifndef GST_DISABLE_GST_DEBUG
39 #define GST_CAT_DEFAULT id3v2_ensure_debug_category()
42 static gboolean parse_comment_frame (ID3TagsWorking * work);
43 static gchar *parse_url_link_frame (ID3TagsWorking * work,
44 const gchar ** tag_name);
45 static GArray *parse_text_identification_frame (ID3TagsWorking * work);
46 static gchar *parse_user_text_identification_frame (ID3TagsWorking * work,
47 const gchar ** tag_name);
48 static gchar *parse_unique_file_identifier (ID3TagsWorking * work,
49 const gchar ** tag_name);
50 static gboolean parse_relative_volume_adjustment_two (ID3TagsWorking * work);
51 static void parse_obsolete_tdat_frame (ID3TagsWorking * work);
52 static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work,
53 const gchar * tag_name, const gchar * tag_str);
54 /* Parse a single string into an array of gchar* */
55 static void parse_split_strings (guint8 encoding, gchar * data, gint data_size,
56 GArray ** out_fields);
57 static void free_tag_strings (GArray * fields);
59 id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
61 static gboolean parse_picture_frame (ID3TagsWorking * work);
/* Text-encoding identifiers. The frame parsers in this file read one of these
 * from the first payload byte (work->parse_data[0]) and hand it to the string
 * splitting and conversion helpers, which switch on it. */
63 #define ID3V2_ENCODING_ISO8859 0x00
64 #define ID3V2_ENCODING_UTF16 0x01
65 #define ID3V2_ENCODING_UTF16BE 0x02
66 #define ID3V2_ENCODING_UTF8 0x03
/* id3v2_parse_frame:
 * @work: parser state; the frame id, frame flags, header version and flags
 *        and the frame payload are all read from it
 *
 * Parses the one ID3v2 frame currently described by @work and adds whatever
 * it can extract to the tag list through the per-frame helpers.
 *
 * Steps visible in this function:
 *  - check that the frame id consists of alphanumeric characters only;
 *  - warn about encrypted frames, which are not supported;
 *  - look up the GStreamer tag for the frame id (RVA2, TXXX, TDAT and UFID
 *    are exempt from the no-mapping test);
 *  - for compressed frames or frames with a data length indicator, read the
 *    leading 4-byte size into work->parse_size (GST_READ_UINT32_BE for major
 *    version 3, id3v2_read_synch_uint otherwise, it appears);
 *  - for major version 4, undo unsynchronisation when the header flag or the
 *    frame flag asks for it;
 *  - inflate compressed payloads with zlib uncompress();
 *  - dispatch on the frame id and store any resulting strings.
 *
 * NOTE(review): the return type, the braces and several statements
 * (including every return and goto) are missing from this copy of the file.
 * `result` is presumably what gets returned - confirm against upstream. */
69 id3v2_parse_frame (ID3TagsWorking * work)
71 const gchar *tag_name;
72 gboolean result = FALSE;
74 guint8 *frame_data = work->hdr.frame_data;
75 guint frame_data_size = work->cur_frame_size;
76 gchar *tag_str = NULL;
77 GArray *tag_fields = NULL;
78 guint8 *uu_data = NULL;
81 guint8 *uncompressed_data = NULL;
84 /* Check that the frame id is valid */
85 for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) {
86 if (!g_ascii_isalnum (work->frame_id[i])) {
87 GST_DEBUG ("Encountered invalid frame_id");
92 /* Can't handle encrypted frames right now (in case we ever do, we'll have
93 * to do the decryption after the un-unsynchronisation and decompression,
95 if (work->frame_flags & ID3V2_FRAME_FORMAT_ENCRYPTION) {
96 GST_WARNING ("Encrypted frames are not supported");
100 tag_name = gst_tag_from_id3_tag (work->frame_id);
101 if (tag_name == NULL &&
102 strncmp (work->frame_id, "RVA2", 4) != 0 &&
103 strncmp (work->frame_id, "TXXX", 4) != 0 &&
104 strncmp (work->frame_id, "TDAT", 4) != 0 &&
105 strncmp (work->frame_id, "UFID", 4) != 0) {
109 if (work->frame_flags & (ID3V2_FRAME_FORMAT_COMPRESSION |
110 ID3V2_FRAME_FORMAT_DATA_LENGTH_INDICATOR)) {
111 if (work->hdr.frame_data_size <= 4)
113 if (ID3V2_VER_MAJOR (work->hdr.version) == 3) {
114 work->parse_size = GST_READ_UINT32_BE (frame_data);
116 work->parse_size = id3v2_read_synch_uint (frame_data, 4);
119 frame_data_size -= 4;
120 GST_LOG ("Un-unsynced data size %d (of %d)", work->parse_size,
122 if (work->parse_size > frame_data_size) {
123 GST_WARNING ("ID3v2 frame %s data has invalid size %d (>%d)",
124 work->frame_id, work->parse_size, frame_data_size);
129 /* in v2.3 the frame sizes are not syncsafe, so the entire tag had to be
130 * unsynced. In v2.4 the frame sizes are syncsafe so it's just the frame
131 * data that needs un-unsyncing, but not the frame headers. */
132 if (ID3V2_VER_MAJOR (work->hdr.version) == 4) {
133 if ((work->hdr.flags & ID3V2_HDR_FLAG_UNSYNC) != 0 ||
134 ((work->frame_flags & ID3V2_FRAME_FORMAT_UNSYNCHRONISATION) != 0)) {
135 GST_DEBUG ("Un-unsyncing frame %s", work->frame_id);
136 uu_data = id3v2_ununsync_data (frame_data, &frame_data_size);
137 frame_data = uu_data;
138 GST_MEMDUMP ("ID3v2 frame (un-unsyced)", frame_data, frame_data_size);
142 work->parse_size = frame_data_size;
/* Compressed payload: inflate it into a buffer of the announced size */
144 if (work->frame_flags & ID3V2_FRAME_FORMAT_COMPRESSION) {
146 uLongf destSize = work->parse_size;
149 uncompressed_data = g_malloc (work->parse_size);
151 dest = (Bytef *) uncompressed_data;
152 src = (Bytef *) frame_data;
154 if (uncompress (dest, &destSize, src, frame_data_size) != Z_OK) {
155 g_free (uncompressed_data);
/* NOTE(review): the warning below says "frame %s" but is given tag_name,
 * which may be NULL for RVA2, TXXX, TDAT and UFID (see the lookup further
 * up); work->frame_id looks like the intended argument - confirm. */
159 if (destSize != work->parse_size) {
161 ("Decompressing ID3v2 frame %s did not produce expected size %d bytes (got %lu)",
162 tag_name, work->parse_size, destSize);
163 g_free (uncompressed_data);
167 work->parse_data = uncompressed_data;
169 GST_WARNING ("Compressed ID3v2 tag frame could not be decompressed, because"
170 " libgsttag-" GST_MAJORMINOR " was compiled without zlib support");
175 work->parse_data = frame_data;
/* Dispatch on the frame id to the matching frame parser */
178 if (work->frame_id[0] == 'T') {
179 if (strcmp (work->frame_id, "TDAT") == 0) {
180 parse_obsolete_tdat_frame (work);
182 } else if (strcmp (work->frame_id, "TXXX") == 0) {
183 /* Handle user text frame */
184 tag_str = parse_user_text_identification_frame (work, &tag_name);
186 /* Text identification frame */
187 tag_fields = parse_text_identification_frame (work);
189 } else if (work->frame_id[0] == 'W' && strcmp (work->frame_id, "WXXX") != 0) {
190 /* URL link frame: ISO-8859-1 encoded, one frame per tag */
191 tag_str = parse_url_link_frame (work, &tag_name);
192 } else if (!strcmp (work->frame_id, "COMM")) {
194 result = parse_comment_frame (work);
195 } else if (!strcmp (work->frame_id, "APIC")) {
196 /* Attached picture */
197 result = parse_picture_frame (work);
198 } else if (!strcmp (work->frame_id, "RVA2")) {
199 /* Relative volume */
200 result = parse_relative_volume_adjustment_two (work);
201 } else if (!strcmp (work->frame_id, "UFID")) {
202 /* Unique file identifier */
203 tag_str = parse_unique_file_identifier (work, &tag_name);
/* Parsing is done, so the inflated copy is released and parse_data is
 * pointed back at the frame data */
206 if (work->frame_flags & ID3V2_FRAME_FORMAT_COMPRESSION) {
207 g_free (uncompressed_data);
208 uncompressed_data = NULL;
209 work->parse_data = frame_data;
/* A helper handed back a single string: store it under tag_name */
213 if (tag_str != NULL) {
214 /* g_print ("Tag %s value %s\n", tag_name, tag_str); */
215 result = id3v2_tag_to_taglist (work, tag_name, tag_str);
/* A text frame handed back an array of fields */
218 if (tag_fields != NULL) {
219 if (strcmp (work->frame_id, "TCON") == 0) {
220 /* Genre strings need special treatment */
221 result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields);
/* Every non-empty field is added as a value of its own */
225 for (t = 0; t < tag_fields->len; t++) {
226 tag_str = g_array_index (tag_fields, gchar *, t);
227 if (tag_str != NULL && tag_str[0] != '\0')
228 result |= id3v2_tag_to_taglist (work, tag_name, tag_str);
231 free_tag_strings (tag_fields);
/* parse_comment_frame:
 * @work: parser state; reads work->parse_data and work->parse_size and
 *        appends to work->tags
 *
 * Handles a COMM frame. The payload is read as one encoding byte, a 3-byte
 * language code (lower-cased here) and then encoded strings, of which the
 * first is taken as the description and the second as the comment text.
 *
 * A comment whose description is non-empty, valid UTF-8 and does not match
 * the "c%u" pattern is stored as GST_TAG_EXTENDED_COMMENT in the form
 * "description[lang]=text", or "description=text" when the language bytes
 * do not look like a language code. Otherwise non-empty text is stored as
 * GST_TAG_COMMENT.
 *
 * NOTE(review): the return statements and labels are not visible in this
 * copy; per the prototype the function yields a gboolean. */
240 parse_comment_frame (ID3TagsWorking * work)
245 GArray *fields = NULL;
246 gchar *description, *text;
/* One encoding byte and three language bytes are read below; payloads
 * shorter than 6 bytes are refused */
248 if (work->parse_size < 6)
251 encoding = work->parse_data[0];
252 language[0] = g_ascii_tolower (work->parse_data[1]);
253 language[1] = g_ascii_tolower (work->parse_data[2]);
254 language[2] = g_ascii_tolower (work->parse_data[3]);
257 parse_split_strings (encoding, (gchar *) work->parse_data + 4,
258 work->parse_size - 4, &fields);
/* Both a description and a text field are required */
260 if (fields == NULL || fields->len < 2) {
261 GST_WARNING ("Failed to decode comment frame");
264 description = g_array_index (fields, gchar *, 0);
265 text = g_array_index (fields, gchar *, 1);
267 if (!g_utf8_validate (text, -1, NULL)) {
268 GST_WARNING ("Converted string is not valid utf-8");
272 /* skip our own dummy descriptions (from id3v2mux) */
273 if (strlen (description) > 0 && g_utf8_validate (description, -1, NULL) &&
274 sscanf (description, "c%u", &dummy) != 1) {
277 /* must be either an ISO-639-1 or ISO-639-2 language code */
278 if (language[0] != '\0' &&
279 g_ascii_isalpha (language[0]) &&
280 g_ascii_isalpha (language[1]) &&
281 (g_ascii_isalpha (language[2]) || language[2] == '\0')) {
282 const gchar *lang_code;
284 /* prefer two-letter ISO 639-1 code if we have a mapping */
285 lang_code = gst_tag_get_language_code (language);
286 s = g_strdup_printf ("%s[%s]=%s", description,
287 (lang_code) ? lang_code : language, text);
289 s = g_strdup_printf ("%s=%s", description, text);
291 gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
292 GST_TAG_EXTENDED_COMMENT, s, NULL);
294 } else if (text != NULL && *text != '\0') {
295 gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
296 GST_TAG_COMMENT, text, NULL);
301 free_tag_strings (fields);
/* Failure path: log and release whatever was split off */
306 GST_WARNING ("failed to parse COMM frame");
307 free_tag_strings (fields);
/* parse_text_identification_frame:
 * @work: parser state; reads work->parse_data and work->parse_size
 *
 * Handles a text frame: the first payload byte is the text encoding and the
 * remainder is split into strings by parse_split_strings(). The number of
 * fields found is logged.
 *
 * NOTE(review): the return statements are not visible in this copy; per the
 * prototype a GArray pointer is returned, presumably `fields`. Also,
 * parse_split_strings() leaves its output untouched when nothing could be
 * decoded, so `fields` can still be NULL after the call - confirm that a NULL
 * check precedes the fields->len access on a line not shown here. */
313 parse_text_identification_frame (ID3TagsWorking * work)
316 GArray *fields = NULL;
/* At least the encoding byte plus one byte of text is needed */
318 if (work->parse_size < 2)
321 encoding = work->parse_data[0];
322 parse_split_strings (encoding, (gchar *) work->parse_data + 1,
323 work->parse_size - 1, &fields);
325 if (fields->len > 0) {
326 GST_LOG ("Read %d fields from Text ID frame of size %d with encoding %d"
327 ". First is '%s'", fields->len, work->parse_size - 1, encoding,
328 g_array_index (fields, gchar *, 0));
330 GST_LOG ("Read 0 fields from Text ID frame of size %d with encoding %d",
331 work->parse_size - 1, encoding);
/* link_is_known_license:
 * @url: URL to test
 *
 * Tells whether @url starts with the Creative Commons license prefix, the
 * only license location recognised here. */
339 link_is_known_license (const gchar * url)
341 return g_str_has_prefix (url, "http://creativecommons.org/licenses/");
/* parse_url_link_frame:
 * @work: parser state; reads work->frame_id, work->parse_data and
 *        work->parse_size
 * @tag_name: in/out tag name; overridden for WCOP and WOAF frames
 *
 * Handles a W-prefixed URL link frame (the caller excludes WXXX). The
 * payload, up to the first NUL byte or its full length if there is none, is
 * converted from ISO-8859-1 to UTF-8 and checked with gst_uri_is_valid().
 *
 * WCOP links become GST_TAG_LICENSE_URI when they point at a known license
 * and GST_TAG_COPYRIGHT_URI otherwise; WOAF links become GST_TAG_CONTACT.
 *
 * NOTE(review): the return statements are not visible in this copy; per the
 * prototype a gchar pointer is returned, presumably `link` on success. */
345 parse_url_link_frame (ID3TagsWorking * work, const gchar ** tag_name)
348 gchar *nul, *data, *link;
/* An empty payload has nothing to convert */
352 if (work->parse_size == 0)
355 data = (gchar *) work->parse_data;
356 /* if there's more data then the string is long, we only want to parse the
357 * data up to the terminating zero to g_convert and ignore the rest, as
359 nul = memchr (data, '\0', work->parse_size);
361 len = (gsize) (nul - data);
363 len = work->parse_size;
366 link = g_convert (data, len, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
368 if (link == NULL || !gst_uri_is_valid (link)) {
369 GST_DEBUG ("Invalid URI in %s frame: %s", work->frame_id,
370 GST_STR_NULL (link));
375 /* we don't know if it's a link to a page that explains the copyright
376 * situation, or a link that points to/represents a license, the ID3 spec
377 * does not separate those two things; for now only put known license URIs
378 * into GST_TAG_LICENSE_URI and everything else into GST_TAG_COPYRIGHT_URI */
379 if (strcmp (work->frame_id, "WCOP") == 0) {
380 if (link_is_known_license (link))
381 *tag_name = GST_TAG_LICENSE_URI;
383 *tag_name = GST_TAG_COPYRIGHT_URI;
384 } else if (strcmp (work->frame_id, "WOAF") == 0) {
385 /* can't be bothered to create a CONTACT_URI tag for this, so let's just
386 * put into into GST_TAG_CONTACT, which is where it ends up when reading
387 * the info from vorbis comments as well */
388 *tag_name = GST_TAG_CONTACT;
/* parse_user_text_identification_frame:
 * @work: parser state; reads work->parse_data and work->parse_size
 * @tag_name: tag name for the frame; see the note below
 *
 * Handles a TXXX frame: one encoding byte followed by exactly two strings, a
 * descriptor and a value. The descriptor is looked up with
 * gst_tag_from_id3_user_tag() and a copy of the value string is made, after
 * which the field array is freed.
 *
 * NOTE(review): the line receiving the gst_tag_from_id3_user_tag() result and
 * the return statements are not visible in this copy. The log message that
 * follows suggests the result lands in *tag_name and that `ret` is returned -
 * confirm against upstream. */
396 parse_user_text_identification_frame (ID3TagsWorking * work,
397 const gchar ** tag_name)
401 GArray *fields = NULL;
405 if (work->parse_size < 2)
408 encoding = work->parse_data[0];
410 parse_split_strings (encoding, (gchar *) work->parse_data + 1,
411 work->parse_size - 1, &fields);
/* Anything other than descriptor plus value is treated as malformed */
416 if (fields->len != 2) {
417 GST_WARNING ("Expected 2 fields in TXXX frame, but got %d", fields->len);
418 free_tag_strings (fields);
423 gst_tag_from_id3_user_tag ("TXXX", g_array_index (fields, gchar *, 0));
425 GST_LOG ("TXXX frame of size %d. Mapped descriptor '%s' to GStreamer tag %s",
426 work->parse_size - 1, g_array_index (fields, gchar *, 0),
427 GST_STR_NULL (*tag_name));
/* Copy the value so that it outlives the field array freed below */
430 ret = g_strdup (g_array_index (fields, gchar *, 1));
431 /* GST_LOG ("%s = %s", *tag_name, GST_STR_NULL (ret)); */
436 free_tag_strings (fields);
/* parse_id_string:
 * @work: parser state; reads work->parse_data and work->parse_size
 * @p_str: receives a newly-allocated copy of the identifier, made with
 *         g_strndup()
 * @p_len: presumably receives the identifier length - the assignment is not
 *         visible in this copy
 *
 * Extracts the NUL-terminated identifier string that starts the payload and
 * stores in *p_datalen (a further out parameter whose declaration is on a
 * line missing here) the number of bytes that follow the terminator. An
 * empty identifier, or one with no data after it, is refused.
 *
 * NOTE(review): the return statements are not visible in this copy; callers
 * use the result as a boolean. */
441 parse_id_string (ID3TagsWorking * work, gchar ** p_str, gint * p_len,
446 if (work->parse_size < 2)
/* Look for the terminator, never examining the very last payload byte */
449 for (len = 0; len < work->parse_size - 1; ++len) {
450 if (work->parse_data[len] == '\0')
/* Bytes left once the identifier and its terminator are skipped */
454 datalen = work->parse_size - (len + 1);
455 if (len == 0 || datalen <= 0)
458 *p_str = g_strndup ((gchar *) work->parse_data, len);
460 *p_datalen = datalen;
/* parse_unique_file_identifier:
 * @work: parser state; reads work->parse_data and work->parse_size
 * @tag_name: set to GST_TAG_MUSICBRAINZ_TRACKID for MusicBrainz identifiers
 *
 * Handles a UFID frame: an owner identifier string followed by identifier
 * data. Only the owner "http://musicbrainz.org" with data that is valid
 * UTF-8 is recognised, in which case the data is copied into `ret`. Other
 * owners are only logged.
 *
 * NOTE(review): the return statements and any release of owner_id are not
 * visible in this copy; per the prototype a gchar pointer is returned,
 * presumably `ret`. */
466 parse_unique_file_identifier (ID3TagsWorking * work, const gchar ** tag_name)
469 gchar *owner_id, *data, *ret = NULL;
471 GST_LOG ("parsing UFID frame of size %d", work->parse_size);
473 if (!parse_id_string (work, &owner_id, &len, &datalen))
/* The identifier data starts right after the owner string terminator */
476 data = (gchar *) work->parse_data + len + 1;
477 GST_LOG ("UFID owner ID: %s (+ %d bytes of data)", owner_id, datalen);
479 if (strcmp (owner_id, "http://musicbrainz.org") == 0 &&
480 g_utf8_validate (data, datalen, NULL)) {
481 *tag_name = GST_TAG_MUSICBRAINZ_TRACKID;
482 ret = g_strndup (data, datalen);
484 GST_INFO ("Unknown UFID owner ID: %s", owner_id);
491 /* parse data and return length of the next string in the given encoding,
492 * including the NUL terminator */
/* @encoding: one of the ID3V2_ENCODING_* values
 * @data: start of the string
 * @data_size: number of bytes available at @data
 *
 * Single-byte encodings (ISO-8859-1, UTF-8) are scanned byte by byte; the
 * UTF-16 variants are scanned in 2-byte steps for a double-NUL terminator.
 *
 * NOTE(review): the comparison inside the first loop and all return
 * statements are not visible in this copy. */
494 scan_encoded_string (guint8 encoding, gchar * data, gint data_size)
499 case ID3V2_ENCODING_ISO8859:
500 case ID3V2_ENCODING_UTF8:
501 for (i = 0; i < data_size; ++i) {
506 case ID3V2_ENCODING_UTF16:
507 case ID3V2_ENCODING_UTF16BE:
508 /* we don't care about BOMs here and treat them as part of the string */
509 /* Find '\0\0' terminator */
510 for (i = 0; i < data_size - 1; i += 2) {
511 if (data[i] == '\0' && data[i + 1] == '\0')
/* parse_picture_frame:
 * @work: parser state; work->parse_data and work->parse_size are advanced
 *        as the fields are consumed
 *
 * Handles an APIC frame. Fields are consumed in this order: text encoding
 * byte, image format or MIME string, picture type byte, description string
 * (skipped), and finally the image data, which is passed to
 * gst_tag_list_add_id3_image() together with the picture type.
 *
 * NOTE(review): several statements (among them the ones stepping over the
 * single-byte fields, the return statements and the error labels) are not
 * visible in this copy; per the prototype the function yields a gboolean. */
523 parse_picture_frame (ID3TagsWorking * work)
525 guint8 txt_encoding, pic_type;
526 gchar *mime_str = NULL;
529 GST_LOG ("APIC frame (ID3v2.%u)", ID3V2_VER_MAJOR (work->hdr.version));
531 if (work->parse_size < 1 + 1 + 1 + 1 + 1)
532 goto not_enough_data;
534 txt_encoding = work->parse_data[0];
538 /* Read image format; in early ID3v2 versions this is a fixed-length
539 * 3-character string without terminator; in later versions (>= 2.3.0)
540 * this is a NUL-terminated string of variable length */
541 if (ID3V2_VER_MAJOR (work->hdr.version) < 3) {
542 if (work->parse_size < 3)
543 goto not_enough_data;
545 mime_str = g_strndup ((gchar *) work->parse_data, 3);
548 if (!parse_id_string (work, &mime_str, &len, &datalen))
550 ++len; /* for string terminator */
553 if (work->parse_size < len + 1 + 1 + 1)
554 goto not_enough_data;
/* Step over the image format string */
556 work->parse_data += len;
557 work->parse_size -= len;
559 /* Read image type */
560 pic_type = work->parse_data[0];
564 GST_LOG ("APIC frame mime type : %s", GST_STR_NULL (mime_str));
565 GST_LOG ("APIC frame picture type : 0x%02x", (guint) pic_type);
567 if (work->parse_size < 1 + 1)
568 goto not_enough_data;
570 len = scan_encoded_string (txt_encoding, (gchar *) work->parse_data,
576 /* just skip the description string ... */
577 GST_LOG ("Skipping description string (%d bytes in original coding)", len);
579 if (work->parse_size < len + 1)
580 goto not_enough_data;
582 work->parse_data += len;
583 work->parse_size -= len;
585 GST_DEBUG ("image data is %u bytes", work->parse_size);
/* NOTE(review): parse_size is logged with %u just above; if the field is
 * unsigned, only a value of exactly 0 can trigger this test - confirm the
 * type of the field. */
587 if (work->parse_size <= 0)
588 goto not_enough_data;
590 if (!gst_tag_list_add_id3_image (work->tags, (guint8 *) work->parse_data,
591 work->parse_size, pic_type)) {
600 GST_DEBUG ("not enough data, skipping APIC frame");
601 /* fall through to error */
605 GST_DEBUG ("problem parsing APIC frame, skipping");
/* Channel byte value that the RVA2 parser below treats as the master
 * channel (going by the macro name) */
611 #define ID3V2_RVA2_CHANNEL_MASTER 1
/* parse_relative_volume_adjustment_two:
 * @work: parser state; reads work->parse_data and work->parse_size and
 *        appends to work->tags
 *
 * Handles an RVA2 frame. After the identification string come a channel
 * byte, a signed 16-bit big-endian adjustment that is divided by 512 to get
 * dB, a byte giving the number of peak bits, and the peak value itself.
 *
 * Only the master channel with identification "track" or "album" is mapped,
 * to the track or album gain and peak tags respectively; the peak is added
 * only when peak_bits is greater than zero.
 *
 * The value returned on the last line is TRUE when a gain or peak tag name
 * was selected. */
614 parse_relative_volume_adjustment_two (ID3TagsWorking * work)
616 const gchar *gain_tag_name = NULL;
617 const gchar *peak_tag_name = NULL;
618 gdouble gain_dB, peak_val;
620 guint8 *data, chan, peak_bits;
622 gint len, datalen, i;
624 if (!parse_id_string (work, &id, &len, &datalen))
/* Channel byte, 2 gain bytes and the peak-bits byte must all be present */
627 if (datalen < (1 + 2 + 1)) {
628 GST_WARNING ("broken RVA2 frame, data size only %d bytes", datalen);
633 data = work->parse_data + len + 1;
634 chan = GST_READ_UINT8 (data);
635 gain_dB = (gdouble) ((gint16) GST_READ_UINT16_BE (data + 1)) / 512.0;
636 /* The meaning of the peak value is not defined in the ID3v2 spec. However,
637 * the first/only implementation of this seems to have been in XMMS, and
638 * other libs (like mutagen) seem to follow that implementation as well:
639 * see http://bugs.xmms.org/attachment.cgi?id=113&action=view */
640 peak_bits = GST_READ_UINT8 (data + 1 + 2);
641 if (peak_bits > 64) {
642 GST_WARNING ("silly peak precision of %d bits, ignoring", (gint) peak_bits);
646 datalen -= 1 + 2 + 1;
/* NOTE(review): no check that 2 bytes remain (datalen) is visible before
 * this 16-bit read - confirm against upstream. */
647 if (peak_bits == 16) {
648 peak = GST_READ_UINT16_BE (data);
651 for (i = 0; i < (GST_ROUND_UP_8 (peak_bits) / 8) && datalen > 0; ++i) {
653 peak |= GST_READ_UINT8 (data);
/* NOTE(review): for peak_bits == 0 the shift count below is 64; if `peak`
 * is a 64-bit integer (its declaration is not shown) that shift is
 * undefined in C unless a guard sits on a line missing from this copy. */
659 peak = peak << (64 - GST_ROUND_UP_8 (peak_bits));
661 gst_guint64_to_gdouble (peak) / gst_util_guint64_to_gdouble (G_MAXINT64);
662 GST_LOG ("RVA2 frame: id=%s, chan=%u, adj=%.2fdB, peak_bits=%u, peak=%.2f",
663 id, chan, gain_dB, (guint) peak_bits, peak_val);
665 if (chan == ID3V2_RVA2_CHANNEL_MASTER && strcmp (id, "track") == 0) {
666 gain_tag_name = GST_TAG_TRACK_GAIN;
667 peak_tag_name = GST_TAG_TRACK_PEAK;
668 } else if (chan == ID3V2_RVA2_CHANNEL_MASTER && strcmp (id, "album") == 0) {
669 gain_tag_name = GST_TAG_ALBUM_GAIN;
670 peak_tag_name = GST_TAG_ALBUM_PEAK;
672 GST_INFO ("Unhandled RVA2 frame id '%s' for channel %d", id, chan);
676 gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
677 gain_tag_name, gain_dB, NULL);
679 if (peak_tag_name && peak_bits > 0) {
680 gst_tag_list_add (work->tags, GST_TAG_MERGE_APPEND,
681 peak_tag_name, peak_val, NULL);
686 return (gain_tag_name != NULL || peak_tag_name != NULL);
/* parse_obsolete_tdat_frame:
 * @work: parser state; on success work->pending_day and work->pending_month
 *        are set
 *
 * Handles a TDAT frame. Only a payload that starts with the ISO-8859-1
 * encoding byte followed by four ASCII digits is accepted; the first two
 * digits are stored as the day and the last two as the month. */
690 parse_obsolete_tdat_frame (ID3TagsWorking * work)
692 if (work->parse_size >= 5 &&
693 work->parse_data[0] == ID3V2_ENCODING_ISO8859 &&
694 g_ascii_isdigit (work->parse_data[1]) &&
695 g_ascii_isdigit (work->parse_data[2]) &&
696 g_ascii_isdigit (work->parse_data[3]) &&
697 g_ascii_isdigit (work->parse_data[4])) {
698 work->pending_day = (10 * g_ascii_digit_value (work->parse_data[1])) +
699 g_ascii_digit_value (work->parse_data[2]);
700 work->pending_month = (10 * g_ascii_digit_value (work->parse_data[3])) +
701 g_ascii_digit_value (work->parse_data[4]);
702 GST_LOG ("date (dd/mm) %02u/%02u", work->pending_day, work->pending_month);
/* id3v2_tag_to_taglist:
 * @work: parser state; values are added to work->tags
 * @tag_name: GStreamer tag to store under; its registered GType is looked
 *            up with gst_tag_get_type()
 * @tag_str: string value taken from the frame
 *
 * Converts @tag_str to a value for @tag_name and adds it to the tag list.
 * Conversions visible here:
 *  - values parsed with %d accept "n" or "n/total"; the total goes into
 *    GST_TAG_TRACK_COUNT or GST_TAG_ALBUM_VOLUME_COUNT depending on the tag;
 *  - GST_TAG_DURATION is parsed with strtoul() and multiplied by 1000 * 1000;
 *  - strings already stored under the tag are compared against @tag_str so
 *    that each unique string is added once;
 *  - dates are first completed to a full year-month-day string;
 *  - everything else goes through g_value_transform(), with a g_strtod()
 *    fallback for doubles that is merged with GST_TAG_MERGE_KEEP.
 *
 * NOTE(review): the switch/case labels choosing between these branches and
 * the return statements are not visible in this copy; per the prototype the
 * function yields a gboolean. */
707 id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
708 const gchar * tag_str)
710 GType tag_type = gst_tag_get_type (tag_name);
711 GstTagList *tag_list = work->tags;
/* NOTE(review): the "¤t" token here and further down looks like a
 * mis-encoded "&current" (the leading "&curren" was turned into the currency
 * sign) - restore it from upstream. */
721 if (sscanf (tag_str, "%d/%d", ¤t, &total) == 2) {
723 GST_WARNING ("Ignoring invalid value for total %d in tag %s",
726 if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
727 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
728 GST_TAG_TRACK_COUNT, total, NULL);
729 } else if (strcmp (tag_name, GST_TAG_ALBUM_VOLUME_NUMBER) == 0) {
730 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
731 GST_TAG_ALBUM_VOLUME_COUNT, total, NULL);
734 } else if (sscanf (tag_str, "%d", ¤t) != 1) {
735 /* Not an integer in the string */
736 GST_WARNING ("Tag string for tag %s does not contain an integer - "
737 "ignoring", tag_name);
742 GST_WARNING ("Ignoring invalid value %d in tag %s", current, tag_name);
744 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND, tag_name, current,
/* Duration is the only tag expected on this path (asserted below) */
753 g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0);
754 tmp = strtoul (tag_str, NULL, 10);
758 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
759 GST_TAG_DURATION, tmp * 1000 * 1000, NULL);
766 /* make sure we add each unique string only once per tag, we don't want
767 * to have the same genre in the genre list multiple times, for example,
768 * or the same DiscID in there twice just because it's contained in the
769 * tag multiple times under different TXXX user tags */
770 num = gst_tag_list_get_tag_size (tag_list, tag_name);
771 for (i = 0; i < num; ++i) {
772 val = gst_tag_list_get_value_index (tag_list, tag_name, i);
773 if (val != NULL && strcmp (g_value_get_string (val), tag_str) == 0)
777 gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
778 tag_name, tag_str, NULL);
786 GValue dest = { 0, };
788 /* Ensure that any date string is complete */
789 if (tag_type == GST_TYPE_DATE) {
790 guint year = 1901, month = 1, day = 1;
792 /* Dates can be yyyy-MM-dd, yyyy-MM or yyyy, but we need
794 if (sscanf (tag_str, "%04u-%02u-%02u", &year, &month, &day) == 0)
797 tmp = g_strdup_printf ("%04u-%02u-%02u", year, month, day);
801 /* handles anything else */
802 g_value_init (&src, G_TYPE_STRING);
803 g_value_set_string (&src, (const gchar *) tag_str);
804 g_value_init (&dest, tag_type);
806 if (g_value_transform (&src, &dest)) {
807 gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND,
808 tag_name, &dest, NULL);
809 } else if (tag_type == G_TYPE_DOUBLE) {
810 /* replaygain tags in TXXX frames ... */
811 g_value_set_double (&dest, g_strtod (tag_str, NULL));
812 gst_tag_list_add_values (tag_list, GST_TAG_MERGE_KEEP,
813 tag_name, &dest, NULL);
814 GST_LOG ("Converted string '%s' to double %f", tag_str,
815 g_value_get_double (&dest));
817 GST_WARNING ("Failed to transform tag from string to type '%s'",
818 g_type_name (tag_type));
821 g_value_unset (&src);
822 g_value_unset (&dest);
831 /* Check that an array of characters contains only digits */
/* @chars: characters to test
 * @size: how many characters of @chars are examined
 *
 * NOTE(review): the return statements are not visible in this copy; the
 * caller uses the result as a boolean. */
833 id3v2_are_digits (const gchar * chars, gint size)
837 for (i = 0; i < size; i++) {
838 if (!g_ascii_isdigit (chars[i]))
/* id3v2_genre_string_to_taglist:
 * @work: parser state, passed through to id3v2_tag_to_taglist()
 * @tag_name: tag to store the genre under
 * @tag_str: genre text; must not be NULL
 * @len: number of characters of @tag_str to examine
 *
 * Adds one genre. An all-digit string is treated as a genre number and
 * replaced by the name gst_tag_id3_genre_get() gives for it; "RX" and "CR"
 * (compared case-insensitively) become "Remix" and "Cover"; anything else is
 * stored as it is.
 *
 * NOTE(review): the name returned by gst_tag_id3_genre_get() is used without
 * a check on the lines shown - confirm it cannot be NULL for unknown
 * numbers. A line between the digit test and the RX/CR tests is also missing
 * from this copy. */
845 id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
846 const gchar * tag_str, gint len)
848 g_return_val_if_fail (tag_str != NULL, FALSE);
850 /* If it's a number, it might be a defined genre */
851 if (id3v2_are_digits (tag_str, len)) {
852 tag_str = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10));
853 return id3v2_tag_to_taglist (work, tag_name, tag_str);
855 /* Otherwise it might be "RX" or "CR" */
857 if (g_ascii_strncasecmp ("rx", tag_str, len) == 0)
858 return id3v2_tag_to_taglist (work, tag_name, "Remix");
860 if (g_ascii_strncasecmp ("cr", tag_str, len) == 0)
861 return id3v2_tag_to_taglist (work, tag_name, "Cover");
864 /* Otherwise it's a string */
865 return id3v2_tag_to_taglist (work, tag_name, tag_str);
/* id3v2_genre_fields_to_taglist:
 * @work: parser state, passed through to the genre helpers
 * @tag_name: tag to store the genres under
 *
 * Walks the string fields of a genre frame (the caller invokes this for
 * TCON; the field array is a third parameter declared on a line missing from
 * this copy). For each field, leading parenthesised numeric references of
 * the form "(n)" are handed to id3v2_genre_string_to_taglist() one at a
 * time, and whatever text remains afterwards is added as a genre as well.
 *
 * The `TRUE ||` in the version test makes it always pass, so the parenthesis
 * handling runs for every tag version.
 *
 * NOTE(review): the loop that repeats the parenthesis scan, the pointer and
 * length updates after each match and the return statement are not visible
 * in this copy; `result` is presumably returned. */
869 id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
872 gchar *tag_str = NULL;
873 gboolean result = FALSE;
876 for (i = 0; i < tag_fields->len; i++) {
879 tag_str = g_array_index (tag_fields, gchar *, i);
883 len = strlen (tag_str);
884 /* Only supposed to see '(n)' type numeric genre strings in ID3 <= 2.3.0
885 * but apparently we see them in 2.4.0 sometimes too */
886 if (TRUE || work->hdr.version <= 0x300) { /* <= 2.3.0 */
887 /* Check for genre numbers wrapped in parentheses, possibly
888 * followed by a string */
891 gboolean found = FALSE;
893 /* Double parenthesis ends the numeric genres, but we need
894 * to swallow the first one so we actually output '(' */
895 if (tag_str[0] == '(' && tag_str[1] == '(') {
901 /* If the first char is not a parenthesis, then stop
902 * looking for parenthesised genre strings */
903 if (tag_str[0] != '(')
906 for (pos = 1; pos < len; pos++) {
907 if (tag_str[pos] == ')') {
910 tmp_str = g_strndup (tag_str + 1, pos - 1);
912 id3v2_genre_string_to_taglist (work, tag_name, tmp_str,
921 /* If we encounter a non-digit while searching for a closing
922 * parenthesis, we should not try and interpret this as a
923 * numeric genre string */
924 if (!g_ascii_isdigit (tag_str[pos]))
928 break; /* There was no closing parenthesis */
/* Whatever is left of the field is added as a genre string */
932 if (len > 0 && tag_str != NULL)
933 result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len);
/* Charset names given to g_convert(). parse_insert_string_field() also
 * compares these by pointer (in_encode == utf16beenc), so the identity of
 * these arrays matters, not just their contents. */
938 static const gchar utf16enc[] = "UTF-16";
939 static const gchar utf16leenc[] = "UTF-16LE";
940 static const gchar utf16beenc[] = "UTF-16BE";
/* find_utf16_bom:
 * @data: buffer of which the first two bytes are read
 * @p_in_encoding: set to the little-endian or big-endian charset name,
 *                 depending on the marker found
 *
 * Combines the first two bytes of @data into one 16-bit value, first byte in
 * the high half, and matches it against the byte order marks.
 *
 * NOTE(review): the switch/case labels and the return statements are not
 * visible in this copy; the caller uses the result as a boolean. */
943 find_utf16_bom (gchar * data, const gchar ** p_in_encoding)
945 guint16 marker = (GST_READ_UINT8 (data) << 8) | GST_READ_UINT8 (data + 1);
949 *p_in_encoding = utf16leenc;
952 *p_in_encoding = utf16beenc;
/* string_utf8_dup:
 * @start: bytes to convert; need not be NUL-terminated within @size
 * @size: number of bytes at @start
 *
 * Produces a UTF-8 version of the given bytes, trying in this order:
 *  1. the bytes as they are, if they already validate as UTF-8;
 *  2. each charset listed in the first non-empty one of the environment
 *     variables GST_ID3V1_TAG_ENCODING, GST_ID3_TAG_ENCODING and
 *     GST_TAG_ENCODING (list split on G_SEARCHPATH_SEPARATOR_S);
 *  3. the current locale charset, when g_get_charset() reports that it is
 *     not UTF-8;
 *  4. ISO-8859-1.
 * A conversion is accepted only when it consumed all @size bytes.
 *
 * NOTE(review): the clean-up of rejected conversions and of the charset
 * list, and the return statements, are not visible in this copy; `utf8` is
 * presumably what gets returned. */
961 string_utf8_dup (const gchar * start, const guint size)
967 /* Should we try the charsets specified
968 * via environment variables FIRST ? */
969 if (g_utf8_validate (start, size, NULL)) {
970 utf8 = g_strndup (start, size);
974 env = g_getenv ("GST_ID3V1_TAG_ENCODING");
975 if (!env || *env == '\0')
976 env = g_getenv ("GST_ID3_TAG_ENCODING");
977 if (!env || *env == '\0')
978 env = g_getenv ("GST_TAG_ENCODING");
980 /* Try charsets specified via the environment */
981 if (env && *env != '\0') {
984 csets = g_strsplit (env, G_SEARCHPATH_SEPARATOR_S, -1);
986 for (c = csets; c && *c; ++c) {
988 g_convert (start, size, "UTF-8", *c, &bytes_read, NULL, NULL))) {
989 if (bytes_read == size) {
990 GST_DEBUG ("Using charset %s to interperate id3 tags\n", *c);
999 /* Try current locale (if not UTF-8) */
1000 if (!g_get_charset (&env)) {
1001 if ((utf8 = g_locale_to_utf8 (start, size, &bytes_read, NULL, NULL))) {
1002 if (bytes_read == size) {
1010 /* Try ISO-8859-1 */
1012 g_convert (start, size, "UTF-8", "ISO-8859-1", &bytes_read, NULL, NULL);
1013 if (utf8 != NULL && bytes_read == size) {
/* parse_insert_string_field:
 * @encoding: one of the ID3V2_ENCODING_* values
 * @data: raw bytes of one field
 * @data_size: number of bytes at @data
 *
 * Converts one raw field to UTF-8 according to @encoding and appends the
 * resulting string to the `fields` array (a fourth parameter, declared on a
 * line missing from this copy) when it validates as UTF-8.
 *
 *  - UTF-16 variants: leading byte order marks are skipped, the last one
 *    found deciding the input charset; when the big-endian attempt fails,
 *    little-endian is tried as a fallback (the opposite fallback is
 *    presumably on a missing line);
 *  - ISO-8859-1: bytes that already validate as UTF-8 are copied, everything
 *    else goes through string_utf8_dup();
 *  - otherwise the bytes are copied unchanged with g_strndup().
 *
 * NOTE(review): any NULL check on `field` before the final validation, and
 * the release of rejected strings, are not visible in this copy. */
1028 parse_insert_string_field (guint8 encoding, gchar * data, gint data_size,
1031 gchar *field = NULL;
1034 case ID3V2_ENCODING_UTF16:
1035 case ID3V2_ENCODING_UTF16BE:
1037 const gchar *in_encode;
1039 if (encoding == ID3V2_ENCODING_UTF16)
1040 in_encode = utf16enc;
1042 in_encode = utf16beenc;
1044 /* Sometimes we see strings with multiple BOM markers at the start.
1045 * In that case, we assume the innermost one is correct. If that fails
1046 * to produce valid UTF-8, we try the other endianness anyway */
1047 while (data_size > 2 && find_utf16_bom (data, &in_encode)) {
1048 data += 2; /* skip BOM */
1052 field = g_convert (data, data_size, "UTF-8", in_encode, NULL, NULL, NULL);
1054 if (field == NULL || g_utf8_validate (field, -1, NULL) == FALSE) {
1055 /* As a fallback, try interpreting UTF-16 in the other endianness */
1056 if (in_encode == utf16beenc)
1057 field = g_convert (data, data_size, "UTF-8", utf16leenc,
1063 case ID3V2_ENCODING_ISO8859:
1064 if (g_utf8_validate (data, data_size, NULL))
1065 field = g_strndup (data, data_size);
1067 /* field = g_convert (data, data_size, "UTF-8", "ISO-8859-1",
1068 NULL, NULL, NULL); */
1069 field = string_utf8_dup (data, data_size);
1072 field = g_strndup (data, data_size);
/* Only strings that validate as UTF-8 make it into the array */
1077 if (g_utf8_validate (field, -1, NULL)) {
1078 g_array_append_val (fields, field);
1082 GST_DEBUG ("%s was bad UTF-8 after conversion from encoding %d. Ignoring",
/* parse_split_strings:
 * @encoding: one of the ID3V2_ENCODING_* values
 * @data: raw frame text, possibly holding several terminated strings
 * @data_size: number of bytes at @data
 * @out_fields: receives a newly-created array of gchar pointers; it is
 *              written only when at least one string was decoded
 *
 * Splits @data at the string terminators of the given encoding (a single NUL
 * byte for ISO-8859-1 and UTF-8, two NUL bytes on a 2-byte step for the
 * UTF-16 variants) and passes every piece, terminator included, to
 * parse_insert_string_field(). Trailing bytes without a terminator are
 * passed on too, unless they begin with a terminator.
 *
 * When no string was decoded the array is freed and *out_fields is left as
 * the caller initialised it.
 *
 * NOTE(review): the array is created in the declaration below, before the
 * g_return_if_fail() check, so it is leaked if that check ever fails. The
 * declaration (and initial value) of `prev` is on a line missing from this
 * copy. */
1089 parse_split_strings (guint8 encoding, gchar * data, gint data_size,
1090 GArray ** out_fields)
1092 GArray *fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
1096 g_return_if_fail (out_fields != NULL);
1099 case ID3V2_ENCODING_ISO8859:
1100 for (text_pos = 0; text_pos < data_size; text_pos++) {
1101 if (data[text_pos] == 0) {
1102 parse_insert_string_field (encoding, data + prev,
1103 text_pos - prev + 1, fields);
1104 prev = text_pos + 1;
1107 if (data_size - prev > 0 && data[prev] != 0x00) {
1108 parse_insert_string_field (encoding, data + prev,
1109 data_size - prev, fields);
1113 case ID3V2_ENCODING_UTF8:
1114 for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
1115 if (data[text_pos] == '\0') {
1116 parse_insert_string_field (encoding, data + prev,
1117 text_pos - prev + 1, fields);
1118 prev = text_pos + 1;
1121 if (data_size - prev > 0 && data[prev] != 0x00) {
1122 parse_insert_string_field (encoding, data + prev,
1123 data_size - prev, fields);
1126 case ID3V2_ENCODING_UTF16:
1127 case ID3V2_ENCODING_UTF16BE:
1129 /* Find '\0\0' terminator */
1130 for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
1131 if (data[text_pos] == '\0' && data[text_pos + 1] == '\0') {
1132 /* found a delimiter */
1133 parse_insert_string_field (encoding, data + prev,
1134 text_pos - prev + 2, fields);
1135 text_pos++; /* Advance to the 2nd NULL terminator */
1136 prev = text_pos + 1;
1140 if (data_size - prev > 1 &&
1141 (data[prev] != 0x00 || data[prev + 1] != 0x00)) {
1142 /* There were 2 or more non-null chars left, convert those too */
1143 parse_insert_string_field (encoding, data + prev,
1144 data_size - prev, fields);
/* Hand the array over only if it holds something; otherwise drop it */
1149 if (fields->len > 0)
1150 *out_fields = fields;
1152 g_array_free (fields, TRUE);
/* free_tag_strings:
 * @fields: array of gchar pointers as produced by parse_split_strings()
 *
 * Walks every element of @fields and then frees the array together with its
 * element storage.
 *
 * NOTE(review): the statement releasing each string `c` (and any NULL guard
 * on @fields) is on lines missing from this copy - presumably g_free(). */
1156 free_tag_strings (GArray * fields)
1162 for (i = 0; i < fields->len; i++) {
1163 c = g_array_index (fields, gchar *, i);
1166 g_array_free (fields, TRUE);