#endif
#include <string.h>
+#include <stdlib.h>
#include <gst/tag/tag.h>
#ifdef HAVE_ZLIB
#define GST_CAT_DEFAULT (id3demux_debug)
static gchar *parse_comment_frame (ID3TagsWorking * work);
-static gchar *parse_text_identification_frame (ID3TagsWorking * work);
+static GArray *parse_text_identification_frame (ID3TagsWorking * work);
static gboolean id3v2_tag_to_taglist (ID3TagsWorking * work,
- const gchar * tag_name, gchar * tag_str);
-static void parse_split_strings (ID3TagsWorking * work, guint8 encoding,
- gchar ** field1, gchar ** field2);
+ const gchar * tag_name, const gchar * tag_str);
+/* Parse a single string into an array of gchar* */
+static void parse_split_strings (guint8 encoding, gchar * data, gint data_size,
+ GArray ** out_fields);
+static void free_tag_strings (GArray * fields);
+static gboolean
+id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
+ GArray * tag_fields);
#define ID3V2_ENCODING_ISO8859 0x00
#define ID3V2_ENCODING_UTF16 0x01
guint8 *frame_data = work->hdr.frame_data;
guint frame_data_size = work->cur_frame_size;
gchar *tag_str = NULL;
+ GArray *tag_fields = NULL;
/* Check that the frame id is valid */
for (i = 0; i < 5 && work->frame_id[i] != '\0'; i++) {
if (work->frame_id[0] == 'T') {
if (strcmp (work->frame_id, "TXXX") != 0) {
/* Text identification frame */
- tag_str = parse_text_identification_frame (work);
+ tag_fields = parse_text_identification_frame (work);
} else {
/* Handle user text frame */
}
result = id3v2_tag_to_taglist (work, tag_name, tag_str);
g_free (tag_str);
}
+ if (tag_fields != NULL) {
+ if (strcmp (work->frame_id, "TCON") == 0) {
+ /* Genre strings need special treatment */
+ result |= id3v2_genre_fields_to_taglist (work, tag_name, tag_fields);
+ } else {
+ tag_str = g_array_index (tag_fields, gchar *, 0);
+ result |= id3v2_tag_to_taglist (work, tag_name, tag_str);
+ }
+ free_tag_strings (tag_fields);
+ }
return result;
}
{
guint8 encoding;
gchar language[4];
- gchar *description = NULL;
- gchar *text = NULL;
+ GArray *fields = NULL;
gchar *out_str = NULL;
+ gchar *description, *text;
if (work->parse_size < 6)
return NULL;
language[2] = work->parse_data[3];
language[3] = 0;
- parse_split_strings (work, encoding, &description, &text);
+ parse_split_strings (encoding, (gchar *) work->parse_data + 4,
+ work->parse_size - 4, &fields);
- if (text == NULL || description == NULL) {
+ if (fields == NULL || fields->len < 2) {
GST_WARNING ("Failed to decode comment frame");
goto fail;
}
+ description = g_array_index (fields, gchar *, 0);
+ text = g_array_index (fields, gchar *, 1);
if (!g_utf8_validate (text, -1, NULL)) {
GST_WARNING ("Converted string is not valid utf-8");
}
fail:
- g_free (description);
- g_free (text);
+ free_tag_strings (fields);
return out_str;
}
+/* Parse a text identification frame.
+ *
+ * The first byte of work->parse_data is read as the text encoding; the
+ * remaining work->parse_size - 1 bytes are handed to parse_split_strings().
+ *
+ * Returns: the GArray of gchar* filled in by parse_split_strings(), or NULL
+ * when the frame holds fewer than 2 bytes or no field was produced. The
+ * frame-parsing code in this file releases it with free_tag_strings(). */
-static gchar *
+static GArray *
parse_text_identification_frame (ID3TagsWorking * work)
{
guchar encoding;
- gchar *text = NULL;
+ GArray *fields = NULL;
if (work->parse_size < 2)
return NULL;
encoding = work->parse_data[0];
+ /* Skip the encoding byte; fields stays NULL if nothing could be decoded */
+ parse_split_strings (encoding, (gchar *) work->parse_data + 1,
+ work->parse_size - 1, &fields);
- switch (encoding) {
- case ID3V2_ENCODING_ISO8859:
- text = g_convert ((gchar *) (work->parse_data + 1),
- work->parse_size - 1, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
- break;
- case ID3V2_ENCODING_UTF8:
- text = g_strndup ((gchar *) (work->parse_data + 1), work->parse_size - 1);
- break;
- case ID3V2_ENCODING_UTF16:
- text = g_convert ((gchar *) (work->parse_data + 1),
- work->parse_size - 1, "UTF-8", "UTF-16", NULL, NULL, NULL);
- break;
- case ID3V2_ENCODING_UTF16BE:
- text = g_convert ((gchar *) (work->parse_data + 1),
- work->parse_size - 1, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
- break;
- }
-
- if (text != NULL && !g_utf8_validate (text, -1, NULL)) {
- GST_WARNING ("Converted string is not valid utf-8");
- g_free (text);
- text = NULL;
- }
-
- return text;
+ return fields;
}
static gboolean
id3v2_tag_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
- gchar * tag_str)
+ const gchar * tag_str)
{
GType tag_type = gst_tag_get_type (tag_name);
GstTagList *tag_list = work->tags;
tmp = strtoul ((char *) tag_str, &check, 10);
- if (strcmp (tag_name, GST_TAG_DATE) == 0) {
- GDate *d;
-
- if (*check != '\0')
- break;
- if (tmp == 0)
- break;
- d = g_date_new_dmy (1, 1, tmp);
- tmp = g_date_get_julian (d);
- g_date_free (d);
- } else if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
+ if (strcmp (tag_name, GST_TAG_TRACK_NUMBER) == 0) {
if (*check == '/') {
guint total;
guint64 tmp;
g_assert (strcmp (tag_name, GST_TAG_DURATION) == 0);
- tmp = strtoul ((char *) tag_str, NULL, 10);
+ tmp = strtoul (tag_str, NULL, 10);
if (tmp == 0) {
break;
}
break;
}
case G_TYPE_STRING:{
+ if (!strcmp (tag_name, GST_TAG_GENRE)) {
+ if (work->prev_genre && !strcmp (tag_str, work->prev_genre))
+ break; /* Same as the last genre */
+ g_free (work->prev_genre);
+ work->prev_genre = g_strdup (tag_str);
+ }
gst_tag_list_add (tag_list, GST_TAG_MERGE_APPEND,
- tag_name, (const gchar *) tag_str, NULL);
+ tag_name, tag_str, NULL);
break;
}
- /* handles GST_TYPE_DATE and anything else */
+
default:{
+ gchar *tmp = NULL;
+
+      /* Ensure that any date string is complete before it is transformed */
+      if (tag_type == GST_TYPE_DATE) {
+        guint year = 1901, month = 1, day = 1;
+
+        /* Dates can be yyyy-MM-dd, yyyy-MM or yyyy, but we need
+         * the first type. sscanf() returns EOF (negative) on empty input
+         * and 0 when not even the year matched; give up in both cases */
+        if (sscanf (tag_str, "%04u-%02u-%02u", &year, &month, &day) < 1)
+          break;
+
+        /* Point tag_str at the completed date and carry on into the generic
+         * string -> tag_type transform below; tmp is released there.
+         * (Leaving the switch here would drop the tag and leak tmp.) */
+        tmp = g_strdup_printf ("%04u-%02u-%02u", year, month, day);
+        tag_str = tmp;
+      }
+
+ /* handles anything else */
GValue src = { 0, };
GValue dest = { 0, };
g_value_init (&src, G_TYPE_STRING);
g_value_set_string (&src, (const gchar *) tag_str);
-
g_value_init (&dest, tag_type);
+
if (g_value_transform (&src, &dest)) {
gst_tag_list_add_values (tag_list, GST_TAG_MERGE_APPEND,
tag_name, &dest, NULL);
GST_WARNING ("Failed to transform tag from string to type '%s'",
g_type_name (tag_type));
}
+
g_value_unset (&src);
g_value_unset (&dest);
+ g_free (tmp);
break;
}
}
return TRUE;
}
+/* Return TRUE when each of the first @size bytes of @chars is an ASCII
+ * decimal digit, FALSE as soon as one is not. No byte is examined (and TRUE
+ * is returned) when @size is zero or negative. */
+static gboolean
+id3v2_are_digits (const gchar * chars, gint size)
+{
+  const gchar *cur;
+  gint remaining;
+
+  for (cur = chars, remaining = size; remaining > 0; cur++, remaining--) {
+    if (!g_ascii_isdigit (*cur))
+      return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* Add one genre string to the tag list under @tag_name.
+ *
+ * @tag_str must be NUL-terminated and @len is the number of its leading
+ * characters that make up the genre. The string is interpreted as:
+ *  - a genre number, when it is all digits and gst_tag_id3_genre_get()
+ *    knows the number;
+ *  - "RX" / "CR" (any case), which become "Remix" / "Cover";
+ *  - otherwise a literal genre name.
+ *
+ * Returns: the result of id3v2_tag_to_taglist(), or FALSE when @tag_str is
+ * NULL or @len is not positive. */
+static gboolean
+id3v2_genre_string_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
+    const gchar * tag_str, gint len)
+{
+  g_return_val_if_fail (tag_str != NULL, FALSE);
+
+  /* Nothing to add, e.g. for an empty "()" reference; without this check
+   * the empty string would count as "all digits" and map to genre 0 */
+  if (len <= 0)
+    return FALSE;
+
+  /* If it's a number, it might be a defined genre */
+  if (id3v2_are_digits (tag_str, len)) {
+    /* Keep tag_str intact: for an unknown number the lookup yields NULL and
+     * the original text must still be available to the code below */
+    const gchar *genre = gst_tag_id3_genre_get (strtol (tag_str, NULL, 10));
+
+    if (genre != NULL)
+      return id3v2_tag_to_taglist (work, tag_name, genre);
+  }
+  /* Otherwise it might be "RX" or "CR" */
+  if (len == 2) {
+    if (g_ascii_strncasecmp ("rx", tag_str, len) == 0)
+      return id3v2_tag_to_taglist (work, tag_name, "Remix");
+
+    if (g_ascii_strncasecmp ("cr", tag_str, len) == 0)
+      return id3v2_tag_to_taglist (work, tag_name, "Cover");
+  }
+
+  /* Otherwise it's a string */
+  return id3v2_tag_to_taglist (work, tag_name, tag_str);
+}
+
+/* Add every genre found in the fields of a TCON frame to the tag list.
+ *
+ * Each entry of @tag_fields (an array of gchar*) is handled in turn. For tag
+ * versions up to 2.3.0 a field may start with any number of parenthesised
+ * references such as "(4)(RX)", optionally followed by a free-text
+ * refinement; every reference and the refinement are added separately.
+ * A leading "((" is the escape for a literal '(' and ends the reference
+ * scan. For later versions the field is added as a single genre string.
+ *
+ * Returns: TRUE if at least one genre string was added. */
+static gboolean
+id3v2_genre_fields_to_taglist (ID3TagsWorking * work, const gchar * tag_name,
+    GArray * tag_fields)
+{
+  gboolean result = FALSE;
+  guint i;
+
+  for (i = 0; i < tag_fields->len; i++) {
+    gchar *tag_str;
+    gint len;
+
+    /* Fetch the i-th field; reading index 0 here would process the first
+     * field over and over and ignore all the others */
+    tag_str = g_array_index (tag_fields, gchar *, i);
+    if (tag_str == NULL)
+      continue;
+
+    len = strlen (tag_str);
+    if (work->hdr.version <= 0x300) {   /* <= 2.3.0 */
+      /* Check for genre numbers wrapped in parentheses, possibly
+       * followed by a string */
+      while (len >= 2) {
+        gint pos;
+        gboolean found = FALSE;
+
+        /* Double parenthesis ends the numeric genres; swallow the escaping
+         * one so the refinement is output as "(blah)" */
+        if (tag_str[0] == '(' && tag_str[1] == '(') {
+          tag_str++;
+          len--;
+          break;
+        }
+
+        /* Only a string that starts with '(' can hold a reference;
+         * otherwise "Rock (live)" would be cut at the ')' */
+        if (tag_str[0] != '(')
+          break;
+
+        for (pos = 1; pos < len; pos++) {
+          if (tag_str[pos] == ')') {
+            gchar *tmp_str;
+
+            /* Copy the text between the parentheses so that it is
+             * NUL-terminated for the helper */
+            tmp_str = g_strndup (tag_str + 1, pos - 1);
+            result |=
+                id3v2_genre_string_to_taglist (work, tag_name, tmp_str,
+                pos - 1);
+            g_free (tmp_str);
+            /* Continue after the closing parenthesis */
+            tag_str += pos + 1;
+            len -= pos + 1;
+            found = TRUE;
+            break;
+          }
+        }
+        if (!found)
+          break;                /* There was no closing parenthesis */
+      }
+    }
+
+    /* Whatever is left is a plain genre string (or number) */
+    if (len > 0)
+      result |= id3v2_genre_string_to_taglist (work, tag_name, tag_str, len);
+  }
+  return result;
+}
+
+/* Split a block of text into its NUL-separated fields.
+ *
+ * @encoding: ID3v2 text encoding of @data (one of the ID3V2_ENCODING_* values)
+ * @data: the raw text bytes
+ * @data_size: number of bytes available at @data
+ * @out_fields: receives a newly allocated GArray of gchar*
+ *
+ * ISO-8859-1 and UTF-16 pieces are converted to UTF-8 with g_convert(),
+ * UTF-8 pieces are copied with g_strndup(). ISO-8859-1 and UTF-8 data is
+ * split at every NUL byte; for UTF-16 only the first 2-byte terminator is
+ * treated as a delimiter and the rest is converted as one field. Pieces that
+ * fail to convert are dropped.
+ *
+ * *out_fields is written only when at least one field was produced;
+ * otherwise it is left untouched, so callers initialise it to NULL. The
+ * array and its strings are released with free_tag_strings(). */
static void
-parse_split_strings (ID3TagsWorking * work, guint8 encoding,
- gchar ** field1, gchar ** field2)
+parse_split_strings (guint8 encoding, gchar * data, gint data_size,
+ GArray ** out_fields)
{
- guint text_pos;
+ GArray *fields;
+ gchar *field;
+ gint text_pos;
+ gint prev = 0;
- *field1 = *field2 = NULL;
+ g_return_if_fail (out_fields != NULL);
+ /* Allocate only after the argument check so the early return cannot leak */
+ fields = g_array_new (FALSE, TRUE, sizeof (gchar *));
switch (encoding) {
case ID3V2_ENCODING_ISO8859:
- for (text_pos = 4; text_pos < work->parse_size - 5; text_pos++) {
- if (work->parse_data[text_pos] == 0) {
- *field1 = g_convert ((gchar *) (work->parse_data + 4),
- text_pos - 4, "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
- *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 5),
- work->parse_size - text_pos - 5,
+ for (text_pos = 0; text_pos < data_size; text_pos++) {
+ if (data[text_pos] == 0) {
+ field = g_convert (data + prev, text_pos - prev + 1,
"UTF-8", "ISO-8859-1", NULL, NULL, NULL);
- break;
+ if (field)
+ g_array_append_val (fields, field);
+ prev = text_pos + 1;
}
}
+ /* Convert a trailing piece that has no terminator of its own */
+ if (data_size - prev > 0 && data[prev] != 0x00) {
+ field = g_convert (data + prev, data_size - prev,
+ "UTF-8", "ISO-8859-1", NULL, NULL, NULL);
+ if (field)
+ g_array_append_val (fields, field);
+ }
+
break;
case ID3V2_ENCODING_UTF8:
- *field1 = g_strndup ((gchar *) (work->parse_data + 4),
- work->parse_size - 4);
- text_pos = 4 + strlen (*field1) + 1; /* Offset by one more for the null */
- if (text_pos < work->parse_size) {
- *field2 = g_strndup ((gchar *) (work->parse_data + text_pos),
- work->parse_size - text_pos);
+ for (prev = 0, text_pos = 0; text_pos < data_size; text_pos++) {
+ /* A field ends at a NUL byte, exactly as in the ISO-8859-1 case */
+ if (data[text_pos] == 0x00) {
+ field = g_strndup (data + prev, text_pos - prev + 1);
+ if (field)
+ g_array_append_val (fields, field);
+ prev = text_pos + 1;
+ }
+ }
+ /* Copy a trailing piece that has no terminator of its own */
+ if (data_size - prev > 0 && data[prev] != 0x00) {
+ field = g_strndup (data + prev, data_size - prev);
+ if (field)
+ g_array_append_val (fields, field);
}
break;
case ID3V2_ENCODING_UTF16:
case ID3V2_ENCODING_UTF16BE:
{
/* Find '\0\0' terminator */
- for (text_pos = 4; text_pos < work->parse_size - 6; text_pos++) {
- if (work->parse_data[text_pos] == 0 &&
- work->parse_data[text_pos + 1] == 0) {
- /* found our delimiter */
+ for (text_pos = 0; text_pos < data_size - 1; text_pos += 2) {
+ if (data[text_pos] == 0 && data[text_pos + 1] == 0) {
+ /* found a delimiter */
if (encoding == ID3V2_ENCODING_UTF16) {
- *field1 = g_convert ((gchar *) (work->parse_data + 4),
- text_pos - 4, "UTF-8", "UTF-16", NULL, NULL, NULL);
- *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6),
- work->parse_size - text_pos - 6,
+ field = g_convert (data + prev, text_pos - prev + 2,
"UTF-8", "UTF-16", NULL, NULL, NULL);
} else {
- *field1 = g_convert ((gchar *) (work->parse_data + 4),
- text_pos - 4, "UTF-8", "UTF-16BE", NULL, NULL, NULL);
- *field2 = g_convert ((gchar *) (work->parse_data + text_pos + 6),
- work->parse_size - text_pos - 6,
+ field = g_convert (data + prev, text_pos - prev + 2,
"UTF-8", "UTF-16BE", NULL, NULL, NULL);
}
+ if (field)
+ g_array_append_val (fields, field);
+ text_pos++; /* Advance to the 2nd NULL terminator */
+ prev = text_pos + 1;
break;
}
}
+ if (data_size - prev > 1 &&
+ (data[prev] != 0x00 || data[prev + 1] != 0x00)) {
+ /* There were 2 or more non-null chars left, convert those too */
+ if (encoding == ID3V2_ENCODING_UTF16) {
+ field = g_convert (data + prev, data_size - prev,
+ "UTF-8", "UTF-16", NULL, NULL, NULL);
+ } else {
+ field = g_convert (data + prev, data_size - prev,
+ "UTF-8", "UTF-16BE", NULL, NULL, NULL);
+ }
+ if (field)
+ g_array_append_val (fields, field);
+ }
break;
}
}
+ /* Hand the array over only when it holds something; else drop it */
+ if (fields->len > 0)
+ *out_fields = fields;
+ else
+ g_array_free (fields, TRUE);
+}
+
+/* Release an array of gchar* strings: every stored string is g_free()d and
+ * then the array itself is freed. A NULL @fields is silently ignored. */
+static void
+free_tag_strings (GArray * fields)
+{
+  guint idx;
+
+  if (fields == NULL)
+    return;
+
+  for (idx = 0; idx < fields->len; idx++)
+    g_free (g_array_index (fields, gchar *, idx));
+
+  g_array_free (fields, TRUE);
}