From 601592a866270dac88b747d0f2177b2727bd2a29 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@googlemail.com>
Date: Fri, 14 Sep 2012 18:19:15 +0200
Subject: [PATCH] unicode: update comments

Update all the introduction and documentation comments. Also remove an old
TODO item regarding glib.

Signed-off-by: David Herrmann <dh.herrmann@googlemail.com>
---
 src/unicode.c | 54 ++++++++++++++++++++++++++++++++++++++----------------
 src/unicode.h | 23 +++++------------------
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/src/unicode.c b/src/unicode.c
index 3fe6d99..0b14915 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -25,7 +25,7 @@
  */
 
 /*
- * This kmscon-utf8-state-machine is based on the wayland-compositor demos:
+ * The tsm-utf8-state-machine is based on the wayland-compositor demos:
  *
  * Copyright © 2008 Kristian Høgsberg
  *
@@ -49,23 +49,11 @@
  */
 
 /*
- * Unicode Handling
- * Main implementation of the symbol datatype. The symbol table contains two-way
- * references. The Hash Table contains all the symbols with the symbol ucs4
- * string as key and the symbol ID as value.
- * The index array contains the symbol ID as key and a pointer to the ucs4
- * string as value. But the hash table owns the ucs4 string.
- * This allows fast implementations of *_get() and *_append() without long
- * search intervals.
- *
- * When creating a new symbol, we simply return the UCS4 value as new symbol. We
- * do not add it to our symbol table as it is only one character. However, if a
- * character is appended to an existing symbol, we create a new ucs4 string and
- * push the new symbol into the symbol table.
+ * Unicode Helpers
+ * This implements several helpers for Unicode/UTF8/UCS4 input and output. See
+ * below for comments on each helper.
  */
 
-/* TODO: Remove the glib dependencies */
-
 #include <errno.h>
 #include <inttypes.h>
 #include <pthread.h>
@@ -77,6 +65,40 @@
 
 #define LOG_SUBSYSTEM "unicode"
 
+/*
+ * Unicode Symbol Handling
+ * The main goal of the kmscon_symbol_* functions is to provide a datatype which
+ * can contain the representation of any printable character. This includes all
+ * basic Unicode characters but also combined characters.
+ * To avoid all the memory management we still represent a character as a single
+ * integer value (kmscon_symbol_t) but internally we allocate a string which is
+ * represented by this value.
+ *
+ * A kmscon_symbol_t is an integer which represents a single character point.
+ * For most Unicode characters this is simply the UCS4 representation. In fact,
+ * every UCS4 character is a valid kmscon_symbol_t object.
+ * However, the Unicode standard allows combining marks. Therefore, some
+ * characters consist of more than one Unicode character.
+ * A global symbol-table provides all those combined characters as single
+ * integers. You simply create a valid base character and append your combining
+ * marks and the table will return a new valid kmscon_symbol_t. It is no longer
+ * a valid UCS4 value, though. But no memory management is needed as all
+ * kmscon_symbol_t objects are simple integers.
+ *
+ * The symbol table contains two-way references. The hash table contains all
+ * the symbols, with the symbol's ucs4 string as key and the symbol ID as
+ * value.
+ * The index array contains the symbol ID as key and a pointer to the ucs4
+ * string as value. But the hash table owns the ucs4 string.
+ * This allows fast implementations of *_get() and *_append() without long
+ * search intervals.
+ *
+ * When creating a new symbol, we simply return the UCS4 value as new symbol. We
+ * do not add it to our symbol table as it is only one character. However, if a
+ * character is appended to an existing symbol, we create a new ucs4 string and
+ * push the new symbol into the symbol table.
+ */
+
 #define KMSCON_UCS4_MAXLEN 10
 #define KMSCON_UCS4_MAX 0x7fffffffUL
 #define KMSCON_UCS4_INVALID 0xfffd
diff --git a/src/unicode.h b/src/unicode.h
index 3b69e64..84312ca 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -25,24 +25,9 @@
  */
 
 /*
- * Unicode Handling
- * The main goal of the kmscon_symbol_* functions is to provide a datatype which
- * can contain the representation of any printable character. This includes all
- * basic Unicode characters but also combined characters.
- * To avoid all the memory management we still represent a character as a single
- * integer value (kmscon_symbol_t) but internally we allocate a string which is
- * represented by this value.
- *
- * A kmscon_symbol_t is an integer which represents a single character point.
- * For most Unicode characters this is simply the UCS4 representation. In fact,
- * every UCS4 characters is a valid kmscon_symbol_t object.
- * However, Unicode standard allows combining marks. Therefore, some characters
- * consists of more than one Unicode character.
- * A global symbol-table provides all those combined characters as single
- * integers. You simply create a valid base character and append your combining
- * marks and the table will return a new valid kmscon_symbol_t. It is no longer
- * a valid UCS4 value, though. But no memory management is needed as all
- * kmscon_symbol_t objects are simple integers.
+ * Unicode Helpers
+ * This file provides small helpers to make working with Unicode/UTF8/UCS4 input
+ * and output much easier.
  */
 
 #ifndef KMSCON_UNICODE_H
@@ -51,6 +36,8 @@
 #include <inttypes.h>
 #include <stdlib.h>
 
+/* UCS4 helpers */
+
 #define TSM_UCS4_MAX (0x7fffffffUL)
 #define TSM_UCS4_INVALID (TSM_UCS4_MAX + 1)
 #define TSM_UCS4_REPLACEMENT (0xfffdUL)
-- 
2.7.4