From 601592a866270dac88b747d0f2177b2727bd2a29 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Fri, 14 Sep 2012 18:19:15 +0200 Subject: [PATCH] unicode: update comments Update all the introduction and documentation comments. Also remove an old TODO item regarding glib. Signed-off-by: David Herrmann --- src/unicode.c | 54 ++++++++++++++++++++++++++++++++++++++---------------- src/unicode.h | 23 +++++------------------ 2 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/unicode.c b/src/unicode.c index 3fe6d99..0b14915 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -25,7 +25,7 @@ */ /* - * This kmscon-utf8-state-machine is based on the wayland-compositor demos: + * The tsm-utf8-state-machine is based on the wayland-compositor demos: * * Copyright © 2008 Kristian Høgsberg * @@ -49,23 +49,11 @@ */ /* - * Unicode Handling - * Main implementation of the symbol datatype. The symbol table contains two-way - * references. The Hash Table contains all the symbols with the symbol ucs4 - * string as key and the symbol ID as value. - * The index array contains the symbol ID as key and a pointer to the ucs4 - * string as value. But the hash table owns the ucs4 string. - * This allows fast implementations of *_get() and *_append() without long - * search intervals. - * - * When creating a new symbol, we simply return the UCS4 value as new symbol. We - * do not add it to our symbol table as it is only one character. However, if a - * character is appended to an existing symbol, we create a new ucs4 string and - * push the new symbol into the symbol table. + * Unicode Helpers + * This implements several helpers for Unicode/UTF8/UCS4 input and output. See + * below for comments on each helper. 
*/ -/* TODO: Remove the glib dependencies */ - #include #include #include @@ -77,6 +65,40 @@ #define LOG_SUBSYSTEM "unicode" +/* + * Unicode Symbol Handling + * The main goal of the kmscon_symbol_* functions is to provide a datatype which + * can contain the representation of any printable character. This includes all + * basic Unicode characters but also combined characters. + * To avoid all the memory management we still represent a character as a single + * integer value (kmscon_symbol_t) but internally we allocate a string which is + * represented by this value. + * + * A kmscon_symbol_t is an integer which represents a single character point. + * For most Unicode characters this is simply the UCS4 representation. In fact, + * every UCS4 character is a valid kmscon_symbol_t object. + * However, Unicode standard allows combining marks. Therefore, some characters + * consist of more than one Unicode character. + * A global symbol-table provides all those combined characters as single + * integers. You simply create a valid base character and append your combining + * marks and the table will return a new valid kmscon_symbol_t. It is no longer + * a valid UCS4 value, though. But no memory management is needed as all + * kmscon_symbol_t objects are simple integers. + * + * The symbol table contains two-way + * references. The Hash Table contains all the symbols with the symbol ucs4 + * string as key and the symbol ID as value. + * The index array contains the symbol ID as key and a pointer to the ucs4 + * string as value. But the hash table owns the ucs4 string. + * This allows fast implementations of *_get() and *_append() without long + * search intervals. + * + * When creating a new symbol, we simply return the UCS4 value as new symbol. We + * do not add it to our symbol table as it is only one character. However, if a + * character is appended to an existing symbol, we create a new ucs4 string and + * push the new symbol into the symbol table. 
+ */ + #define KMSCON_UCS4_MAXLEN 10 #define KMSCON_UCS4_MAX 0x7fffffffUL #define KMSCON_UCS4_INVALID 0xfffd diff --git a/src/unicode.h b/src/unicode.h index 3b69e64..84312ca 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -25,24 +25,9 @@ */ /* - * Unicode Handling - * The main goal of the kmscon_symbol_* functions is to provide a datatype which - * can contain the representation of any printable character. This includes all - * basic Unicode characters but also combined characters. - * To avoid all the memory management we still represent a character as a single - * integer value (kmscon_symbol_t) but internally we allocate a string which is - * represented by this value. - * - * A kmscon_symbol_t is an integer which represents a single character point. - * For most Unicode characters this is simply the UCS4 representation. In fact, - * every UCS4 characters is a valid kmscon_symbol_t object. - * However, Unicode standard allows combining marks. Therefore, some characters - * consists of more than one Unicode character. - * A global symbol-table provides all those combined characters as single - * integers. You simply create a valid base character and append your combining - * marks and the table will return a new valid kmscon_symbol_t. It is no longer - * a valid UCS4 value, though. But no memory management is needed as all - * kmscon_symbol_t objects are simple integers. + * Unicode Helpers + * This file provides small helpers to make working with Unicode/UTF8/UCS4 input + * and output much easier. */ #ifndef KMSCON_UNICODE_H @@ -51,6 +36,8 @@ #include #include +/* UCS4 helpers */ + #define TSM_UCS4_MAX (0x7fffffffUL) #define TSM_UCS4_INVALID (TSM_UCS4_MAX + 1) #define TSM_UCS4_REPLACEMENT (0xfffdUL) -- 2.7.4