consider dual width fonts in output (bnc#766443)

author Michael Andres <ma@suse.de>

Fri, 22 Jun 2012 13:53:12 +0000 (15:53 +0200)

committer Michael Andres <ma@suse.de>

Fri, 22 Jun 2012 13:53:12 +0000 (15:53 +0200)
author Michael Andres <ma@suse.de>
Fri, 22 Jun 2012 13:53:12 +0000 (15:53 +0200)
committer Michael Andres <ma@suse.de>
Fri, 22 Jun 2012 13:53:12 +0000 (15:53 +0200)
diff --git a/src/output/Utf8.h b/src/output/Utf8.h

index 7635a04..116085e 100644 (file)
--- a/src/output/Utf8.h
+++ b/src/output/Utf8.h
@@ -1,6 +1,10 @@
  #ifndef UTF8_H_
  #define UTF8_H_
  
+#include <cstdlib>
+#include <cstring>
+#include <wchar.h>
+
  #include <iostream>
  #include <string>
  
@@ -31,6 +35,33 @@ namespace utf8
      /** utf8 size */
      size_type size() const
      {
+      // test for locales using dual width fonts:
+      static bool isCJK = []()->bool {
+       const char * lang = ::getenv( "LANG" );
+       return ( lang && ( !strncmp( lang, "zh", 2 )
+                       || !strncmp( lang, "ko", 2 )
+                       || !strncmp( lang, "ja", 2 ) ) );
+      }();
+
+      if ( isCJK )
+      {
+       // this should actually be correct for ALL locales:
+       size_type len = 0;
+       const char *s = _str.c_str();
+       for ( size_type slen = _str.size(); slen > 0; )
+       {
+         wchar_t wc;
+         size_t bytes = mbrtowc( &wc, s, slen, NULL );
+         if ( bytes <= 0 )
+           break;
+         len += wcwidth( wc );
+         slen -= bytes;
+         s += bytes;
+       }
+       return len;
+      }
+
+      // NON CJK: faster and hopefully accurate enough:
        // simply do not count continuation bytes '10xxxxxx'
        size_type ret = _str.size();
        for ( auto ch : _str )
author	Michael Andres <ma@suse.de>
	Fri, 22 Jun 2012 13:53:12 +0000 (15:53 +0200)
committer	Michael Andres <ma@suse.de>
	Fri, 22 Jun 2012 13:53:12 +0000 (15:53 +0200)