Add assert and comment explaining why the non-UTF-8-aware hack is safe.

This commit is contained in:
Eric House 2013-04-18 07:15:14 -07:00
parent cdcfef4e18
commit e4f52c5678
2 changed files with 5 additions and 2 deletions

View file

@ -190,8 +190,10 @@ andLoadSpecialData( AndDictionaryCtxt* ctxt, XP_U8 const** ptrp,
text[txtlen] = '\0';
XP_ASSERT( *facep < nSpecials ); /* firing */
/* 0 is never part of a multi-byte utf8 char, so this little hack
is safe */
/* This little hack is safe because every byte of a multi-byte
utf-8 char (the lead byte included) has the high bit set, while
SYNONYM_DELIM does not have its high bit set */
XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) );
for ( ; '\0' != *text; ++text ) {
if ( *text == SYNONYM_DELIM ) {
*text = '\0';

View file

@ -181,6 +181,7 @@ skipBitmaps( LinuxDictionaryCtxt* ctxt, const XP_U8** ptrp )
/* Now replace the delimiter char with \0. It must be one byte in
length and of course equal to the delimiter */
XP_ASSERT( 0 == (SYNONYM_DELIM & 0x80) );
while ( '\0' != *text ) {
XP_UCHAR* cp = g_utf8_offset_to_pointer( text, 1 );
if ( 1 == (cp - text) && *text == SYNONYM_DELIM ) {