2021-02-02 05:13:25 +01:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
|
|
|
#include "wasmdict.h"
|
2021-02-08 01:31:32 +01:00
|
|
|
#include "dictnryp.h"
|
2021-02-02 05:13:25 +01:00
|
|
|
#include "strutils.h"
|
2021-02-08 01:31:32 +01:00
|
|
|
#include "dictmgr.h"
|
2021-02-02 05:13:25 +01:00
|
|
|
|
|
|
|
typedef struct _WasmDictionaryCtxt {
|
|
|
|
DictionaryCtxt super;
|
2021-02-08 01:31:32 +01:00
|
|
|
Globals* globals;
|
2021-02-02 05:13:25 +01:00
|
|
|
size_t dictLength;
|
|
|
|
XP_U8* dictBase;
|
|
|
|
XP_Bool useMMap;
|
|
|
|
} WasmDictionaryCtxt;
|
|
|
|
|
2021-02-08 01:31:32 +01:00
|
|
|
static const XP_UCHAR*
|
|
|
|
getShortName( const DictionaryCtxt* dict )
|
|
|
|
{
|
|
|
|
const XP_UCHAR* full = dict_getName( dict );
|
|
|
|
const XP_UCHAR* ch = strchr( full, '/' );
|
|
|
|
if ( !!ch ) {
|
|
|
|
++ch;
|
|
|
|
} else {
|
|
|
|
ch = full;
|
|
|
|
}
|
|
|
|
return ch;
|
|
|
|
}
|
|
|
|
|
2021-02-02 05:13:25 +01:00
|
|
|
static XP_Bool
|
2021-02-08 01:31:32 +01:00
|
|
|
initFromDictFile( WasmDictionaryCtxt* dctx, const char* fileName )
|
2021-02-02 05:13:25 +01:00
|
|
|
{
|
|
|
|
XP_Bool formatOk = XP_TRUE;
|
2021-02-08 01:31:32 +01:00
|
|
|
size_t dictLength;
|
2021-02-02 05:13:25 +01:00
|
|
|
XP_U32 topOffset;
|
2021-02-08 01:31:32 +01:00
|
|
|
char path[256];
|
2021-02-02 05:13:25 +01:00
|
|
|
|
2021-02-08 01:31:32 +01:00
|
|
|
if ( !!fileName ) {
|
|
|
|
snprintf( path, VSIZE(path), "%s", fileName );
|
|
|
|
} else { // if ( !getDictPath( params, fileName, path, VSIZE(path) ) ) {
|
2021-02-02 05:13:25 +01:00
|
|
|
XP_LOGF( "%s: path=%s", __func__, path );
|
|
|
|
goto closeAndExit;
|
|
|
|
}
|
2021-02-08 01:31:32 +01:00
|
|
|
struct stat statbuf;
|
|
|
|
if ( 0 != stat( path, &statbuf ) || 0 == statbuf.st_size ) {
|
|
|
|
goto closeAndExit;
|
|
|
|
}
|
2021-02-02 05:13:25 +01:00
|
|
|
dctx->dictLength = statbuf.st_size;
|
|
|
|
|
|
|
|
{
|
|
|
|
FILE* dictF = fopen( path, "r" );
|
|
|
|
XP_ASSERT( !!dictF );
|
|
|
|
if ( dctx->useMMap ) {
|
|
|
|
dctx->dictBase = mmap( NULL, dctx->dictLength, PROT_READ,
|
|
|
|
MAP_PRIVATE, fileno(dictF), 0 );
|
|
|
|
} else {
|
|
|
|
dctx->dictBase = XP_MALLOC( dctx->super.mpool, dctx->dictLength );
|
|
|
|
if ( dctx->dictLength != fread( dctx->dictBase, 1,
|
|
|
|
dctx->dictLength, dictF ) ) {
|
|
|
|
XP_ASSERT( 0 );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fclose( dictF );
|
|
|
|
}
|
|
|
|
|
|
|
|
const XP_U8* ptr = dctx->dictBase;
|
|
|
|
const XP_U8* end = ptr + dctx->dictLength;
|
|
|
|
formatOk = parseCommon( &dctx->super, NULL, &ptr, end );
|
|
|
|
/* && loadSpecialData( &dctx->super, &ptr, end ); */
|
|
|
|
|
|
|
|
if ( formatOk ) {
|
|
|
|
size_t curPos = ptr - dctx->dictBase;
|
2021-02-08 01:31:32 +01:00
|
|
|
dictLength = dctx->dictLength - curPos;
|
2021-02-02 05:13:25 +01:00
|
|
|
|
|
|
|
if ( dictLength > 0 ) {
|
|
|
|
memcpy( &topOffset, ptr, sizeof(topOffset) );
|
|
|
|
/* it's in big-endian order */
|
|
|
|
topOffset = ntohl(topOffset);
|
|
|
|
dictLength -= sizeof(topOffset); /* first four bytes are offset */
|
|
|
|
ptr += sizeof(topOffset);
|
|
|
|
}
|
|
|
|
|
|
|
|
XP_U32 numEdges;
|
|
|
|
if ( dictLength > 0 ) {
|
|
|
|
numEdges = dictLength / dctx->super.nodeSize;
|
|
|
|
#ifdef DEBUG
|
|
|
|
XP_ASSERT( (dictLength % dctx->super.nodeSize) == 0 );
|
|
|
|
dctx->super.numEdges = numEdges;
|
|
|
|
#endif
|
|
|
|
dctx->super.base = (array_edge*)ptr;
|
|
|
|
|
|
|
|
dctx->super.topEdge = dctx->super.base + topOffset;
|
|
|
|
} else {
|
|
|
|
dctx->super.base = NULL;
|
|
|
|
dctx->super.topEdge = NULL;
|
|
|
|
numEdges = 0;
|
|
|
|
}
|
|
|
|
|
2021-02-08 01:31:32 +01:00
|
|
|
dctx->super.name = copyString( dctx->super.mpool, fileName );
|
2021-02-02 05:13:25 +01:00
|
|
|
|
|
|
|
if ( ! checkSanity( &dctx->super, numEdges ) ) {
|
|
|
|
goto closeAndExit;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
goto ok;
|
|
|
|
|
|
|
|
closeAndExit:
|
|
|
|
formatOk = XP_FALSE;
|
|
|
|
ok:
|
|
|
|
|
|
|
|
return formatOk;
|
|
|
|
} /* initFromDictFile */
|
|
|
|
|
2021-02-08 01:31:32 +01:00
|
|
|
static void
|
|
|
|
freeSpecials( WasmDictionaryCtxt* ctxt )
|
|
|
|
{
|
|
|
|
XP_U16 nSpecials = 0;
|
|
|
|
|
|
|
|
for ( XP_U16 ii = 0; ii < ctxt->super.nFaces; ++ii ) {
|
|
|
|
if ( IS_SPECIAL(ctxt->super.facePtrs[ii][0] ) ) {
|
|
|
|
if ( !!ctxt->super.bitmaps ) {
|
|
|
|
XP_Bitmap* bmp = ctxt->super.bitmaps[nSpecials].largeBM;
|
|
|
|
if ( !!bmp ) {
|
|
|
|
XP_FREE( ctxt->super.mpool, bmp );
|
|
|
|
}
|
|
|
|
bmp = ctxt->super.bitmaps[nSpecials].smallBM;
|
|
|
|
if ( !!bmp ) {
|
|
|
|
XP_FREE( ctxt->super.mpool, bmp );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( !!ctxt->super.chars && !!ctxt->super.chars[nSpecials]) {
|
|
|
|
XP_FREE( ctxt->super.mpool, ctxt->super.chars[nSpecials] );
|
|
|
|
}
|
|
|
|
++nSpecials;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if ( !!ctxt->super.bitmaps ) {
|
|
|
|
XP_FREE( ctxt->super.mpool, ctxt->super.bitmaps );
|
|
|
|
}
|
|
|
|
XP_FREEP( ctxt->super.mpool, &ctxt->super.chars );
|
|
|
|
XP_FREEP( ctxt->super.mpool, &ctxt->super.charEnds );
|
|
|
|
} /* freeSpecials */
|
|
|
|
|
|
|
|
static void
|
|
|
|
wasm_dictionary_destroy( DictionaryCtxt* dict, XWEnv xwe )
|
|
|
|
{
|
|
|
|
WasmDictionaryCtxt* ctxt = (WasmDictionaryCtxt*)dict;
|
|
|
|
|
|
|
|
freeSpecials( ctxt );
|
|
|
|
|
|
|
|
if ( !!ctxt->dictBase ) {
|
|
|
|
if ( ctxt->useMMap ) {
|
|
|
|
(void)munmap( ctxt->dictBase, ctxt->dictLength );
|
|
|
|
} else {
|
|
|
|
XP_FREE( dict->mpool, ctxt->dictBase );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* super's destructor should do this!!!! */
|
|
|
|
XP_FREEP( dict->mpool, &ctxt->super.desc );
|
|
|
|
XP_FREEP( dict->mpool, &ctxt->super.md5Sum );
|
|
|
|
XP_FREEP( dict->mpool, &ctxt->super.countsAndValues );
|
|
|
|
XP_FREEP( dict->mpool, &ctxt->super.faces );
|
|
|
|
XP_FREEP( dict->mpool, &ctxt->super.facePtrs );
|
|
|
|
XP_FREEP( dict->mpool, &ctxt->super.name );
|
|
|
|
XP_FREE( dict->mpool, ctxt );
|
|
|
|
}
|
|
|
|
|
|
|
|
DictionaryCtxt*
|
|
|
|
wasm_dictionary_make( MPFORMAL XWEnv xwe, Globals* globals,
|
|
|
|
const char* dictFileName, bool useMMap )
|
|
|
|
{
|
|
|
|
WasmDictionaryCtxt* result = NULL;
|
|
|
|
if ( !!dictFileName ) {
|
|
|
|
/* dmgr_get increments ref count before returning! */
|
|
|
|
result = (WasmDictionaryCtxt*)dmgr_get( globals->dictMgr, xwe,
|
|
|
|
dictFileName );
|
|
|
|
}
|
|
|
|
if ( !result ) {
|
|
|
|
result = (WasmDictionaryCtxt*)XP_CALLOC(mpool, sizeof(*result));
|
|
|
|
result->globals = globals;
|
|
|
|
|
|
|
|
dict_super_init( MPPARM(mpool) &result->super );
|
|
|
|
result->super.destructor = wasm_dictionary_destroy;
|
|
|
|
|
|
|
|
result->useMMap = useMMap;
|
|
|
|
|
|
|
|
if ( !!dictFileName ) {
|
|
|
|
XP_Bool success = initFromDictFile( result, dictFileName );
|
|
|
|
if ( success ) {
|
|
|
|
result->super.func_dict_getShortName = getShortName;
|
|
|
|
setBlankTile( &result->super );
|
|
|
|
} else {
|
|
|
|
XP_ASSERT( 0 ); /* gonna crash anyway */
|
|
|
|
XP_FREE( mpool, result );
|
|
|
|
result = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
dmgr_put( globals->dictMgr, xwe, dictFileName, &result->super );
|
|
|
|
} else {
|
|
|
|
XP_LOGF( "%s(): no file name!!", __func__ );
|
|
|
|
}
|
|
|
|
(void)dict_ref( &result->super, xwe );
|
|
|
|
}
|
|
|
|
|
|
|
|
return &result->super;
|
|
|
|
}
|
|
|
|
|
2021-02-02 05:13:25 +01:00
|
|
|
void
|
2021-02-08 01:31:32 +01:00
|
|
|
dict_splitFaces( DictionaryCtxt* dict, XWEnv xwe, const XP_U8* utf8,
|
2021-02-02 05:13:25 +01:00
|
|
|
XP_U16 nBytes, XP_U16 nFaces )
|
|
|
|
{
|
|
|
|
XP_UCHAR* faces = XP_MALLOC( dict->mpool, nBytes + nFaces );
|
|
|
|
const XP_UCHAR** ptrs = XP_MALLOC( dict->mpool, nFaces * sizeof(ptrs[0]));
|
|
|
|
XP_U16 ii;
|
|
|
|
XP_Bool isUTF8 = dict->isUTF8;
|
|
|
|
XP_UCHAR* next = faces;
|
|
|
|
const XP_U8* bytesIn = utf8;
|
|
|
|
const XP_U8* bytesEnd = bytesIn + nBytes;
|
|
|
|
|
|
|
|
for ( ii = 0; ii < nFaces; ++ii ) {
|
|
|
|
ptrs[ii] = next;
|
|
|
|
if ( isUTF8 ) {
|
|
|
|
for ( ; ; ) {
|
|
|
|
const XP_U8* cp = bytesIn + 1; // g_utf8_offset_to_pointer( bytesIn, 1 );
|
|
|
|
size_t len = cp - bytesIn;
|
|
|
|
XP_MEMCPY( next, bytesIn, len );
|
|
|
|
next += len;
|
|
|
|
bytesIn += len;
|
|
|
|
if ( bytesIn >= bytesEnd || SYNONYM_DELIM != bytesIn[0] ) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
++bytesIn; /* skip delimiter */
|
|
|
|
*next++ = '\0';
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
XP_ASSERT( 0 == *bytesIn );
|
|
|
|
++bytesIn; /* skip empty */
|
|
|
|
*next++ = *bytesIn++;
|
|
|
|
}
|
|
|
|
XP_ASSERT( next < faces + nFaces + nBytes );
|
|
|
|
*next++ = '\0';
|
|
|
|
}
|
|
|
|
XP_ASSERT( !dict->faces );
|
|
|
|
dict->faces = faces;
|
|
|
|
dict->facesEnd = faces + nFaces + nBytes;
|
|
|
|
XP_ASSERT( !dict->facePtrs );
|
|
|
|
dict->facePtrs = ptrs;
|
|
|
|
|
2021-02-10 02:53:30 +01:00
|
|
|
/* for ( int ii = 0; ii < nFaces; ++ii ) { */
|
|
|
|
/* XP_LOGFF( "face %d: %s", ii, dict->facePtrs[ii] ); */
|
|
|
|
/* } */
|
2021-02-02 05:13:25 +01:00
|
|
|
} /* dict_splitFaces */
|
|
|
|
|
|
|
|
void
|
|
|
|
computeChecksum( DictionaryCtxt* dctx, XWEnv xwe, const XP_U8* ptr,
|
|
|
|
XP_U32 len, XP_UCHAR* out )
|
|
|
|
{
|
|
|
|
*out = '\0';
|
|
|
|
}
|