From 86c65ebc4a340819459e37e0ec21dba4440daac3 Mon Sep 17 00:00:00 2001 From: Antoine Fraboulet Date: Sun, 16 Apr 2006 11:27:19 +0000 Subject: [PATCH] - add load dictionnary functions for bigendian arch regression test is ok on linux/ppc --- dic/compdic.c | 16 ++++++-- dic/dic.c | 76 ++++++++++++++++++++++++++++++++--- dic/dic.h | 12 +++++- dic/dic_internals.h | 37 ++++++++++++----- dic/er.l | 1 + dic/er.y | 2 +- dic/listdic.c | 98 +++++++++++++++------------------------------ 7 files changed, 155 insertions(+), 87 deletions(-) diff --git a/dic/compdic.c b/dic/compdic.c index b5b58b6..db0ba6a 100644 --- a/dic/compdic.c +++ b/dic/compdic.c @@ -33,8 +33,10 @@ #include #include #include + #include "hashtable.h" #include "dic_internals.h" +#include "dic.h" //#define DEBUG_LIST //#define DEBUG_OUTPUT @@ -103,7 +105,13 @@ fix_header(FILE* outfile, Dict_header* header) strcpy(header->ident,_COMPIL_KEYWORD_); header->root = header->edgesused; rewind (outfile); +#if defined(WORDS_BIGENDIAN) + #warning "**********************************************" + #warning "compdic does not run yet on bigendian machines" + #warning "**********************************************" +#else fwrite (header, sizeof(Dict_header), 1, outfile); +#endif } @@ -111,14 +119,14 @@ void print_header_info(Dict_header *header) { printf("============================\n"); - printf("keyword length %lu bytes\n", strlen(_COMPIL_KEYWORD_)); - printf("keyword size %lu bytes\n", sizeof(_COMPIL_KEYWORD_)); - printf("header size %lu bytes\n", sizeof(Dict_header)); + printf("keyword length %u bytes\n", strlen(_COMPIL_KEYWORD_)); + printf("keyword size %u bytes\n", sizeof(_COMPIL_KEYWORD_)); + printf("header size %u bytes\n", sizeof(Dict_header)); printf("\n"); printf("%d words\n",header->nwords); printf("\n"); printf("root : %7d (edge)\n",header->root); - printf("root : %7lu (byte)\n",header->root * sizeof(Dawg_edge)); + printf("root : %7u (byte)\n",header->root * sizeof(Dawg_edge)); printf("\n"); printf("nodes : %d+%d\n",header->nodesused, header->nodessaved); printf("edges : %d+%d\n",header->edgesused, header->edgessaved); diff --git a/dic/dic.c b/dic/dic.c index 803934c..ab4c8de 100644 --- a/dic/dic.c +++ b/dic/dic.c @@ -29,18 +29,76 @@ #include #include #include + +#include "config.h" #include "dic_internals.h" #include "dic.h" +#if defined(WORDS_BIGENDIAN) +static uint32_t swap4(uint32_t v) +{ + uint32_t r; + uint8_t *pv,*pr; + + pv = (uint8_t*)&v; + pr = (uint8_t*)&r; + + pr[0] = pv[3]; + pr[1] = pv[2]; + pr[2] = pv[1]; + pr[3] = pv[0]; + + return r; +} +#endif static int -check_header(FILE* file, Dict_header *header) +Dic_read_convert_header(Dict_header *header, FILE* file) { + if (fread(header,sizeof(Dict_header),1,file) != 1) return 1; - return strcmp(header->ident,_COMPIL_KEYWORD_); + +#if defined(WORDS_BIGENDIAN) + header->root = swap4(header->root); + header->nwords = swap4(header->nwords); + header->nodesused = swap4(header->nodesused); + header->edgesused = swap4(header->edgesused); + header->nodessaved = swap4(header->nodessaved); + header->edgessaved = swap4(header->edgessaved); +#else + +#endif + return 0; } +int +Dic_check_header(Dict_header *header, const char *path) +{ + int r; + FILE* file; + if ((file = fopen(path,"rb")) == NULL) + return 1; + + r = Dic_read_convert_header(header,file); + fclose(file); + + return r || strcmp(header->ident,_COMPIL_KEYWORD_); +} + +static void +Dic_convert_data_to_arch(Dictionary dic) +{ +#if defined(WORDS_BIGENDIAN) + int i; + uint32_t* p; + p = (uint32_t*)dic->dawg; + for(i=0; i < (dic->nedges + 1); i++) + { + p[i] = swap4(p[i]); + } +#endif +} int Dic_load(Dictionary *dic, const char* path) @@ -48,20 +106,23 @@ Dic_load(Dictionary *dic, const char* path) FILE* file; Dict_header header; + *dic = NULL; if ((file = fopen(path,"rb")) == NULL) return 1; - if (check_header(file,&header)) - return 2; + + Dic_read_convert_header(&header,file); + if ((*dic = (Dictionary) malloc(sizeof(struct _Dictionary))) == NULL) return 3; - if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)* - sizeof(Dawg_edge))) == NULL) + + if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*sizeof(Dawg_edge))) == NULL) { free(*dic); *dic = NULL; return 4; } + if (fread((*dic)->dawg,sizeof(Dawg_edge),header.edgesused + 1,file) != (header.edgesused + 1)) { @@ -70,11 +131,14 @@ Dic_load(Dictionary *dic, const char* path) *dic = NULL; return 5; } + (*dic)->root = header.root; (*dic)->nwords = header.nwords; (*dic)->nnodes = header.nodesused; (*dic)->nedges = header.edgesused; + Dic_convert_data_to_arch(*dic); + fclose(file); return 0; } diff --git a/dic/dic.h b/dic/dic.h index c57d300..a6dad3b 100644 --- a/dic/dic.h +++ b/dic/dic.h @@ -41,10 +41,20 @@ extern "C" */ #define DIC_WORD_MAX 16 -typedef struct _Dictionary* Dictionary; +typedef struct _Dict_header Dict_header; +typedef struct _Dictionary *Dictionary; typedef unsigned int dic_elt_t; typedef unsigned char dic_code_t; + + /** + * Dictionary header loading from a file + * @param dic : pointer to a header + * @param path : compressed dictionary path + * @return 0 ok, otherwise error + */ +int Dic_check_header(Dict_header *header, const char* path); + /** * Dictionary creation and loading from a file * @param dic : pointer to a dictionary diff --git a/dic/dic_internals.h b/dic/dic_internals.h index 3f6e600..9aeb887 100644 --- a/dic/dic_internals.h +++ b/dic/dic_internals.h @@ -31,6 +31,9 @@ extern "C" { #endif +#include +#include "config.h" + /** * bit masking for ascii characters \n * ('a' & CHAR) == ('A' & CHAR) == 1 @@ -57,15 +60,30 @@ extern "C" * ---------------- */ -typedef struct _Dawg_edge { - unsigned int ptr : 24; - unsigned int term : 1; - unsigned int last : 1; - unsigned int fill : 1; - unsigned int chr : 5; -} Dawg_edge; +#if defined(WORDS_BIGENDIAN) +struct __attribute__ ((packed)) _Dawg_edge { + uint32_t + chr : 5, + fill : 1, + last : 1, + term : 1, + ptr : 24; +}; +#else +struct __attribute__ ((packed)) _Dawg_edge { + uint32_t + ptr : 24, + term : 1, + last : 1, + fill : 1, + chr : 5; +}; +#endif -typedef struct _Dict_header { +typedef struct _Dawg_edge Dawg_edge; + + +struct _Dict_header { char ident[sizeof(_COMPIL_KEYWORD_)]; char unused_1; char unused_2; @@ -75,7 +93,8 @@ typedef struct _Dict_header { unsigned int nodesused; unsigned int nodessaved; unsigned int edgessaved; -} Dict_header; +}; + struct _Dictionary { diff --git a/dic/er.l b/dic/er.l index b4a20bc..a4c0a2a 100644 --- a/dic/er.l +++ b/dic/er.l @@ -18,6 +18,7 @@ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "dic_internals.h" #include "dic.h" #include "regexp.h" #include "libdic_a-er.h" diff --git a/dic/er.y b/dic/er.y index a0a71bb..17a2e43 100644 --- a/dic/er.y +++ b/dic/er.y @@ -23,8 +23,8 @@ #include #include -#include "dic.h" #include "dic_internals.h" +#include "dic.h" #include "dic.h" #include "regexp.h" diff --git a/dic/listdic.c b/dic/listdic.c index 625ba7a..3729981 100644 --- a/dic/listdic.c +++ b/dic/listdic.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "dic_internals.h" #include "dic.h" @@ -50,6 +51,7 @@ print_dic_rec(FILE* out, Dictionary dic, char *buf, char* s, Dawg_edge i) } } + void dic_load(Dictionary* dic, char* filename) { @@ -68,6 +70,7 @@ dic_load(Dictionary* dic, char* filename) } } + void print_dic_list(char* filename, char* out) { @@ -91,85 +94,47 @@ print_dic_list(char* filename, char* out) Dic_destroy(dic); } -char -b2h(int i) -{ - if (i < 10) - return i+'0'; - return i-10+'a'; -} - -char* -hexb(unsigned char h) -{ - static char buf[3]; - buf[0] = b2h((h & 0xf0) >> 4); - buf[1] = b2h((h & 0x0f)); - buf[2] = 0; - return buf; -} - -char* -hexl(unsigned int h) -{ - static char buf[9]; - int i; - for(i=0; i<4; i++) - { - int l = h >> (24 - i*8); - buf[i*2+0] = b2h((l & 0xf0) >> 4); - buf[i*2+1] = b2h((l & 0x0f)); - } - buf[8] = 0; - return buf; -} - -char* -offset(void* base, void* off) -{ - static char buf[20]; - int o = (char*)off - (char*)base; - sprintf(buf,"%s",hexb(o)); - return buf; -} void print_header(char* filename) { - FILE* file; Dict_header header; - if ((file = fopen(filename,"rb")) == NULL) - return; - if (fread(&header,sizeof(Dict_header),1,file) != 1) - return; - fclose(file); + Dic_check_header(&header,filename); + +#define OO(IDENT) offsetof(Dict_header,IDENT) printf("Dictionary header information\n"); - printf("0x%s ident : %s\n",offset(&header,&header.ident),header.ident); - printf("0x%s unused 1 : %6d %s\n",offset(&header,&header.unused_1) ,header.unused_1 ,hexl(header.unused_1)); - printf("0x%s unused 2 : %6d %s\n",offset(&header,&header.unused_2) ,header.unused_2 ,hexl(header.unused_2)); - printf("0x%s root : %6d %s\n",offset(&header,&header.root) ,header.root ,hexl(header.root)); - printf("0x%s words : %6d %s\n",offset(&header,&header.nwords) ,header.nwords ,hexl(header.nwords)); - printf("0x%s edges used : %6d %s\n",offset(&header,&header.edgesused) ,header.edgesused ,hexl(header.edgesused)); - printf("0x%s nodes used : %6d %s\n",offset(&header,&header.nodesused) ,header.nodesused ,hexl(header.nodesused)); - printf("0x%s nodes saved : %6d %s\n",offset(&header,&header.nodessaved),header.nodessaved,hexl(header.nodessaved)); - printf("0x%s edges saved : %6d %s\n",offset(&header,&header.edgessaved),header.edgessaved,hexl(header.edgessaved)); + printf("0x%02x ident : %s\n", OO(ident) ,header.ident); + printf("0x%02x unused 1 : %6d %06x\n",OO(unused_1) ,header.unused_1 ,header.unused_1); + printf("0x%02x unused 2 : %6d %06x\n",OO(unused_2) ,header.unused_2 ,header.unused_2); + printf("0x%02x root : %6d %06x\n",OO(root) ,header.root ,header.root); + printf("0x%02x words : %6d %06x\n",OO(nwords) ,header.nwords ,header.nwords); + printf("0x%02x edges used : %6d %06x\n",OO(edgesused) ,header.edgesused ,header.edgesused); + printf("0x%02x nodes used : %6d %06x\n",OO(nodesused) ,header.nodesused ,header.nodesused); + printf("0x%02x nodes saved : %6d %06x\n",OO(nodessaved),header.nodessaved,header.nodessaved); + printf("0x%02x edges saved : %6d %06x\n",OO(edgessaved),header.edgessaved,header.edgessaved); printf("\n"); - printf("sizeof(header) = 0x%s (%lu)\n", hexb(sizeof(header)), sizeof(header)); + printf("sizeof(header) = 0x%x (%u)\n", sizeof(header), sizeof(header)); } -void -print_node_hex(int i, Dictionary dic) + +static void +print_node_hex(Dictionary dic, int i) { - unsigned int* pe; - Dawg_edge e = dic->dawg[i]; - pe = (unsigned int*)&e; - printf("0x%s %s |%2d ptr=%2d t=%d l=%d f=%d chr=%d (%c)\n", - offset(&(dic->dawg[0]),&(dic->dawg[i])),hexl(*pe),i, - e.ptr, e.term, e.last, e.fill, e.chr, e.chr +'a' -1); + union edge_t { + Dawg_edge e; + uint32_t s; + } ee; + + ee.e = dic->dawg[i]; + + printf("0x%04x %08x |%4d ptr=%8d t=%d l=%d f=%d chr=%2d (%c)\n", + i*sizeof(ee), (unsigned int)(ee.s), + i, ee.e.ptr, ee.e.term, ee.e.last, ee.e.fill, ee.e.chr, ee.e.chr +'a' -1); } + void print_dic_hex(char* filename) { @@ -180,10 +145,11 @@ print_dic_hex(char* filename) printf("offs binary structure \n"); printf("---- -------- | ------------------\n"); for(i=0; i < (dic->nedges + 1); i++) - print_node_hex(i,dic); + print_node_hex(dic,i); Dic_destroy(dic); } + void usage(char* name) {