- add dictionary loading functions for big-endian architectures

The regression test passes on linux/ppc.
This commit is contained in:
Antoine Fraboulet 2006-04-16 11:27:19 +00:00
parent d362281fba
commit 86c65ebc4a
7 changed files with 155 additions and 87 deletions

View file

@ -33,8 +33,10 @@
#include <string.h> #include <string.h>
#include <ctype.h> #include <ctype.h>
#include <assert.h> #include <assert.h>
#include "hashtable.h" #include "hashtable.h"
#include "dic_internals.h" #include "dic_internals.h"
#include "dic.h"
//#define DEBUG_LIST //#define DEBUG_LIST
//#define DEBUG_OUTPUT //#define DEBUG_OUTPUT
@ -103,7 +105,13 @@ fix_header(FILE* outfile, Dict_header* header)
strcpy(header->ident,_COMPIL_KEYWORD_); strcpy(header->ident,_COMPIL_KEYWORD_);
header->root = header->edgesused; header->root = header->edgesused;
rewind (outfile); rewind (outfile);
#if defined(WORDS_BIGENDIAN)
#warning "**********************************************"
#warning "compdic does not run yet on bigendian machines"
#warning "**********************************************"
#else
fwrite (header, sizeof(Dict_header), 1, outfile); fwrite (header, sizeof(Dict_header), 1, outfile);
#endif
} }
@ -111,14 +119,14 @@ void
print_header_info(Dict_header *header) print_header_info(Dict_header *header)
{ {
printf("============================\n"); printf("============================\n");
printf("keyword length %lu bytes\n", strlen(_COMPIL_KEYWORD_)); printf("keyword length %u bytes\n", strlen(_COMPIL_KEYWORD_));
printf("keyword size %lu bytes\n", sizeof(_COMPIL_KEYWORD_)); printf("keyword size %u bytes\n", sizeof(_COMPIL_KEYWORD_));
printf("header size %lu bytes\n", sizeof(Dict_header)); printf("header size %u bytes\n", sizeof(Dict_header));
printf("\n"); printf("\n");
printf("%d words\n",header->nwords); printf("%d words\n",header->nwords);
printf("\n"); printf("\n");
printf("root : %7d (edge)\n",header->root); printf("root : %7d (edge)\n",header->root);
printf("root : %7lu (byte)\n",header->root * sizeof(Dawg_edge)); printf("root : %7u (byte)\n",header->root * sizeof(Dawg_edge));
printf("\n"); printf("\n");
printf("nodes : %d+%d\n",header->nodesused, header->nodessaved); printf("nodes : %d+%d\n",header->nodesused, header->nodessaved);
printf("edges : %d+%d\n",header->edgesused, header->edgessaved); printf("edges : %d+%d\n",header->edgesused, header->edgessaved);

View file

@ -29,18 +29,76 @@
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
#include <ctype.h> #include <ctype.h>
#include "config.h"
#include "dic_internals.h" #include "dic_internals.h"
#include "dic.h" #include "dic.h"
#if defined(WORDS_BIGENDIAN)
/*
 * Reverse the byte order of a 32-bit value: the least significant byte
 * becomes the most significant one and vice versa.
 */
static uint32_t swap4(uint32_t v)
{
    return ((v & 0x000000ffu) << 24) |
           ((v & 0x0000ff00u) <<  8) |
           ((v & 0x00ff0000u) >>  8) |
           ((v & 0xff000000u) >> 24);
}
#endif
static int static int
check_header(FILE* file, Dict_header *header) Dic_read_convert_header(Dict_header *header, FILE* file)
{ {
if (fread(header,sizeof(Dict_header),1,file) != 1) if (fread(header,sizeof(Dict_header),1,file) != 1)
return 1; return 1;
return strcmp(header->ident,_COMPIL_KEYWORD_);
#if defined(WORDS_BIGENDIAN)
header->root = swap4(header->root);
header->nwords = swap4(header->nwords);
header->nodesused = swap4(header->nodesused);
header->edgesused = swap4(header->edgesused);
header->nodessaved = swap4(header->nodessaved);
header->edgessaved = swap4(header->edgessaved);
#else
#endif
return 0;
} }
/*
 * Open the file at `path`, read its dictionary header into `header`
 * through Dic_read_convert_header(), and check the identification string.
 *
 * Returns 0 when the file could be opened, the header could be read and
 * header->ident equals _COMPIL_KEYWORD_; returns 1 otherwise.
 */
int
Dic_check_header(Dict_header *header, const char *path)
{
    FILE *stream;
    int   status;

    stream = fopen(path, "rb");
    if (stream == NULL)
        return 1;

    status = Dic_read_convert_header(header, stream);
    fclose(stream);

    /* Only inspect the identifier when the header was actually read. */
    if (status != 0)
        return 1;

    return strcmp(header->ident, _COMPIL_KEYWORD_) != 0;
}
/*
 * Convert the loaded edge array to the byte order of the host.
 *
 * When WORDS_BIGENDIAN is defined, dic->dawg is treated as an array of
 * 32-bit words and each of the first (dic->nedges + 1) words is
 * byte-swapped in place with swap4(). Otherwise the function does nothing.
 */
static void
Dic_convert_data_to_arch(Dictionary dic)
{
#if defined(WORDS_BIGENDIAN)
    uint32_t *words = (uint32_t*)dic->dawg;
    int idx = 0;

    while (idx < (dic->nedges + 1))
    {
        words[idx] = swap4(words[idx]);
        idx++;
    }
#endif
}
int int
Dic_load(Dictionary *dic, const char* path) Dic_load(Dictionary *dic, const char* path)
@ -48,20 +106,23 @@ Dic_load(Dictionary *dic, const char* path)
FILE* file; FILE* file;
Dict_header header; Dict_header header;
*dic = NULL; *dic = NULL;
if ((file = fopen(path,"rb")) == NULL) if ((file = fopen(path,"rb")) == NULL)
return 1; return 1;
if (check_header(file,&header))
return 2; Dic_read_convert_header(&header,file);
if ((*dic = (Dictionary) malloc(sizeof(struct _Dictionary))) == NULL) if ((*dic = (Dictionary) malloc(sizeof(struct _Dictionary))) == NULL)
return 3; return 3;
if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*
sizeof(Dawg_edge))) == NULL) if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*sizeof(Dawg_edge))) == NULL)
{ {
free(*dic); free(*dic);
*dic = NULL; *dic = NULL;
return 4; return 4;
} }
if (fread((*dic)->dawg,sizeof(Dawg_edge),header.edgesused + 1,file) != if (fread((*dic)->dawg,sizeof(Dawg_edge),header.edgesused + 1,file) !=
(header.edgesused + 1)) (header.edgesused + 1))
{ {
@ -70,11 +131,14 @@ Dic_load(Dictionary *dic, const char* path)
*dic = NULL; *dic = NULL;
return 5; return 5;
} }
(*dic)->root = header.root; (*dic)->root = header.root;
(*dic)->nwords = header.nwords; (*dic)->nwords = header.nwords;
(*dic)->nnodes = header.nodesused; (*dic)->nnodes = header.nodesused;
(*dic)->nedges = header.edgesused; (*dic)->nedges = header.edgesused;
Dic_convert_data_to_arch(*dic);
fclose(file); fclose(file);
return 0; return 0;
} }

View file

@ -41,10 +41,20 @@ extern "C"
*/ */
#define DIC_WORD_MAX 16 #define DIC_WORD_MAX 16
typedef struct _Dictionary* Dictionary; typedef struct _Dict_header Dict_header;
typedef struct _Dictionary *Dictionary;
typedef unsigned int dic_elt_t; typedef unsigned int dic_elt_t;
typedef unsigned char dic_code_t; typedef unsigned char dic_code_t;
/**
* Dictionary header loading from a file
* @param header : pointer to the header structure to fill
* @param path : compressed dictionary path
* @return 0 ok, otherwise error
*/
int Dic_check_header(Dict_header *header, const char* path);
/** /**
* Dictionary creation and loading from a file * Dictionary creation and loading from a file
* @param dic : pointer to a dictionary * @param dic : pointer to a dictionary

View file

@ -31,6 +31,9 @@ extern "C"
{ {
#endif #endif
#include <stdint.h>
#include "config.h"
/** /**
* bit masking for ascii characters \n * bit masking for ascii characters \n
* ('a' & CHAR) == ('A' & CHAR) == 1 * ('a' & CHAR) == ('A' & CHAR) == 1
@ -57,15 +60,30 @@ extern "C"
* ---------------- * ----------------
*/ */
typedef struct _Dawg_edge { #if defined(WORDS_BIGENDIAN)
unsigned int ptr : 24; struct __attribute__ ((packed)) _Dawg_edge {
unsigned int term : 1; uint32_t
unsigned int last : 1; chr : 5,
unsigned int fill : 1; fill : 1,
unsigned int chr : 5; last : 1,
} Dawg_edge; term : 1,
ptr : 24;
};
#else
struct __attribute__ ((packed)) _Dawg_edge {
uint32_t
ptr : 24,
term : 1,
last : 1,
fill : 1,
chr : 5;
};
#endif
typedef struct _Dict_header { typedef struct _Dawg_edge Dawg_edge;
struct _Dict_header {
char ident[sizeof(_COMPIL_KEYWORD_)]; char ident[sizeof(_COMPIL_KEYWORD_)];
char unused_1; char unused_1;
char unused_2; char unused_2;
@ -75,7 +93,8 @@ typedef struct _Dict_header {
unsigned int nodesused; unsigned int nodesused;
unsigned int nodessaved; unsigned int nodessaved;
unsigned int edgessaved; unsigned int edgessaved;
} Dict_header; };
struct _Dictionary struct _Dictionary
{ {

View file

@ -18,6 +18,7 @@
/* along with this program; if not, write to the Free Software */ /* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include "dic_internals.h"
#include "dic.h" #include "dic.h"
#include "regexp.h" #include "regexp.h"
#include "libdic_a-er.h" #include "libdic_a-er.h"

View file

@ -23,8 +23,8 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "dic.h"
#include "dic_internals.h" #include "dic_internals.h"
#include "dic.h"
#include "dic.h" #include "dic.h"
#include "regexp.h" #include "regexp.h"

View file

@ -27,6 +27,7 @@
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <stddef.h>
#include "dic_internals.h" #include "dic_internals.h"
#include "dic.h" #include "dic.h"
@ -50,6 +51,7 @@ print_dic_rec(FILE* out, Dictionary dic, char *buf, char* s, Dawg_edge i)
} }
} }
void void
dic_load(Dictionary* dic, char* filename) dic_load(Dictionary* dic, char* filename)
{ {
@ -68,6 +70,7 @@ dic_load(Dictionary* dic, char* filename)
} }
} }
void void
print_dic_list(char* filename, char* out) print_dic_list(char* filename, char* out)
{ {
@ -91,85 +94,47 @@ print_dic_list(char* filename, char* out)
Dic_destroy(dic); Dic_destroy(dic);
} }
char
b2h(int i)
{
if (i < 10)
return i+'0';
return i-10+'a';
}
char*
hexb(unsigned char h)
{
static char buf[3];
buf[0] = b2h((h & 0xf0) >> 4);
buf[1] = b2h((h & 0x0f));
buf[2] = 0;
return buf;
}
char*
hexl(unsigned int h)
{
static char buf[9];
int i;
for(i=0; i<4; i++)
{
int l = h >> (24 - i*8);
buf[i*2+0] = b2h((l & 0xf0) >> 4);
buf[i*2+1] = b2h((l & 0x0f));
}
buf[8] = 0;
return buf;
}
char*
offset(void* base, void* off)
{
static char buf[20];
int o = (char*)off - (char*)base;
sprintf(buf,"%s",hexb(o));
return buf;
}
void void
print_header(char* filename) print_header(char* filename)
{ {
FILE* file;
Dict_header header; Dict_header header;
if ((file = fopen(filename,"rb")) == NULL) Dic_check_header(&header,filename);
return;
if (fread(&header,sizeof(Dict_header),1,file) != 1) #define OO(IDENT) offsetof(Dict_header,IDENT)
return;
fclose(file);
printf("Dictionary header information\n"); printf("Dictionary header information\n");
printf("0x%s ident : %s\n",offset(&header,&header.ident),header.ident); printf("0x%02x ident : %s\n", OO(ident) ,header.ident);
printf("0x%s unused 1 : %6d %s\n",offset(&header,&header.unused_1) ,header.unused_1 ,hexl(header.unused_1)); printf("0x%02x unused 1 : %6d %06x\n",OO(unused_1) ,header.unused_1 ,header.unused_1);
printf("0x%s unused 2 : %6d %s\n",offset(&header,&header.unused_2) ,header.unused_2 ,hexl(header.unused_2)); printf("0x%02x unused 2 : %6d %06x\n",OO(unused_2) ,header.unused_2 ,header.unused_2);
printf("0x%s root : %6d %s\n",offset(&header,&header.root) ,header.root ,hexl(header.root)); printf("0x%02x root : %6d %06x\n",OO(root) ,header.root ,header.root);
printf("0x%s words : %6d %s\n",offset(&header,&header.nwords) ,header.nwords ,hexl(header.nwords)); printf("0x%02x words : %6d %06x\n",OO(nwords) ,header.nwords ,header.nwords);
printf("0x%s edges used : %6d %s\n",offset(&header,&header.edgesused) ,header.edgesused ,hexl(header.edgesused)); printf("0x%02x edges used : %6d %06x\n",OO(edgesused) ,header.edgesused ,header.edgesused);
printf("0x%s nodes used : %6d %s\n",offset(&header,&header.nodesused) ,header.nodesused ,hexl(header.nodesused)); printf("0x%02x nodes used : %6d %06x\n",OO(nodesused) ,header.nodesused ,header.nodesused);
printf("0x%s nodes saved : %6d %s\n",offset(&header,&header.nodessaved),header.nodessaved,hexl(header.nodessaved)); printf("0x%02x nodes saved : %6d %06x\n",OO(nodessaved),header.nodessaved,header.nodessaved);
printf("0x%s edges saved : %6d %s\n",offset(&header,&header.edgessaved),header.edgessaved,hexl(header.edgessaved)); printf("0x%02x edges saved : %6d %06x\n",OO(edgessaved),header.edgessaved,header.edgessaved);
printf("\n"); printf("\n");
printf("sizeof(header) = 0x%s (%lu)\n", hexb(sizeof(header)), sizeof(header)); printf("sizeof(header) = 0x%x (%u)\n", sizeof(header), sizeof(header));
} }
void
print_node_hex(int i, Dictionary dic) static void
print_node_hex(Dictionary dic, int i)
{ {
unsigned int* pe; union edge_t {
Dawg_edge e = dic->dawg[i]; Dawg_edge e;
pe = (unsigned int*)&e; uint32_t s;
printf("0x%s %s |%2d ptr=%2d t=%d l=%d f=%d chr=%d (%c)\n", } ee;
offset(&(dic->dawg[0]),&(dic->dawg[i])),hexl(*pe),i,
e.ptr, e.term, e.last, e.fill, e.chr, e.chr +'a' -1); ee.e = dic->dawg[i];
printf("0x%04x %08x |%4d ptr=%8d t=%d l=%d f=%d chr=%2d (%c)\n",
i*sizeof(ee), (unsigned int)(ee.s),
i, ee.e.ptr, ee.e.term, ee.e.last, ee.e.fill, ee.e.chr, ee.e.chr +'a' -1);
} }
void void
print_dic_hex(char* filename) print_dic_hex(char* filename)
{ {
@ -180,10 +145,11 @@ print_dic_hex(char* filename)
printf("offs binary structure \n"); printf("offs binary structure \n");
printf("---- -------- | ------------------\n"); printf("---- -------- | ------------------\n");
for(i=0; i < (dic->nedges + 1); i++) for(i=0; i < (dic->nedges + 1); i++)
print_node_hex(i,dic); print_node_hex(dic,i);
Dic_destroy(dic); Dic_destroy(dic);
} }
void void
usage(char* name) usage(char* name)
{ {