mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2025-01-17 06:11:49 +01:00
- add dictionary loading functions for big-endian architectures
regression test is ok on linux/ppc
This commit is contained in:
parent
d362281fba
commit
86c65ebc4a
7 changed files with 155 additions and 87 deletions
|
@ -33,8 +33,10 @@
|
|||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "hashtable.h"
|
||||
#include "dic_internals.h"
|
||||
#include "dic.h"
|
||||
|
||||
//#define DEBUG_LIST
|
||||
//#define DEBUG_OUTPUT
|
||||
|
@ -103,7 +105,13 @@ fix_header(FILE* outfile, Dict_header* header)
|
|||
strcpy(header->ident,_COMPIL_KEYWORD_);
|
||||
header->root = header->edgesused;
|
||||
rewind (outfile);
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
#warning "**********************************************"
|
||||
#warning "compdic does not run yet on bigendian machines"
|
||||
#warning "**********************************************"
|
||||
#else
|
||||
fwrite (header, sizeof(Dict_header), 1, outfile);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -111,14 +119,14 @@ void
|
|||
print_header_info(Dict_header *header)
|
||||
{
|
||||
printf("============================\n");
|
||||
printf("keyword length %lu bytes\n", strlen(_COMPIL_KEYWORD_));
|
||||
printf("keyword size %lu bytes\n", sizeof(_COMPIL_KEYWORD_));
|
||||
printf("header size %lu bytes\n", sizeof(Dict_header));
|
||||
printf("keyword length %u bytes\n", strlen(_COMPIL_KEYWORD_));
|
||||
printf("keyword size %u bytes\n", sizeof(_COMPIL_KEYWORD_));
|
||||
printf("header size %u bytes\n", sizeof(Dict_header));
|
||||
printf("\n");
|
||||
printf("%d words\n",header->nwords);
|
||||
printf("\n");
|
||||
printf("root : %7d (edge)\n",header->root);
|
||||
printf("root : %7lu (byte)\n",header->root * sizeof(Dawg_edge));
|
||||
printf("root : %7u (byte)\n",header->root * sizeof(Dawg_edge));
|
||||
printf("\n");
|
||||
printf("nodes : %d+%d\n",header->nodesused, header->nodessaved);
|
||||
printf("edges : %d+%d\n",header->edgesused, header->edgessaved);
|
||||
|
|
76
dic/dic.c
76
dic/dic.c
|
@ -29,18 +29,76 @@
|
|||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "config.h"
|
||||
#include "dic_internals.h"
|
||||
#include "dic.h"
|
||||
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
/*
 * Reverse the byte order of a 32-bit value:
 * bytes b3 b2 b1 b0 become b0 b1 b2 b3.
 * Applying it twice yields the original value.
 */
static uint32_t swap4(uint32_t v)
{
  return ((v & 0x000000ffu) << 24)
       | ((v & 0x0000ff00u) <<  8)
       | ((v & 0x00ff0000u) >>  8)
       | ((v & 0xff000000u) >> 24);
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
check_header(FILE* file, Dict_header *header)
|
||||
Dic_read_convert_header(Dict_header *header, FILE* file)
|
||||
{
|
||||
|
||||
if (fread(header,sizeof(Dict_header),1,file) != 1)
|
||||
return 1;
|
||||
return strcmp(header->ident,_COMPIL_KEYWORD_);
|
||||
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
header->root = swap4(header->root);
|
||||
header->nwords = swap4(header->nwords);
|
||||
header->nodesused = swap4(header->nodesused);
|
||||
header->edgesused = swap4(header->edgesused);
|
||||
header->nodessaved = swap4(header->nodessaved);
|
||||
header->edgessaved = swap4(header->edgessaved);
|
||||
#else
|
||||
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
Dic_check_header(Dict_header *header, const char *path)
|
||||
{
|
||||
int r;
|
||||
FILE* file;
|
||||
if ((file = fopen(path,"rb")) == NULL)
|
||||
return 1;
|
||||
|
||||
r = Dic_read_convert_header(header,file);
|
||||
fclose(file);
|
||||
|
||||
return r || strcmp(header->ident,_COMPIL_KEYWORD_);
|
||||
}
|
||||
|
||||
static void
|
||||
Dic_convert_data_to_arch(Dictionary dic)
|
||||
{
|
||||
#if defined(WORDS_BIGENDIAN)
|
||||
int i;
|
||||
uint32_t* p;
|
||||
p = (uint32_t*)dic->dawg;
|
||||
for(i=0; i < (dic->nedges + 1); i++)
|
||||
{
|
||||
p[i] = swap4(p[i]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int
|
||||
Dic_load(Dictionary *dic, const char* path)
|
||||
|
@ -48,20 +106,23 @@ Dic_load(Dictionary *dic, const char* path)
|
|||
FILE* file;
|
||||
Dict_header header;
|
||||
|
||||
|
||||
*dic = NULL;
|
||||
if ((file = fopen(path,"rb")) == NULL)
|
||||
return 1;
|
||||
if (check_header(file,&header))
|
||||
return 2;
|
||||
|
||||
Dic_read_convert_header(&header,file);
|
||||
|
||||
if ((*dic = (Dictionary) malloc(sizeof(struct _Dictionary))) == NULL)
|
||||
return 3;
|
||||
if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*
|
||||
sizeof(Dawg_edge))) == NULL)
|
||||
|
||||
if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*sizeof(Dawg_edge))) == NULL)
|
||||
{
|
||||
free(*dic);
|
||||
*dic = NULL;
|
||||
return 4;
|
||||
}
|
||||
|
||||
if (fread((*dic)->dawg,sizeof(Dawg_edge),header.edgesused + 1,file) !=
|
||||
(header.edgesused + 1))
|
||||
{
|
||||
|
@ -70,11 +131,14 @@ Dic_load(Dictionary *dic, const char* path)
|
|||
*dic = NULL;
|
||||
return 5;
|
||||
}
|
||||
|
||||
(*dic)->root = header.root;
|
||||
(*dic)->nwords = header.nwords;
|
||||
(*dic)->nnodes = header.nodesused;
|
||||
(*dic)->nedges = header.edgesused;
|
||||
|
||||
Dic_convert_data_to_arch(*dic);
|
||||
|
||||
fclose(file);
|
||||
return 0;
|
||||
}
|
||||
|
|
12
dic/dic.h
12
dic/dic.h
|
@ -41,10 +41,20 @@ extern "C"
|
|||
*/
|
||||
#define DIC_WORD_MAX 16
|
||||
|
||||
typedef struct _Dictionary* Dictionary;
|
||||
typedef struct _Dict_header Dict_header;
|
||||
typedef struct _Dictionary *Dictionary;
|
||||
typedef unsigned int dic_elt_t;
|
||||
typedef unsigned char dic_code_t;
|
||||
|
||||
|
||||
/**
|
||||
* Dictionary header loading from a file
|
||||
* @param header : pointer to the header structure to fill in
|
||||
* @param path : compressed dictionary path
|
||||
* @return 0 ok, otherwise error
|
||||
*/
|
||||
int Dic_check_header(Dict_header *header, const char* path);
|
||||
|
||||
/**
|
||||
* Dictionary creation and loading from a file
|
||||
* @param dic : pointer to a dictionary
|
||||
|
|
|
@ -31,6 +31,9 @@ extern "C"
|
|||
{
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
#include "config.h"
|
||||
|
||||
/**
|
||||
* bit masking for ascii characters \n
|
||||
* ('a' & CHAR) == ('A' & CHAR) == 1
|
||||
|
@ -57,15 +60,30 @@ extern "C"
|
|||
* ----------------
|
||||
*/
|
||||
|
||||
typedef struct _Dawg_edge {
|
||||
unsigned int ptr : 24;
|
||||
unsigned int term : 1;
|
||||
unsigned int last : 1;
|
||||
unsigned int fill : 1;
|
||||
unsigned int chr : 5;
|
||||
} Dawg_edge;
|
||||
/*
 * One edge of the DAWG, packed into a single 32-bit word:
 *   ptr  (24 bits), term (1 bit), last (1 bit), fill (1 bit), chr (5 bits).
 *
 * The members are declared in the opposite order when WORDS_BIGENDIAN is
 * defined.  NOTE(review): presumably this keeps each field on the same
 * bits of the word once the loader has byte-swapped it -- confirm against
 * the compiler's bit-field allocation order on the target.
 */
struct __attribute__ ((packed)) _Dawg_edge {
#if defined(WORDS_BIGENDIAN)
  uint32_t chr  :  5;
  uint32_t fill :  1;
  uint32_t last :  1;
  uint32_t term :  1;
  uint32_t ptr  : 24;
#else
  uint32_t ptr  : 24;
  uint32_t term :  1;
  uint32_t last :  1;
  uint32_t fill :  1;
  uint32_t chr  :  5;
#endif
};
|
||||
|
||||
typedef struct _Dict_header {
|
||||
typedef struct _Dawg_edge Dawg_edge;
|
||||
|
||||
|
||||
struct _Dict_header {
|
||||
char ident[sizeof(_COMPIL_KEYWORD_)];
|
||||
char unused_1;
|
||||
char unused_2;
|
||||
|
@ -75,7 +93,8 @@ typedef struct _Dict_header {
|
|||
unsigned int nodesused;
|
||||
unsigned int nodessaved;
|
||||
unsigned int edgessaved;
|
||||
} Dict_header;
|
||||
};
|
||||
|
||||
|
||||
struct _Dictionary
|
||||
{
|
||||
|
|
1
dic/er.l
1
dic/er.l
|
@ -18,6 +18,7 @@
|
|||
/* along with this program; if not, write to the Free Software */
|
||||
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
||||
|
||||
#include "dic_internals.h"
|
||||
#include "dic.h"
|
||||
#include "regexp.h"
|
||||
#include "libdic_a-er.h"
|
||||
|
|
2
dic/er.y
2
dic/er.y
|
@ -23,8 +23,8 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "dic.h"
|
||||
#include "dic_internals.h"
|
||||
#include "dic.h"
|
||||
|
||||
#include "dic.h"
|
||||
#include "regexp.h"
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stddef.h>
|
||||
#include "dic_internals.h"
|
||||
#include "dic.h"
|
||||
|
||||
|
@ -50,6 +51,7 @@ print_dic_rec(FILE* out, Dictionary dic, char *buf, char* s, Dawg_edge i)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
dic_load(Dictionary* dic, char* filename)
|
||||
{
|
||||
|
@ -68,6 +70,7 @@ dic_load(Dictionary* dic, char* filename)
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
print_dic_list(char* filename, char* out)
|
||||
{
|
||||
|
@ -91,85 +94,47 @@ print_dic_list(char* filename, char* out)
|
|||
Dic_destroy(dic);
|
||||
}
|
||||
|
||||
/*
 * Convert a small integer to a lowercase hexadecimal digit character:
 * values below 10 map to '0' + i, all others to 'a' + (i - 10).
 * No range check is performed on i.
 */
char
b2h(int i)
{
  return (char)(i < 10 ? i + '0' : i - 10 + 'a');
}
|
||||
|
||||
/*
 * Format one byte as two lowercase hexadecimal digits.
 * The result lives in a static buffer that is overwritten by the next
 * call, so the returned pointer must be consumed before calling again.
 */
char*
hexb(unsigned char h)
{
  static const char digits[] = "0123456789abcdef";
  static char buf[3];

  buf[0] = digits[h >> 4];      /* high nibble */
  buf[1] = digits[h & 0x0f];    /* low nibble  */
  buf[2] = '\0';
  return buf;
}
|
||||
|
||||
/*
 * Format the low 32 bits of h as eight lowercase hexadecimal digits,
 * most significant nibble first.
 * The result lives in a static buffer that is overwritten by the next
 * call.
 */
char*
hexl(unsigned int h)
{
  static const char digits[] = "0123456789abcdef";
  static char buf[9];
  char *out = buf;
  int shift;

  /* walk the nibbles from bit 28 down to bit 0 */
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = digits[(h >> shift) & 0x0f];
  *out = '\0';

  return buf;
}
|
||||
|
||||
/*
 * Format the byte distance between two addresses as lowercase hex.
 *
 * @param base : start address
 * @param off  : address whose distance from base is wanted
 * @return pointer to a static buffer holding the distance, zero-padded
 *         to at least two digits; overwritten by the next call.
 *
 * The whole distance is printed.  The earlier implementation routed it
 * through a single-byte formatter and so silently dropped everything
 * above the low 8 bits (an offset of 0x104 was shown as "04").
 */
char*
offset(void* base, void* off)
{
  static char buf[20];
  /* 16 hex digits + NUL is the worst case for a 64-bit unsigned long */
  unsigned long delta = (unsigned long)((char*)off - (char*)base);

  snprintf(buf, sizeof buf, "%02lx", delta);
  return buf;
}
|
||||
|
||||
void
|
||||
print_header(char* filename)
|
||||
{
|
||||
FILE* file;
|
||||
Dict_header header;
|
||||
|
||||
if ((file = fopen(filename,"rb")) == NULL)
|
||||
return;
|
||||
if (fread(&header,sizeof(Dict_header),1,file) != 1)
|
||||
return;
|
||||
fclose(file);
|
||||
Dic_check_header(&header,filename);
|
||||
|
||||
#define OO(IDENT) offsetof(Dict_header,IDENT)
|
||||
|
||||
printf("Dictionary header information\n");
|
||||
printf("0x%s ident : %s\n",offset(&header,&header.ident),header.ident);
|
||||
printf("0x%s unused 1 : %6d %s\n",offset(&header,&header.unused_1) ,header.unused_1 ,hexl(header.unused_1));
|
||||
printf("0x%s unused 2 : %6d %s\n",offset(&header,&header.unused_2) ,header.unused_2 ,hexl(header.unused_2));
|
||||
printf("0x%s root : %6d %s\n",offset(&header,&header.root) ,header.root ,hexl(header.root));
|
||||
printf("0x%s words : %6d %s\n",offset(&header,&header.nwords) ,header.nwords ,hexl(header.nwords));
|
||||
printf("0x%s edges used : %6d %s\n",offset(&header,&header.edgesused) ,header.edgesused ,hexl(header.edgesused));
|
||||
printf("0x%s nodes used : %6d %s\n",offset(&header,&header.nodesused) ,header.nodesused ,hexl(header.nodesused));
|
||||
printf("0x%s nodes saved : %6d %s\n",offset(&header,&header.nodessaved),header.nodessaved,hexl(header.nodessaved));
|
||||
printf("0x%s edges saved : %6d %s\n",offset(&header,&header.edgessaved),header.edgessaved,hexl(header.edgessaved));
|
||||
printf("0x%02x ident : %s\n", OO(ident) ,header.ident);
|
||||
printf("0x%02x unused 1 : %6d %06x\n",OO(unused_1) ,header.unused_1 ,header.unused_1);
|
||||
printf("0x%02x unused 2 : %6d %06x\n",OO(unused_2) ,header.unused_2 ,header.unused_2);
|
||||
printf("0x%02x root : %6d %06x\n",OO(root) ,header.root ,header.root);
|
||||
printf("0x%02x words : %6d %06x\n",OO(nwords) ,header.nwords ,header.nwords);
|
||||
printf("0x%02x edges used : %6d %06x\n",OO(edgesused) ,header.edgesused ,header.edgesused);
|
||||
printf("0x%02x nodes used : %6d %06x\n",OO(nodesused) ,header.nodesused ,header.nodesused);
|
||||
printf("0x%02x nodes saved : %6d %06x\n",OO(nodessaved),header.nodessaved,header.nodessaved);
|
||||
printf("0x%02x edges saved : %6d %06x\n",OO(edgessaved),header.edgessaved,header.edgessaved);
|
||||
printf("\n");
|
||||
printf("sizeof(header) = 0x%s (%lu)\n", hexb(sizeof(header)), sizeof(header));
|
||||
printf("sizeof(header) = 0x%x (%u)\n", sizeof(header), sizeof(header));
|
||||
}
|
||||
|
||||
void
|
||||
print_node_hex(int i, Dictionary dic)
|
||||
|
||||
static void
|
||||
print_node_hex(Dictionary dic, int i)
|
||||
{
|
||||
unsigned int* pe;
|
||||
Dawg_edge e = dic->dawg[i];
|
||||
pe = (unsigned int*)&e;
|
||||
printf("0x%s %s |%2d ptr=%2d t=%d l=%d f=%d chr=%d (%c)\n",
|
||||
offset(&(dic->dawg[0]),&(dic->dawg[i])),hexl(*pe),i,
|
||||
e.ptr, e.term, e.last, e.fill, e.chr, e.chr +'a' -1);
|
||||
union edge_t {
|
||||
Dawg_edge e;
|
||||
uint32_t s;
|
||||
} ee;
|
||||
|
||||
ee.e = dic->dawg[i];
|
||||
|
||||
printf("0x%04x %08x |%4d ptr=%8d t=%d l=%d f=%d chr=%2d (%c)\n",
|
||||
i*sizeof(ee), (unsigned int)(ee.s),
|
||||
i, ee.e.ptr, ee.e.term, ee.e.last, ee.e.fill, ee.e.chr, ee.e.chr +'a' -1);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
print_dic_hex(char* filename)
|
||||
{
|
||||
|
@ -180,10 +145,11 @@ print_dic_hex(char* filename)
|
|||
printf("offs binary structure \n");
|
||||
printf("---- -------- | ------------------\n");
|
||||
for(i=0; i < (dic->nedges + 1); i++)
|
||||
print_node_hex(i,dic);
|
||||
print_node_hex(dic,i);
|
||||
Dic_destroy(dic);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
usage(char* name)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue