mirror of
git://git.savannah.nongnu.org/eliot.git
synced 2025-01-18 10:26:15 +01:00
- add dictionary loading functions for big-endian architectures
the regression test passes on linux/ppc
This commit is contained in:
parent
d362281fba
commit
86c65ebc4a
7 changed files with 155 additions and 87 deletions
|
@ -33,8 +33,10 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
|
|
||||||
#include "hashtable.h"
|
#include "hashtable.h"
|
||||||
#include "dic_internals.h"
|
#include "dic_internals.h"
|
||||||
|
#include "dic.h"
|
||||||
|
|
||||||
//#define DEBUG_LIST
|
//#define DEBUG_LIST
|
||||||
//#define DEBUG_OUTPUT
|
//#define DEBUG_OUTPUT
|
||||||
|
@ -103,7 +105,13 @@ fix_header(FILE* outfile, Dict_header* header)
|
||||||
strcpy(header->ident,_COMPIL_KEYWORD_);
|
strcpy(header->ident,_COMPIL_KEYWORD_);
|
||||||
header->root = header->edgesused;
|
header->root = header->edgesused;
|
||||||
rewind (outfile);
|
rewind (outfile);
|
||||||
|
#if defined(WORDS_BIGENDIAN)
|
||||||
|
#warning "**********************************************"
|
||||||
|
#warning "compdic does not run yet on bigendian machines"
|
||||||
|
#warning "**********************************************"
|
||||||
|
#else
|
||||||
fwrite (header, sizeof(Dict_header), 1, outfile);
|
fwrite (header, sizeof(Dict_header), 1, outfile);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -111,14 +119,14 @@ void
|
||||||
print_header_info(Dict_header *header)
|
print_header_info(Dict_header *header)
|
||||||
{
|
{
|
||||||
printf("============================\n");
|
printf("============================\n");
|
||||||
printf("keyword length %lu bytes\n", strlen(_COMPIL_KEYWORD_));
|
printf("keyword length %u bytes\n", strlen(_COMPIL_KEYWORD_));
|
||||||
printf("keyword size %lu bytes\n", sizeof(_COMPIL_KEYWORD_));
|
printf("keyword size %u bytes\n", sizeof(_COMPIL_KEYWORD_));
|
||||||
printf("header size %lu bytes\n", sizeof(Dict_header));
|
printf("header size %u bytes\n", sizeof(Dict_header));
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("%d words\n",header->nwords);
|
printf("%d words\n",header->nwords);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("root : %7d (edge)\n",header->root);
|
printf("root : %7d (edge)\n",header->root);
|
||||||
printf("root : %7lu (byte)\n",header->root * sizeof(Dawg_edge));
|
printf("root : %7u (byte)\n",header->root * sizeof(Dawg_edge));
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("nodes : %d+%d\n",header->nodesused, header->nodessaved);
|
printf("nodes : %d+%d\n",header->nodesused, header->nodessaved);
|
||||||
printf("edges : %d+%d\n",header->edgesused, header->edgessaved);
|
printf("edges : %d+%d\n",header->edgesused, header->edgessaved);
|
||||||
|
|
76
dic/dic.c
76
dic/dic.c
|
@ -29,18 +29,76 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
#include "dic_internals.h"
|
#include "dic_internals.h"
|
||||||
#include "dic.h"
|
#include "dic.h"
|
||||||
|
|
||||||
|
#if defined(WORDS_BIGENDIAN)
|
||||||
|
/**
 * Reverse the byte order of a 32-bit value.
 * @param v : value to convert
 * @return v with its four bytes in the opposite order
 *         (0x11223344 becomes 0x44332211)
 */
static uint32_t swap4(uint32_t v)
{
  uint32_t lowest  = (v & 0x000000ffu) << 24;
  uint32_t low     = (v & 0x0000ff00u) << 8;
  uint32_t high    = (v & 0x00ff0000u) >> 8;
  uint32_t highest = (v & 0xff000000u) >> 24;

  return lowest | low | high | highest;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
check_header(FILE* file, Dict_header *header)
|
Dic_read_convert_header(Dict_header *header, FILE* file)
|
||||||
{
|
{
|
||||||
|
|
||||||
if (fread(header,sizeof(Dict_header),1,file) != 1)
|
if (fread(header,sizeof(Dict_header),1,file) != 1)
|
||||||
return 1;
|
return 1;
|
||||||
return strcmp(header->ident,_COMPIL_KEYWORD_);
|
|
||||||
|
#if defined(WORDS_BIGENDIAN)
|
||||||
|
header->root = swap4(header->root);
|
||||||
|
header->nwords = swap4(header->nwords);
|
||||||
|
header->nodesused = swap4(header->nodesused);
|
||||||
|
header->edgesused = swap4(header->edgesused);
|
||||||
|
header->nodessaved = swap4(header->nodessaved);
|
||||||
|
header->edgessaved = swap4(header->edgessaved);
|
||||||
|
#else
|
||||||
|
|
||||||
|
#endif
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
Dic_check_header(Dict_header *header, const char *path)
|
||||||
|
{
|
||||||
|
int r;
|
||||||
|
FILE* file;
|
||||||
|
if ((file = fopen(path,"rb")) == NULL)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
r = Dic_read_convert_header(header,file);
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
|
return r || strcmp(header->ident,_COMPIL_KEYWORD_);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
Dic_convert_data_to_arch(Dictionary dic)
|
||||||
|
{
|
||||||
|
#if defined(WORDS_BIGENDIAN)
|
||||||
|
int i;
|
||||||
|
uint32_t* p;
|
||||||
|
p = (uint32_t*)dic->dawg;
|
||||||
|
for(i=0; i < (dic->nedges + 1); i++)
|
||||||
|
{
|
||||||
|
p[i] = swap4(p[i]);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
Dic_load(Dictionary *dic, const char* path)
|
Dic_load(Dictionary *dic, const char* path)
|
||||||
|
@ -48,20 +106,23 @@ Dic_load(Dictionary *dic, const char* path)
|
||||||
FILE* file;
|
FILE* file;
|
||||||
Dict_header header;
|
Dict_header header;
|
||||||
|
|
||||||
|
|
||||||
*dic = NULL;
|
*dic = NULL;
|
||||||
if ((file = fopen(path,"rb")) == NULL)
|
if ((file = fopen(path,"rb")) == NULL)
|
||||||
return 1;
|
return 1;
|
||||||
if (check_header(file,&header))
|
|
||||||
return 2;
|
Dic_read_convert_header(&header,file);
|
||||||
|
|
||||||
if ((*dic = (Dictionary) malloc(sizeof(struct _Dictionary))) == NULL)
|
if ((*dic = (Dictionary) malloc(sizeof(struct _Dictionary))) == NULL)
|
||||||
return 3;
|
return 3;
|
||||||
if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*
|
|
||||||
sizeof(Dawg_edge))) == NULL)
|
if (((*dic)->dawg = (Dawg_edge*)malloc((header.edgesused + 1)*sizeof(Dawg_edge))) == NULL)
|
||||||
{
|
{
|
||||||
free(*dic);
|
free(*dic);
|
||||||
*dic = NULL;
|
*dic = NULL;
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fread((*dic)->dawg,sizeof(Dawg_edge),header.edgesused + 1,file) !=
|
if (fread((*dic)->dawg,sizeof(Dawg_edge),header.edgesused + 1,file) !=
|
||||||
(header.edgesused + 1))
|
(header.edgesused + 1))
|
||||||
{
|
{
|
||||||
|
@ -70,11 +131,14 @@ Dic_load(Dictionary *dic, const char* path)
|
||||||
*dic = NULL;
|
*dic = NULL;
|
||||||
return 5;
|
return 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*dic)->root = header.root;
|
(*dic)->root = header.root;
|
||||||
(*dic)->nwords = header.nwords;
|
(*dic)->nwords = header.nwords;
|
||||||
(*dic)->nnodes = header.nodesused;
|
(*dic)->nnodes = header.nodesused;
|
||||||
(*dic)->nedges = header.edgesused;
|
(*dic)->nedges = header.edgesused;
|
||||||
|
|
||||||
|
Dic_convert_data_to_arch(*dic);
|
||||||
|
|
||||||
fclose(file);
|
fclose(file);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
12
dic/dic.h
12
dic/dic.h
|
@ -41,10 +41,20 @@ extern "C"
|
||||||
*/
|
*/
|
||||||
#define DIC_WORD_MAX 16
|
#define DIC_WORD_MAX 16
|
||||||
|
|
||||||
typedef struct _Dictionary* Dictionary;
|
typedef struct _Dict_header Dict_header;
|
||||||
|
typedef struct _Dictionary *Dictionary;
|
||||||
typedef unsigned int dic_elt_t;
|
typedef unsigned int dic_elt_t;
|
||||||
typedef unsigned char dic_code_t;
|
typedef unsigned char dic_code_t;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dictionary header loading from a file
|
||||||
|
* @param header : pointer to the header structure to fill
|
||||||
|
* @param path : compressed dictionary path
|
||||||
|
* @return 0 ok, otherwise error
|
||||||
|
*/
|
||||||
|
int Dic_check_header(Dict_header *header, const char* path);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dictionary creation and loading from a file
|
* Dictionary creation and loading from a file
|
||||||
* @param dic : pointer to a dictionary
|
* @param dic : pointer to a dictionary
|
||||||
|
|
|
@ -31,6 +31,9 @@ extern "C"
|
||||||
{
|
{
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* bit masking for ascii characters \n
|
* bit masking for ascii characters \n
|
||||||
* ('a' & CHAR) == ('A' & CHAR) == 1
|
* ('a' & CHAR) == ('A' & CHAR) == 1
|
||||||
|
@ -57,15 +60,30 @@ extern "C"
|
||||||
* ----------------
|
* ----------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
 * One edge of the dictionary graph, stored in a single 32-bit word:
 * ptr (24 bits), term (1 bit), last (1 bit), fill (1 bit), chr (5 bits).
 * The fields are declared in the opposite order when WORDS_BIGENDIAN is
 * defined.
 * NOTE(review): presumably this keeps each field on the same bits of the
 * 32-bit word on both kinds of host, given that the loader byte-swaps
 * each edge on big-endian machines -- confirm against the compiler's
 * bit-field allocation order.
 */
#if defined(WORDS_BIGENDIAN)
struct __attribute__ ((packed)) _Dawg_edge {
  uint32_t chr  : 5;
  uint32_t fill : 1;
  uint32_t last : 1;
  uint32_t term : 1;
  uint32_t ptr  : 24;
};
#else
struct __attribute__ ((packed)) _Dawg_edge {
  uint32_t ptr  : 24;
  uint32_t term : 1;
  uint32_t last : 1;
  uint32_t fill : 1;
  uint32_t chr  : 5;
};
#endif
|
||||||
|
|
||||||
typedef struct _Dict_header {
|
typedef struct _Dawg_edge Dawg_edge;
|
||||||
|
|
||||||
|
|
||||||
|
struct _Dict_header {
|
||||||
char ident[sizeof(_COMPIL_KEYWORD_)];
|
char ident[sizeof(_COMPIL_KEYWORD_)];
|
||||||
char unused_1;
|
char unused_1;
|
||||||
char unused_2;
|
char unused_2;
|
||||||
|
@ -75,7 +93,8 @@ typedef struct _Dict_header {
|
||||||
unsigned int nodesused;
|
unsigned int nodesused;
|
||||||
unsigned int nodessaved;
|
unsigned int nodessaved;
|
||||||
unsigned int edgessaved;
|
unsigned int edgessaved;
|
||||||
} Dict_header;
|
};
|
||||||
|
|
||||||
|
|
||||||
struct _Dictionary
|
struct _Dictionary
|
||||||
{
|
{
|
||||||
|
|
1
dic/er.l
1
dic/er.l
|
@ -18,6 +18,7 @@
|
||||||
/* along with this program; if not, write to the Free Software */
|
/* along with this program; if not, write to the Free Software */
|
||||||
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
||||||
|
|
||||||
|
#include "dic_internals.h"
|
||||||
#include "dic.h"
|
#include "dic.h"
|
||||||
#include "regexp.h"
|
#include "regexp.h"
|
||||||
#include "libdic_a-er.h"
|
#include "libdic_a-er.h"
|
||||||
|
|
2
dic/er.y
2
dic/er.y
|
@ -23,8 +23,8 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "dic.h"
|
|
||||||
#include "dic_internals.h"
|
#include "dic_internals.h"
|
||||||
|
#include "dic.h"
|
||||||
|
|
||||||
#include "dic.h"
|
#include "dic.h"
|
||||||
#include "regexp.h"
|
#include "regexp.h"
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stddef.h>
|
||||||
#include "dic_internals.h"
|
#include "dic_internals.h"
|
||||||
#include "dic.h"
|
#include "dic.h"
|
||||||
|
|
||||||
|
@ -50,6 +51,7 @@ print_dic_rec(FILE* out, Dictionary dic, char *buf, char* s, Dawg_edge i)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
dic_load(Dictionary* dic, char* filename)
|
dic_load(Dictionary* dic, char* filename)
|
||||||
{
|
{
|
||||||
|
@ -68,6 +70,7 @@ dic_load(Dictionary* dic, char* filename)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
print_dic_list(char* filename, char* out)
|
print_dic_list(char* filename, char* out)
|
||||||
{
|
{
|
||||||
|
@ -91,85 +94,47 @@ print_dic_list(char* filename, char* out)
|
||||||
Dic_destroy(dic);
|
Dic_destroy(dic);
|
||||||
}
|
}
|
||||||
|
|
||||||
char
|
|
||||||
b2h(int i)
|
|
||||||
{
|
|
||||||
if (i < 10)
|
|
||||||
return i+'0';
|
|
||||||
return i-10+'a';
|
|
||||||
}
|
|
||||||
|
|
||||||
char*
|
|
||||||
hexb(unsigned char h)
|
|
||||||
{
|
|
||||||
static char buf[3];
|
|
||||||
buf[0] = b2h((h & 0xf0) >> 4);
|
|
||||||
buf[1] = b2h((h & 0x0f));
|
|
||||||
buf[2] = 0;
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
char*
|
|
||||||
hexl(unsigned int h)
|
|
||||||
{
|
|
||||||
static char buf[9];
|
|
||||||
int i;
|
|
||||||
for(i=0; i<4; i++)
|
|
||||||
{
|
|
||||||
int l = h >> (24 - i*8);
|
|
||||||
buf[i*2+0] = b2h((l & 0xf0) >> 4);
|
|
||||||
buf[i*2+1] = b2h((l & 0x0f));
|
|
||||||
}
|
|
||||||
buf[8] = 0;
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
char*
|
|
||||||
offset(void* base, void* off)
|
|
||||||
{
|
|
||||||
static char buf[20];
|
|
||||||
int o = (char*)off - (char*)base;
|
|
||||||
sprintf(buf,"%s",hexb(o));
|
|
||||||
return buf;
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
print_header(char* filename)
|
print_header(char* filename)
|
||||||
{
|
{
|
||||||
FILE* file;
|
|
||||||
Dict_header header;
|
Dict_header header;
|
||||||
|
|
||||||
if ((file = fopen(filename,"rb")) == NULL)
|
Dic_check_header(&header,filename);
|
||||||
return;
|
|
||||||
if (fread(&header,sizeof(Dict_header),1,file) != 1)
|
#define OO(IDENT) offsetof(Dict_header,IDENT)
|
||||||
return;
|
|
||||||
fclose(file);
|
|
||||||
|
|
||||||
printf("Dictionary header information\n");
|
printf("Dictionary header information\n");
|
||||||
printf("0x%s ident : %s\n",offset(&header,&header.ident),header.ident);
|
printf("0x%02x ident : %s\n", OO(ident) ,header.ident);
|
||||||
printf("0x%s unused 1 : %6d %s\n",offset(&header,&header.unused_1) ,header.unused_1 ,hexl(header.unused_1));
|
printf("0x%02x unused 1 : %6d %06x\n",OO(unused_1) ,header.unused_1 ,header.unused_1);
|
||||||
printf("0x%s unused 2 : %6d %s\n",offset(&header,&header.unused_2) ,header.unused_2 ,hexl(header.unused_2));
|
printf("0x%02x unused 2 : %6d %06x\n",OO(unused_2) ,header.unused_2 ,header.unused_2);
|
||||||
printf("0x%s root : %6d %s\n",offset(&header,&header.root) ,header.root ,hexl(header.root));
|
printf("0x%02x root : %6d %06x\n",OO(root) ,header.root ,header.root);
|
||||||
printf("0x%s words : %6d %s\n",offset(&header,&header.nwords) ,header.nwords ,hexl(header.nwords));
|
printf("0x%02x words : %6d %06x\n",OO(nwords) ,header.nwords ,header.nwords);
|
||||||
printf("0x%s edges used : %6d %s\n",offset(&header,&header.edgesused) ,header.edgesused ,hexl(header.edgesused));
|
printf("0x%02x edges used : %6d %06x\n",OO(edgesused) ,header.edgesused ,header.edgesused);
|
||||||
printf("0x%s nodes used : %6d %s\n",offset(&header,&header.nodesused) ,header.nodesused ,hexl(header.nodesused));
|
printf("0x%02x nodes used : %6d %06x\n",OO(nodesused) ,header.nodesused ,header.nodesused);
|
||||||
printf("0x%s nodes saved : %6d %s\n",offset(&header,&header.nodessaved),header.nodessaved,hexl(header.nodessaved));
|
printf("0x%02x nodes saved : %6d %06x\n",OO(nodessaved),header.nodessaved,header.nodessaved);
|
||||||
printf("0x%s edges saved : %6d %s\n",offset(&header,&header.edgessaved),header.edgessaved,hexl(header.edgessaved));
|
printf("0x%02x edges saved : %6d %06x\n",OO(edgessaved),header.edgessaved,header.edgessaved);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf("sizeof(header) = 0x%s (%lu)\n", hexb(sizeof(header)), sizeof(header));
|
printf("sizeof(header) = 0x%x (%u)\n", sizeof(header), sizeof(header));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
print_node_hex(int i, Dictionary dic)
|
static void
|
||||||
|
print_node_hex(Dictionary dic, int i)
|
||||||
{
|
{
|
||||||
unsigned int* pe;
|
union edge_t {
|
||||||
Dawg_edge e = dic->dawg[i];
|
Dawg_edge e;
|
||||||
pe = (unsigned int*)&e;
|
uint32_t s;
|
||||||
printf("0x%s %s |%2d ptr=%2d t=%d l=%d f=%d chr=%d (%c)\n",
|
} ee;
|
||||||
offset(&(dic->dawg[0]),&(dic->dawg[i])),hexl(*pe),i,
|
|
||||||
e.ptr, e.term, e.last, e.fill, e.chr, e.chr +'a' -1);
|
ee.e = dic->dawg[i];
|
||||||
|
|
||||||
|
printf("0x%04x %08x |%4d ptr=%8d t=%d l=%d f=%d chr=%2d (%c)\n",
|
||||||
|
i*sizeof(ee), (unsigned int)(ee.s),
|
||||||
|
i, ee.e.ptr, ee.e.term, ee.e.last, ee.e.fill, ee.e.chr, ee.e.chr +'a' -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
print_dic_hex(char* filename)
|
print_dic_hex(char* filename)
|
||||||
{
|
{
|
||||||
|
@ -180,10 +145,11 @@ print_dic_hex(char* filename)
|
||||||
printf("offs binary structure \n");
|
printf("offs binary structure \n");
|
||||||
printf("---- -------- | ------------------\n");
|
printf("---- -------- | ------------------\n");
|
||||||
for(i=0; i < (dic->nedges + 1); i++)
|
for(i=0; i < (dic->nedges + 1); i++)
|
||||||
print_node_hex(i,dic);
|
print_node_hex(dic,i);
|
||||||
Dic_destroy(dic);
|
Dic_destroy(dic);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
usage(char* name)
|
usage(char* name)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue