some more doc on dictionary and expected regexp syntax

2025-01-17 06:11:49 +01:00 · 2005-04-19 16:25:06 +00:00 · 2005-04-19 16:25:06 +00:00 · b1e67d947b
commit b1e67d947b
parent 1077565ace
2 changed files with 22 additions and 8 deletions
--- a/doc/dic.txt
+++ b/doc/dic.txt
@@ -73,7 +73,7 @@ The header structure is the following:

 #define _COMPIL_KEYWORD_ "_COMPILED_DICTIONARY_"

-typedef struct _Dict_header {
+typedef struct _Dict_header {              // offset
  char ident[sizeof(_COMPIL_KEYWORD_)];    // 0x00
  char unused_1;                           // 0x16
  char unused_2;                           // 0x17
@@ -82,7 +82,7 @@ typedef struct _Dict_header {
  unsigned int edgesused;                  // 0x20
  unsigned int nodesused;                  // 0x24
  unsigned int nodessaved;                 // 0x2c
-  unsigned int edgessaved;                 // 0x28
+  unsigned int edgessaved;                 // 0x30
 } Dict_header;

 binary output of the header:
@@ -98,11 +98,11 @@ binary output of the header:
 0x2c edges saved :      1 00000001
 ===================================================================

-The real array of data begins at offset 0x30. Integer are stored in a
+The real array of data begins at offset 0x34. Integers are stored in a
 machine dependent way. This dictionary was compiled on a i386 and is
-not readable on a machine with a different endianess.  The array is
-stored 'as is' right after the header. Each array cell is a
-bit-structure:
+not readable on a machine with a different endianness (unless all the
+necessary fields are byte-swapped).  The array is stored 'as is' right
+after the header. Each array cell is a 4-byte bit-structure:

 typedef struct _Dawg_edge { 
   unsigned int ptr  : 24; 
@@ -115,8 +115,8 @@ typedef struct _Dawg_edge {
 Characters are not stored in ASCII. The order is preserved but
 we changed the values: A=1, B=2, ... This is very easy to do 
 with the ASCII table as ('A' & 0x1f) == ('a' & 0x1f) == 1.
-This may not work on machines that are not using ASCII (like 
-Macintosh.)
+This may not work on machines that are not using ASCII. The dictionary
+can thus handle up to 32 different letters but not more.

 offs binary       structure         
 ---- -------- |   ------------------
--- a/doc/regexp.tex
+++ b/doc/regexp.tex
@@ -8,6 +8,10 @@ expressions rationnelles habituelles.

 \section{utilisation}

+Les mots recherchés sont complets : la recherche d'une expression
+\verb=e= correspond à l'expression \verb=^e$=. Pour rechercher un 
+motif \verb=m= dans un mot, il faut donc utiliser l'expression \verb=.*m.*=.
+
 \subsection{caractères}

 \begin{itemize}
@@ -15,6 +19,8 @@ expressions rationnelles habituelles.
 \item \texttt{.} :  n'importe quel caractère
 \item \texttt{:v:} : n'importe quelle voyelle 
 \item \texttt{:c:} : n'importe quelle consonne
+\item \texttt{:1:} : liste 1 définie par l'utilisateur
+\item \texttt{:2:} : liste 2 définie par l'utilisateur
 \end{itemize}

 \subsection{répétitions}
@@ -38,4 +44,12 @@ expressions rationnelles habituelles.

 \subsection{exemples}

+\begin{itemize}
+\item \verb=a.*= : liste des mots débutant par la lettre \verb=a=
+\item \verb=.*a= : liste des mots se terminant par la lettre \verb=a=
+\item \verb=.*oula.*= : liste des mots contenant le motif \verb=oula=
+\item \verb=a.*b= : liste des mots débutant par \verb=a= et se terminant par \verb=b=
+\item \verb=.*a.*e.*i.*o.*u.*= : liste des mots contenant les lettres \verb=aeiou= dans l'ordre
+\end{itemize}
+
 \end{document}