From 3241a1c1af367722ed7a1ea3829546d4b2ff922c Mon Sep 17 00:00:00 2001 From: Antoine Fraboulet Date: Mon, 21 Jun 2004 16:06:54 +0000 Subject: [PATCH] ajout de regexp.c --- dic/regexp.c | 302 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 dic/regexp.c diff --git a/dic/regexp.c b/dic/regexp.c new file mode 100644 index 0000000..2f77443 --- /dev/null +++ b/dic/regexp.c @@ -0,0 +1,302 @@ +/* Eliot */ +/* Copyright (C) 1999 antoine.fraboulet */ +/* antoine.fraboulet@free.fr */ +/* */ +/* This program is free software; you can redistribute it and/or modify */ +/* it under the terms of the GNU General Public License as published by */ +/* the Free Software Foundation; either version 2 of the License, or */ +/* (at your option) any later version. */ +/* */ +/* This program is distributed in the hope that it will be useful, */ +/* but WITHOUT ANY WARRANTY; without even the implied warranty of */ +/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ +/* GNU General Public License for more details. */ +/* */ +/* You should have received a copy of the GNU General Public License */ +/* along with this program; if not, write to the Free Software */ +/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + * $Id: regexp.c,v 1.1 2004/06/21 16:06:54 afrab Exp $ + */ +#include +#include +#include +#include "regexp.h" +#include "automaton.h" + +#define MAX 32 + +#ifndef PDBG +#ifdef DEBUG +#define PDBG(x) { x ; } +#else +#define PDBG(x) { } +#endif +#endif + +////////////////////////////////////////////////// +// position, annulable, PP, DP +// r = root +// p = current leaf position +// n = current node number +// ptl = position to letter +////////////////////////////////////////////////// + +void parcours(NODE* r, int *p, int *n, int ptl[]) +{ + if (r == NULL) + return; + + parcours(r->fg,p,n,ptl); + parcours(r->fd,p,n,ptl); + + switch (r->type) + { + case NODE_VAR: + r->position = *p; + ptl[*p] = r->var; + *p = *p + 1; + + r->annulable = 0; + r->PP = 1 << (r->position - 1); + r->DP = 1 << (r->position - 1); + break; + case NODE_OR: + r->position = 0; + r->annulable = r->fg->annulable || r->fd->annulable; + r->PP = r->fg->PP | r->fd->PP; + r->DP = r->fg->DP | r->fd->DP; + break; + case NODE_AND: + r->position = 0; + r->annulable = r->fg->annulable && r->fd->annulable; + r->PP = (r->fg->annulable) ? (r->fg->PP | r->fd->PP) : r->fg->PP; + r->DP = (r->fd->annulable) ? (r->fg->DP | r->fd->DP) : r->fd->DP; + break; + case NODE_STAR: + r->position = 0; + r->annulable = 1; + r->PP = r->fg->PP; + r->DP = r->fg->DP; + break; + } + + r->numero = *n; + *n = *n + 1; + + PDBG(print_node(r)); +} + +////////////////////////////////////////////////// +// PosSuivante +////////////////////////////////////////////////// + +void possuivante(NODE* r, int PS[]) +{ + int pos; + if (r == NULL) + return; + + possuivante(r->fg,PS); + possuivante(r->fd,PS); + + switch (r->type) + { + case NODE_AND: + for(pos=1; pos <= PS[0]; pos++) + { + if (r->fg->DP & (1 << (pos-1))) + PS[pos] |= r->fd->PP; + } + break; + case NODE_STAR: + for(pos=1; pos <= PS[0]; pos++) + { + if (r->DP & (1 << (pos-1))) + PS[pos] |= r->PP; + } + break; + } +} + + +////////////////////////////////////////////////// +////////////////////////////////////////////////// +#if 0 +void print_node(NODE *n) +{ + if (n == NULL) + return; + + switch (n->type) + { + case NODE_VAR: + printf("%c (%d)",n->var,n->position); + break; + case NODE_OR: + printf("OR"); + break; + case NODE_AND: + printf("AND"); + break; + case NODE_STAR: + printf("STAR"); + break; + } + printf("\tannulable: %d",n->annulable); + printf(" PP: 0x%08x",n->PP); + printf(" DP: 0x%08x\n",n->DP); +} + +////////////////////////////////////////////////// +////////////////////////////////////////////////// + +void print_PS(int PS[]) +{ + int i; + printf("** positions suivantes **\n"); + for(i=1; i <= PS[0]; i++) + { + printf("%02d: 0x%08x\n", i, PS[i]); + } +} + +////////////////////////////////////////////////// +////////////////////////////////////////////////// + +void print_ptl(int ptl[]) +{ + int i; + printf("** pos -> lettre: "); + for(i=1; i <= ptl[0]; i++) + { + printf("%d=%c ",i,ptl[i]); + } + printf("\n"); +} + +////////////////////////////////////////////////// +////////////////////////////////////////////////// + +void print_tree_nodes(FILE* f, NODE* n) +{ + if (n == NULL) + return; + + print_tree_nodes(f,n->fg); + print_tree_nodes(f,n->fd); + + fprintf(f,"%d [ label=\"",n->numero); + switch (n->type) + { + case NODE_VAR: + fprintf(f,"%c (%d)",n->var,n->position); + break; + case NODE_OR: + fprintf(f,"OR"); + break; + case NODE_AND: + fprintf(f,"AND"); + break; + case NODE_STAR: + fprintf(f,"*"); + break; + } + fprintf(f,"\\n annulable=%d\\n PP=0x%08x\\n DP=0x%08x\"];\n",n->annulable,n->PP,n->DP); +} + +void print_tree_edges(FILE* f, NODE* n) +{ + if (n == NULL) + return; + + print_tree_edges(f,n->fg); + print_tree_edges(f,n->fd); + + switch (n->type) + { + case NODE_OR: + fprintf(f,"%d -> %d;",n->numero,n->fg->numero); + fprintf(f,"%d -> %d;",n->numero,n->fd->numero); + break; + case NODE_AND: + fprintf(f,"%d -> %d;",n->numero,n->fg->numero); + fprintf(f,"%d -> %d;",n->numero,n->fd->numero); + break; + case NODE_STAR: + fprintf(f,"%d -> %d;",n->numero,n->fg->numero); + break; + } +} + +void print_tree(NODE* n) +{ + FILE* f; + pid_t pid; + char name[] = "tree"; + + f=fopen(name,"w"); + fprintf(f,"digraph %s {\n",name); + print_tree_nodes(f,n); + print_tree_edges(f,n); + fprintf(f,"fontsize=20;\n"); + fprintf(f,"}\n"); + fclose(f); + + pid = fork (); + if (pid > 0) { + wait(NULL); + } else if (pid == 0) { + execlp("dotty","dotty",name,NULL); + printf("exec dotty failed\n"); + exit(1); + } +} +#endif +////////////////////////////////////////////////// +////////////////////////////////////////////////// +#if 0 +int main(int argc, char* argv[]) +{ + int i,p,n; + int ptl[MAX+1]; // mapping postition -> lettre + int PS [MAX+1]; // Position Suivante [ 1 << (position-1)] = \cup { 1 << (p-1) | p \in position acceptée } + automaton a,b; + + for(i=0; i < MAX; i++) + { + PS[i] = 0; + ptl[i] = 0; + } + + yyparse(); + + n = 1; + p = 1; + parcours(root,&p,&n,ptl); + PS [0] = p - 1; + ptl[0] = p - 1; + PDBG(printf("** regexp: nombre de terminaux: %d\n",PS[0])); + PDBG(printf("** regexp: nombre de noeuds dans l'arbre: %d\n",n)); + PDBG(print_ptl(ptl)); + + possuivante(root,PS); + PDBG(print_tree(root)); + PDBG(print_PS(PS)); + + automaton_build(&a,root->PP,ptl,PS); + PDBG(printf("** auto: nombre d'états: %d\n",a.nstate)); + print_automaton(&a); + + automaton_minimize(&a,&b); + PDBG(printf("** auto: nombre d'états: %d\n",b.nstate)); + PDBG(print_automaton(&b)); + + automaton_destroy(&a); + automaton_destroy(&b); + return 0; +} +#endif +////////////////////////////////////////////////// +////////////////////////////////////////////////// +