2004-06-21 18:06:54 +02:00
|
|
|
|
/* Eliot */
|
2005-05-06 01:45:04 +02:00
|
|
|
|
/* Copyright (C) 1999 Antoine Fraboulet */
|
2004-06-21 18:06:54 +02:00
|
|
|
|
/* */
|
2005-05-06 01:45:04 +02:00
|
|
|
|
/* This file is part of Eliot. */
|
|
|
|
|
/* */
|
|
|
|
|
/* Eliot is free software; you can redistribute it and/or modify */
|
2004-06-21 18:06:54 +02:00
|
|
|
|
/* it under the terms of the GNU General Public License as published by */
|
|
|
|
|
/* the Free Software Foundation; either version 2 of the License, or */
|
|
|
|
|
/* (at your option) any later version. */
|
|
|
|
|
/* */
|
2006-01-01 20:51:00 +01:00
|
|
|
|
/* Eliot is distributed in the hope that it will be useful, */
|
2004-06-21 18:06:54 +02:00
|
|
|
|
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
|
|
|
|
|
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
|
|
|
|
|
/* GNU General Public License for more details. */
|
|
|
|
|
/* */
|
|
|
|
|
/* You should have received a copy of the GNU General Public License */
|
|
|
|
|
/* along with this program; if not, write to the Free Software */
|
2005-10-23 16:53:42 +02:00
|
|
|
|
/* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
2005-04-19 18:26:50 +02:00
|
|
|
|
|
2005-11-05 18:56:22 +01:00
|
|
|
|
/**
|
|
|
|
|
* \file regexp.c
|
2006-01-01 20:51:00 +01:00
|
|
|
|
* \brief Regular Expression functions
|
2005-11-05 18:56:22 +01:00
|
|
|
|
* \author Antoine Fraboulet
|
|
|
|
|
* \date 2005
|
|
|
|
|
*/
|
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
#include "config.h"
|
2004-06-21 18:06:54 +02:00
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
2005-04-09 21:16:09 +02:00
|
|
|
|
#ifdef HAVE_SYS_WAIT_H
|
|
|
|
|
# include <sys/wait.h>
|
|
|
|
|
#endif
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
#include "dic.h"
|
2004-06-21 18:06:54 +02:00
|
|
|
|
#include "regexp.h"
|
|
|
|
|
#include "automaton.h"
|
|
|
|
|
|
|
|
|
|
/* Debug helper: PDBG(x) expands to x when DEBUG_RE2 is defined and to
 * nothing otherwise.  A PDBG that is already defined is left untouched. */
#if !defined(PDBG)
#  if defined(DEBUG_RE2)
#    define PDBG(x) x
#  else
#    define PDBG(x)
#  endif
#endif
|
|
|
|
|
|
2005-04-16 22:55:51 +02:00
|
|
|
|
/**
 * Allocates a syntax tree node and initialises all of its fields.
 * @param type = node kind, stored in the type field
 * @param v = letter stored in the var field
 * @param fg = left child
 * @param fd = right child
 * @return the new node, or NULL when the allocation fails
 *
 * numero, position, annulable, PP and DP all start at 0.  The node is
 * obtained from malloc(); regexp_delete_tree() releases it with free().
 */
NODE* regexp_createNODE(int type,char v,NODE *fg,NODE *fd)
{
  NODE *node = malloc(sizeof *node);

  /* Do not write through a failed allocation: report it to the caller. */
  if (node == NULL)
    return NULL;

  node->type      = type;
  node->var       = v;
  node->fd        = fd;
  node->fg        = fg;
  node->numero    = 0;
  node->position  = 0;
  node->annulable = 0;
  node->PP        = 0;
  node->DP        = 0;
  return node;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Frees a whole syntax tree: both subtrees are released recursively, then
 * the node itself.  A NULL root is accepted and nothing is done for it.
 * @param root = root of the tree to free
 */
void regexp_delete_tree(NODE *root)
{
  if (root != NULL)
    {
      regexp_delete_tree(root->fg);
      regexp_delete_tree(root->fd);
      free(root);
    }
}
|
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
#if defined(DEBUG_RE)
/* Forward declaration of a debug-only helper defined further down. */
static void print_node(FILE *f, NODE *n, int detail);
#endif
|
2005-04-18 19:40:36 +02:00
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
/**
|
2005-11-04 21:00:05 +01:00
|
|
|
|
* computes position, annulable, PP, DP attributes
|
2005-04-19 18:26:50 +02:00
|
|
|
|
* @param r = root
|
|
|
|
|
* @param p = current leaf position
|
|
|
|
|
* @param n = current node number
|
|
|
|
|
* @param ptl = position to letter
|
|
|
|
|
*/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
void regexp_parcours(NODE* r, int *p, int *n, int ptl[])
|
2004-06-21 18:06:54 +02:00
|
|
|
|
{
|
|
|
|
|
if (r == NULL)
|
|
|
|
|
return;
|
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
regexp_parcours(r->fg,p,n,ptl);
|
|
|
|
|
regexp_parcours(r->fd,p,n,ptl);
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
|
|
|
|
switch (r->type)
|
|
|
|
|
{
|
|
|
|
|
case NODE_VAR:
|
|
|
|
|
r->position = *p;
|
|
|
|
|
ptl[*p] = r->var;
|
|
|
|
|
*p = *p + 1;
|
|
|
|
|
r->annulable = 0;
|
|
|
|
|
r->PP = 1 << (r->position - 1);
|
|
|
|
|
r->DP = 1 << (r->position - 1);
|
|
|
|
|
break;
|
|
|
|
|
case NODE_OR:
|
|
|
|
|
r->position = 0;
|
|
|
|
|
r->annulable = r->fg->annulable || r->fd->annulable;
|
|
|
|
|
r->PP = r->fg->PP | r->fd->PP;
|
|
|
|
|
r->DP = r->fg->DP | r->fd->DP;
|
|
|
|
|
break;
|
|
|
|
|
case NODE_AND:
|
|
|
|
|
r->position = 0;
|
|
|
|
|
r->annulable = r->fg->annulable && r->fd->annulable;
|
|
|
|
|
r->PP = (r->fg->annulable) ? (r->fg->PP | r->fd->PP) : r->fg->PP;
|
|
|
|
|
r->DP = (r->fd->annulable) ? (r->fg->DP | r->fd->DP) : r->fd->DP;
|
|
|
|
|
break;
|
2005-04-25 11:17:53 +02:00
|
|
|
|
case NODE_PLUS:
|
|
|
|
|
r->position = 0;
|
|
|
|
|
r->annulable = 0;
|
|
|
|
|
r->PP = r->fg->PP;
|
|
|
|
|
r->DP = r->fg->DP;
|
|
|
|
|
break;
|
2004-06-21 18:06:54 +02:00
|
|
|
|
case NODE_STAR:
|
|
|
|
|
r->position = 0;
|
|
|
|
|
r->annulable = 1;
|
|
|
|
|
r->PP = r->fg->PP;
|
|
|
|
|
r->DP = r->fg->DP;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
r->numero = *n;
|
|
|
|
|
*n = *n + 1;
|
|
|
|
|
}
|
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
/**
|
2005-11-04 21:00:05 +01:00
|
|
|
|
* computes possuivante
|
2005-04-19 18:26:50 +02:00
|
|
|
|
* @param r = root
|
|
|
|
|
* @param PS = next position
|
|
|
|
|
*/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
void regexp_possuivante(NODE* r, int PS[])
|
2004-06-21 18:06:54 +02:00
|
|
|
|
{
|
|
|
|
|
int pos;
|
|
|
|
|
if (r == NULL)
|
|
|
|
|
return;
|
|
|
|
|
|
2005-04-09 21:16:09 +02:00
|
|
|
|
regexp_possuivante(r->fg,PS);
|
|
|
|
|
regexp_possuivante(r->fd,PS);
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
|
|
|
|
switch (r->type)
|
|
|
|
|
{
|
|
|
|
|
case NODE_AND:
|
2005-04-25 11:17:53 +02:00
|
|
|
|
/************************************/
|
|
|
|
|
/* \forall p \in DP(left) */
|
2005-11-04 21:00:05 +01:00
|
|
|
|
/* PS[p] = PS[p] \cup PP(right) */
|
2005-04-25 11:17:53 +02:00
|
|
|
|
/************************************/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
for(pos=1; pos <= PS[0]; pos++)
|
|
|
|
|
{
|
|
|
|
|
if (r->fg->DP & (1 << (pos-1)))
|
|
|
|
|
PS[pos] |= r->fd->PP;
|
|
|
|
|
}
|
|
|
|
|
break;
|
2005-04-25 11:17:53 +02:00
|
|
|
|
case NODE_PLUS:
|
|
|
|
|
/************************************/
|
|
|
|
|
/* == same as START */
|
|
|
|
|
/* \forall p \in DP(left) */
|
2005-11-04 21:00:05 +01:00
|
|
|
|
/* PS[p] = PS[p] \cup PP(left) */
|
2005-04-25 11:17:53 +02:00
|
|
|
|
/************************************/
|
|
|
|
|
for(pos=1; pos <= PS[0]; pos++)
|
|
|
|
|
{
|
|
|
|
|
if (r->DP & (1 << (pos-1)))
|
|
|
|
|
PS[pos] |= r->PP;
|
|
|
|
|
}
|
|
|
|
|
break;
|
2004-06-21 18:06:54 +02:00
|
|
|
|
case NODE_STAR:
|
2005-04-25 11:17:53 +02:00
|
|
|
|
/************************************/
|
|
|
|
|
/* \forall p \in DP(left) */
|
2005-11-04 21:00:05 +01:00
|
|
|
|
/* PS[p] = PS[p] \cup PP(left) */
|
2005-04-25 11:17:53 +02:00
|
|
|
|
/************************************/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
for(pos=1; pos <= PS[0]; pos++)
|
|
|
|
|
{
|
|
|
|
|
if (r->DP & (1 << (pos-1)))
|
|
|
|
|
PS[pos] |= r->PP;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2005-04-18 19:40:36 +02:00
|
|
|
|
/*////////////////////////////////////////////////
|
2005-04-19 18:26:50 +02:00
|
|
|
|
// DEBUG only functions
|
2005-04-18 19:40:36 +02:00
|
|
|
|
////////////////////////////////////////////////*/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
#ifdef DEBUG_RE
/**
 * Prints the next-position table on stdout, one line per position.
 * @param PS = next position table; PS[0] holds the number of entries and
 *             the entries themselves are PS[1] .. PS[PS[0]]
 */
void regexp_print_PS(int PS[])
{
  int pos = 1;

  printf("** positions suivantes **\n");
  while (pos <= PS[0])
    {
      printf("%02d: 0x%08x\n", pos, PS[pos]);
      pos++;
    }
}
#endif
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-18 19:40:36 +02:00
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
#ifdef DEBUG_RE
/**
 * Prints the position-to-letter table on stdout as "pos=letter" pairs on
 * a single line.
 * @param ptl = position to letter table; ptl[0] holds the number of
 *              entries and the entries are ptl[1] .. ptl[ptl[0]]
 */
void regexp_print_ptl(int ptl[])
{
  int pos = 1;

  printf("** pos -> lettre: ");
  while (pos <= ptl[0])
    {
      printf("%d=%c ", pos, ptl[pos]);
      pos++;
    }
  printf("\n");
}
#endif
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-18 19:40:36 +02:00
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
void regexp_print_letter(FILE* f, char l)
|
2004-06-21 18:06:54 +02:00
|
|
|
|
{
|
2005-04-19 18:26:50 +02:00
|
|
|
|
switch (l)
|
|
|
|
|
{
|
2005-05-06 01:45:04 +02:00
|
|
|
|
case RE_EPSILON: fprintf(f,"( & [%d])",l); break;
|
|
|
|
|
case RE_FINAL_TOK: fprintf(f,"( # [%d])",l); break;
|
|
|
|
|
case RE_ALL_MATCH: fprintf(f,"( . [%d])",l); break;
|
|
|
|
|
case RE_VOWL_MATCH: fprintf(f,"(:v: [%d])",l); break;
|
|
|
|
|
case RE_CONS_MATCH: fprintf(f,"(:c: [%d])",l); break;
|
|
|
|
|
case RE_USR1_MATCH: fprintf(f,"(:1: [%d])",l); break;
|
|
|
|
|
case RE_USR2_MATCH: fprintf(f,"(:2: [%d])",l); break;
|
2005-11-04 21:00:05 +01:00
|
|
|
|
default:
|
2005-04-27 19:35:03 +02:00
|
|
|
|
if (l < RE_FINAL_TOK)
|
2005-11-04 21:00:05 +01:00
|
|
|
|
fprintf(f," (%c [%d]) ",l + 'a' - 1, l);
|
2005-04-27 19:35:03 +02:00
|
|
|
|
else
|
|
|
|
|
fprintf(f," (liste %d)",l - RE_LIST_USER_END);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
|
|
|
|
|
|
|
|
|
void regexp_print_letter2(FILE* f, char l)
|
|
|
|
|
{
|
|
|
|
|
switch (l)
|
|
|
|
|
{
|
2005-05-06 01:45:04 +02:00
|
|
|
|
case RE_EPSILON: fprintf(f,"&"); break;
|
2005-04-27 19:35:03 +02:00
|
|
|
|
case RE_FINAL_TOK: fprintf(f,"#"); break;
|
|
|
|
|
case RE_ALL_MATCH: fprintf(f,"."); break;
|
|
|
|
|
case RE_VOWL_MATCH: fprintf(f,":v:"); break;
|
|
|
|
|
case RE_CONS_MATCH: fprintf(f,":c:"); break;
|
|
|
|
|
case RE_USR1_MATCH: fprintf(f,":1:"); break;
|
|
|
|
|
case RE_USR2_MATCH: fprintf(f,":2:"); break;
|
2005-11-04 21:00:05 +01:00
|
|
|
|
default:
|
2005-04-27 19:35:03 +02:00
|
|
|
|
if (l < RE_FINAL_TOK)
|
2005-11-04 21:00:05 +01:00
|
|
|
|
fprintf(f,"%c",l + 'a' - 1);
|
2005-04-27 19:35:03 +02:00
|
|
|
|
else
|
|
|
|
|
fprintf(f,"l%d",l - RE_LIST_USER_END);
|
|
|
|
|
break;
|
2005-04-19 18:26:50 +02:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_RE
/**
 * Writes the label text of a single node to a stream.
 * @param f = output stream
 * @param n = node to describe; NULL writes nothing
 * @param detail = when equal to 2, the position, annulable, PP and DP
 *                 attributes are appended after the node name
 *
 * The "\\n" sequences reach the stream as a backslash followed by 'n',
 * not as real line breaks.
 */
static void print_node(FILE* f, NODE *n, int detail)
{
  if (n == NULL)
    return;

  if (n->type == NODE_VAR)
    regexp_print_letter(f,n->var);
  else if (n->type == NODE_OR)
    fputs("OR", f);
  else if (n->type == NODE_AND)
    fputs("AND", f);
  else if (n->type == NODE_PLUS)
    fputs("+", f);
  else if (n->type == NODE_STAR)
    fputs("*", f);

  if (detail != 2)
    return;

  fprintf(f,"\\n pos=%d\\n annul=%d\\n PP=0x%04x\\n DP=0x%04x",
          n->position,n->annulable,n->PP,n->DP);
}
#endif
|
2005-04-19 18:26:50 +02:00
|
|
|
|
|
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_RE
/**
 * Writes one labelled node statement per tree node, children first.
 * @param f = output stream
 * @param n = root of the subtree to dump; NULL writes nothing
 * @param detail = detail level handed to print_node()
 *
 * Each statement has the form: numero [ label="..."];
 */
static void print_tree_nodes(FILE* f, NODE* n, int detail)
{
  if (n != NULL)
    {
      print_tree_nodes(f, n->fg, detail);
      print_tree_nodes(f, n->fd, detail);

      fprintf(f,"%d [ label=\"",n->numero);
      print_node(f, n, detail);
      fprintf(f,"\"];\n");
    }
}
#endif
|
|
|
|
|
|
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
#ifdef DEBUG_RE
/**
 * Writes the parent -> child edges of the tree, children first.
 * @param f = output stream
 * @param n = root of the subtree to dump; NULL writes nothing
 *
 * OR and AND nodes get an edge to each of their two children, PLUS and
 * STAR nodes get a single edge to their left child, and any other node
 * kind produces no edge.
 */
static void print_tree_edges(FILE* f, NODE* n)
{
  if (n == NULL)
    return;

  print_tree_edges(f, n->fg);
  print_tree_edges(f, n->fd);

  switch (n->type)
    {
    case NODE_OR:
    case NODE_AND:
      /* binary operators: left edge, then right edge */
      fprintf(f,"%d -> %d;",n->numero,n->fg->numero);
      fprintf(f,"%d -> %d;",n->numero,n->fd->numero);
      break;

    case NODE_PLUS:
    case NODE_STAR:
      /* unary operators: only the left child is used */
      fprintf(f,"%d -> %d;",n->numero,n->fg->numero);
      break;

    default:
      break;
    }
}
#endif
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-04-19 18:26:50 +02:00
|
|
|
|
/*////////////////////////////////////////////////
|
|
|
|
|
////////////////////////////////////////////////*/
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_RE
/**
 * Dumps the syntax tree as a "digraph" description into a file and, when
 * HAVE_SYS_WAIT_H is defined, runs the external "dotty" program on that
 * file and waits for it to terminate.
 * @param n = root of the tree to dump
 * @param name = name of the output file, also used as the digraph name
 * @param detail = detail level handed to the node printer
 *
 * If the file cannot be opened, the error is reported on stderr and
 * nothing else is done.
 */
void regexp_print_tree(NODE* n, char* name, int detail)
{
  FILE* f;
#ifdef HAVE_SYS_WAIT_H
  /* only needed for the fork/exec part below */
  pid_t pid;
#endif

  f = fopen(name,"w");
  if (f == NULL)
    {
      /* writing through a NULL stream would crash: give up early */
      perror(name);
      return;
    }

  fprintf(f,"digraph %s {\n",name);
  print_tree_nodes(f,n,detail);
  print_tree_edges(f,n);
  fprintf(f,"fontsize=20;\n");
  fprintf(f,"}\n");
  fclose(f);

#ifdef HAVE_SYS_WAIT_H
  pid = fork ();
  if (pid > 0) {
    /* parent: block until the viewer exits */
    wait(NULL);
  } else if (pid == 0) {
    /* child: the variadic argument list must end with a char* null */
    execlp("dotty","dotty",name,(char *)NULL);
    /* only reached when the exec itself failed */
    printf("exec dotty failed\n");
    exit(1);
  } else {
    perror("fork");
  }
#endif
}
#endif
|
2004-06-21 18:06:54 +02:00
|
|
|
|
|
2005-11-05 18:56:22 +01:00
|
|
|
|
|
|
|
|
|
/// Local Variables:
|
|
|
|
|
/// mode: hs-minor
|
|
|
|
|
/// c-basic-offset: 2
|
|
|
|
|
/// End:
|