2008-01-08 14:52:32 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
* Eliot
|
|
|
|
* Copyright (C) 1999-2006 Antoine Fraboulet
|
|
|
|
* Authors: Antoine Fraboulet <antoine.fraboulet @@ free.fr>
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
|
|
* (at your option) any later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
#include "config.h"
|
|
|
|
|
2012-05-16 20:43:26 +02:00
|
|
|
#include <boost/format.hpp>
|
|
|
|
|
2008-01-08 14:52:32 +01:00
|
|
|
#include <cstdio>
|
|
|
|
#include <cstdlib>
|
|
|
|
#include <cstring>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
#include "dic.h"
|
|
|
|
#include "regexp.h"
|
|
|
|
|
2012-05-16 20:43:26 +02:00
|
|
|
using boost::format;
|
|
|
|
|
2008-01-08 14:52:32 +01:00
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
/**
 * Build a syntax-tree node.
 *
 * @param type  node kind; the methods of this file switch on NODE_VAR,
 *              NODE_OR, NODE_AND, NODE_PLUS and NODE_STAR
 * @param v     value stored in m_var (traverse() copies it into ptl[]
 *              for NODE_VAR nodes)
 * @param fg    left child, may be null; deleted by the destructor
 * @param fd    right child, may be null; deleted by the destructor
 *
 * The computed attributes (number, position, nullable flag, PP and DP)
 * all start zeroed; traverse() is what fills them in.
 */
Node::Node(int type, char v, Node *fg, Node *fd)
    : m_type(type),
      m_var(v),
      m_fg(fg),
      m_fd(fd),
      m_number(0),
      m_position(0),
      m_annulable(false),
      m_PP(0),
      m_DP(0)
{
    // Nothing else to do: every member is set by the initializer list.
}
|
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
/**
 * Destroy the node together with both of its subtrees.
 */
Node::~Node()
{
    // Deleting a null pointer is a no-op, so a node without children
    // needs no special case. Each delete recurses into the child's own
    // destructor, left subtree first.
    delete m_fg;
    delete m_fd;
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
2008-07-07 19:29:59 +02:00
|
|
|
* p is the current leaf position
|
|
|
|
* n is the current node number
|
2008-01-08 14:52:32 +01:00
|
|
|
*/
|
2008-07-07 19:29:59 +02:00
|
|
|
void Node::traverse(int &p, int &n, int ptl[])
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_fg)
|
|
|
|
m_fg->traverse(p, n, ptl);
|
|
|
|
if (m_fd)
|
|
|
|
m_fd->traverse(p, n, ptl);
|
2008-01-08 14:52:32 +01:00
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
m_number = n;
|
|
|
|
++n;
|
2008-01-08 14:52:32 +01:00
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
switch (m_type)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
|
|
|
case NODE_VAR:
|
2008-07-07 19:29:59 +02:00
|
|
|
m_position = p;
|
|
|
|
ptl[p] = m_var;
|
|
|
|
++p;
|
|
|
|
m_annulable = false;
|
|
|
|
m_PP = 1 << (m_position - 1);
|
|
|
|
m_DP = 1 << (m_position - 1);
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_OR:
|
2008-07-07 19:29:59 +02:00
|
|
|
m_position = 0;
|
|
|
|
m_annulable = m_fg->m_annulable || m_fd->m_annulable;
|
|
|
|
m_PP = m_fg->m_PP | m_fd->m_PP;
|
|
|
|
m_DP = m_fg->m_DP | m_fd->m_DP;
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_AND:
|
2008-07-07 19:29:59 +02:00
|
|
|
m_position = 0;
|
|
|
|
m_annulable = m_fg->m_annulable && m_fd->m_annulable;
|
|
|
|
m_PP = (m_fg->m_annulable) ? (m_fg->m_PP | m_fd->m_PP) : m_fg->m_PP;
|
|
|
|
m_DP = (m_fd->m_annulable) ? (m_fg->m_DP | m_fd->m_DP) : m_fd->m_DP;
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_PLUS:
|
2008-07-07 19:29:59 +02:00
|
|
|
m_position = 0;
|
|
|
|
m_annulable = false;
|
|
|
|
m_PP = m_fg->m_PP;
|
|
|
|
m_DP = m_fg->m_DP;
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_STAR:
|
2008-07-07 19:29:59 +02:00
|
|
|
m_position = 0;
|
|
|
|
m_annulable = true;
|
|
|
|
m_PP = m_fg->m_PP;
|
|
|
|
m_DP = m_fg->m_DP;
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2008-07-13 09:55:47 +02:00
|
|
|
void Node::nextPos(uint64_t PS[])
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_fg)
|
|
|
|
m_fg->nextPos(PS);
|
|
|
|
if (m_fd)
|
|
|
|
m_fd->nextPos(PS);
|
2008-01-08 14:52:32 +01:00
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
switch (m_type)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
|
|
|
case NODE_AND:
|
|
|
|
/************************************/
|
|
|
|
/* \forall p \in DP(left) */
|
|
|
|
/* PS[p] = PS[p] \cup PP(right) */
|
|
|
|
/************************************/
|
2008-07-13 09:55:47 +02:00
|
|
|
for (uint32_t pos = 1; pos <= PS[0]; pos++)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_fg->m_DP & (1 << (pos-1)))
|
|
|
|
PS[pos] |= m_fd->m_PP;
|
2008-01-08 14:52:32 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case NODE_PLUS:
|
|
|
|
/************************************/
|
|
|
|
/* == same as START */
|
|
|
|
/* \forall p \in DP(left) */
|
|
|
|
/* PS[p] = PS[p] \cup PP(left) */
|
|
|
|
/************************************/
|
2008-07-13 09:55:47 +02:00
|
|
|
for (uint32_t pos = 1; pos <= PS[0]; pos++)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_DP & (1 << (pos-1)))
|
|
|
|
PS[pos] |= m_PP;
|
2008-01-08 14:52:32 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case NODE_STAR:
|
|
|
|
/************************************/
|
|
|
|
/* \forall p \in DP(left) */
|
|
|
|
/* PS[p] = PS[p] \cup PP(left) */
|
|
|
|
/************************************/
|
2008-07-13 09:55:47 +02:00
|
|
|
for (uint32_t pos = 1; pos <= PS[0]; pos++)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_DP & (1 << (pos-1)))
|
|
|
|
PS[pos] |= m_PP;
|
2008-01-08 14:52:32 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
////////////////////////////////////////////////
|
2008-01-08 14:52:32 +01:00
|
|
|
// DEBUG-only functions
|
2008-07-07 19:29:59 +02:00
|
|
|
////////////////////////////////////////////////
|
2008-01-08 14:52:32 +01:00
|
|
|
|
|
|
|
#ifdef DEBUG_RE
|
2008-07-07 19:29:59 +02:00
|
|
|
/**
 * Debug helper: dump a next-position table on stdout.
 *
 * @param PS  PS[0] is the number of entries; entries 1..PS[0] are printed
 *            one per line, in hexadecimal.
 *
 * NOTE(review): this takes an int table whereas Node::nextPos() works on
 * uint64_t -- confirm which table type callers actually pass.
 */
void printPS(int PS[])
{
    printf("** next positions **\n");

    const int count = PS[0];
    int idx = 1;
    while (idx <= count)
    {
        printf("%02d: 0x%08x\n", idx, PS[idx]);
        ++idx;
    }
}
|
|
|
|
|
|
|
|
/**
 * Debug helper: print the position -> letter table on a single stdout line.
 *
 * @param ptl  ptl[0] is the number of positions; ptl[1..ptl[0]] are the
 *             values, each printed as a character.
 */
void regexp_print_ptl(int ptl[])
{
    printf("** pos -> lettre: ");

    const int last = ptl[0];
    int pos = 1;
    while (pos <= last)
    {
        printf("%d=%c ", pos, ptl[pos]);
        ++pos;
    }

    printf("\n");
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
2012-05-16 20:43:26 +02:00
|
|
|
/**
 * Build a printable description of a regexp letter code.
 *
 * @param l  the code to describe
 * @return   - for one of the special tokens listed below: its symbol
 *             followed by l itself (streamed as a char) between brackets;
 *           - otherwise, when l < RE_FINAL_TOK: the letter obtained with
 *             l + 'a' - 1 followed by the numeric value of l;
 *           - otherwise: "liste" followed by l - RE_LIST_USER_END.
 */
string regexpPrintLetter(char l)
{
    // Special tokens and their display pattern, tested in this order.
    static const struct
    {
        int token;
        const char *pattern;
    } specials[] =
    {
        { RE_EPSILON,    "( & [%1%])" },
        { RE_FINAL_TOK,  "( # [%1%])" },
        { RE_ALL_MATCH,  "( . [%1%])" },
        { RE_VOWL_MATCH, "(:v: [%1%])" },
        { RE_CONS_MATCH, "(:c: [%1%])" },
        { RE_USR1_MATCH, "(:1: [%1%])" },
        { RE_USR2_MATCH, "(:2: [%1%])" },
    };
    const unsigned int nbSpecials = sizeof(specials) / sizeof(specials[0]);

    for (unsigned int i = 0; i < nbSpecials; ++i)
    {
        if (l == specials[i].token)
            return (format(specials[i].pattern) % l).str();
    }

    if (l < RE_FINAL_TOK)
    {
        // Ordinary letter: code 1 maps to 'a', code 2 to 'b', and so on.
        const char letter = static_cast<char>(l + 'a' - 1);
        const int code = static_cast<int>(l);
        return (format("(%1% [%2%])") % letter % code).str();
    }

    return (format("(liste %1%)") % (l - RE_LIST_USER_END)).str();
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_RE
|
2008-07-07 19:29:59 +02:00
|
|
|
void Node::printNode(FILE* f, int detail) const
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
switch (m_type)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
|
|
|
case NODE_VAR:
|
2012-05-16 20:43:26 +02:00
|
|
|
fprintf(f, "%s", regexpPrintLetter(m_var).c_str());
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_OR:
|
|
|
|
fprintf(f, "OR");
|
|
|
|
break;
|
|
|
|
case NODE_AND:
|
|
|
|
fprintf(f, "AND");
|
|
|
|
break;
|
|
|
|
case NODE_PLUS:
|
|
|
|
fprintf(f, "+");
|
|
|
|
break;
|
|
|
|
case NODE_STAR:
|
|
|
|
fprintf(f, "*");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (detail == 2)
|
|
|
|
{
|
|
|
|
fprintf(f, "\\n pos=%d\\n annul=%d\\n PP=0x%04x\\n DP=0x%04x",
|
2008-07-07 19:29:59 +02:00
|
|
|
m_position, m_annulable, m_PP, m_DP);
|
2008-01-08 14:52:32 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
void Node::printNodesRec(FILE* f, int detail) const
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_fg)
|
|
|
|
m_fg->printNodesRec(f, detail);
|
|
|
|
if (m_fd)
|
|
|
|
m_fd->printNodesRec(f, detail);
|
2008-01-08 14:52:32 +01:00
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
fprintf(f, "%d [ label=\"", m_number);
|
|
|
|
printNode(f, detail);
|
2008-01-08 14:52:32 +01:00
|
|
|
fprintf(f, "\"];\n");
|
|
|
|
}
|
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
void Node::printEdgesRec(FILE *f) const
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
if (m_fg)
|
|
|
|
m_fg->printEdgesRec(f);
|
|
|
|
if (m_fd)
|
|
|
|
m_fd->printEdgesRec(f);
|
2008-01-08 14:52:32 +01:00
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
switch (m_type)
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
|
|
|
case NODE_OR:
|
2008-07-07 19:29:59 +02:00
|
|
|
fprintf(f, "%d -> %d;", m_number, m_fg->m_number);
|
|
|
|
fprintf(f, "%d -> %d;", m_number, m_fd->m_number);
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_AND:
|
2008-07-07 19:29:59 +02:00
|
|
|
fprintf(f, "%d -> %d;", m_number, m_fg->m_number);
|
|
|
|
fprintf(f, "%d -> %d;", m_number, m_fd->m_number);
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
case NODE_PLUS:
|
|
|
|
case NODE_STAR:
|
2008-07-07 19:29:59 +02:00
|
|
|
fprintf(f, "%d -> %d;", m_number, m_fg->m_number);
|
2008-01-08 14:52:32 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-07-07 19:29:59 +02:00
|
|
|
void Node::printTreeDot(const string &iFileName, int detail) const
|
2008-01-08 14:52:32 +01:00
|
|
|
{
|
2008-07-07 19:29:59 +02:00
|
|
|
FILE *f = fopen(iFileName.c_str(), "w");
|
2008-01-08 14:52:32 +01:00
|
|
|
if (f == NULL)
|
|
|
|
return;
|
2008-07-07 19:29:59 +02:00
|
|
|
fprintf(f, "digraph %s {\n", iFileName.c_str());
|
|
|
|
printNodesRec(f, detail);
|
|
|
|
printEdgesRec(f);
|
2008-01-08 14:52:32 +01:00
|
|
|
fprintf(f, "fontsize=20;\n");
|
|
|
|
fprintf(f, "}\n");
|
|
|
|
fclose(f);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|