mirror of
https://github.com/louisrubet/rpn
synced 2024-11-17 07:47:50 +01:00
Merge pull request #230 from louisrubet/#225/lexer-perf-issue
#225 lexer perf issue
This commit is contained in:
commit
800dc3fc68
8 changed files with 101 additions and 39 deletions
|
@ -28,7 +28,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
|||
set(BASE_COMPILER_OPTIONS "-std=c++14 -Wl,--no-as-needed")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${BASE_COMPILER_OPTIONS}")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${BASE_COMPILER_OPTIONS} -O0 -g")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${BASE_COMPILER_OPTIONS} -O3 -fomit-frame-pointer -s")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${BASE_COMPILER_OPTIONS} -O3 -s")
|
||||
endif()
|
||||
|
||||
# custom linenoise-ng
|
||||
|
@ -75,6 +75,7 @@ add_executable(
|
|||
${PROJECT_SOURCE_DIR}/linenoise-ng/src/wcwidth.cpp
|
||||
)
|
||||
target_link_libraries(rpn mpfr)
|
||||
target_link_libraries(rpn gmp)
|
||||
|
||||
# man
|
||||
add_custom_target(man ALL)
|
||||
|
|
|
@ -1,6 +1,3 @@
|
|||
#include <regex>
|
||||
using namespace std;
|
||||
|
||||
#include "lexer.hpp"
|
||||
|
||||
bool Lexer::lexer(string& entry, map<string, ReservedWord>& keywords, vector<SynElement>& elements,
|
||||
|
@ -38,6 +35,8 @@ bool Lexer::lexer(string& entry, map<string, ReservedWord>& keywords, vector<Syn
|
|||
if (parseUnknown(entry, i, jump, elements))
|
||||
// last chance, this unknown entry is treated as a symbol
|
||||
i = jump - 1;
|
||||
else
|
||||
return false; // no object of any type could be found, this is a lexer error
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -106,40 +105,69 @@ bool Lexer::parseProgram(string& entry, size_t idx, size_t& nextIdx, vector<SynE
|
|||
return true;
|
||||
}
|
||||
|
||||
int Lexer::getBaseAt(string& entry, int idxStart, bool& positive) {
|
||||
regex baseRegex("([+-])?((0[xX])|([0-9][0-9]?[bB]))");
|
||||
smatch match;
|
||||
if (regex_search(entry, match, baseRegex) && match.size() >= 5) {
|
||||
string sign = match[1].str();
|
||||
string base = match[2].str();
|
||||
// sign out, permits expressions like -0xAB3F
|
||||
positive = sign.size() > 0 && sign[0] == '-' ? false : true;
|
||||
// base
|
||||
entry = entry.substr(base.size() + sign.size());
|
||||
if (base[1] == 'X' || base[1] == 'x') return 16;
|
||||
if (base.size() > 0) {
|
||||
int b = stoi(base.substr(0, base.size() - 1));
|
||||
if (b == 0) b = 2; // admit "0b" as binary suffix
|
||||
return b;
|
||||
/// Detects an optional sign and an optional base prefix at the start of a token.
///
/// Recognized forms, after an optional leading '+' or '-':
///   - "0x" / "0X"            -> base 16
///   - "0b" / "0B"            -> base 2
///   - "<d>b" / "<d>B"        -> base d, d being a single digit 1..9
///   - "<d><d>b" / "<d><d>B"  -> two-digit base whose first digit is 1..9
/// A prefix is only looked for when at least three characters remain after the
/// sign; shorter tokens are reported as base 10.
///
/// @param entry    token to inspect; it is scanned from its first character and
///                 is not modified
/// @param nextIdx  out: index in entry of the first character following the sign
///                 and the base prefix (0 when neither is present)
/// @param positive out: false only when the token starts with '-'
/// @return the detected base, or 10 when no prefix is found; the value is not
///         range-checked here
int Lexer::getBaseAt(string& entry, size_t& nextIdx, bool& positive) {
    // The scan below approximates the regex "([+-])?((0[xX])|([0-9][0-9]?[bB]))".
    // std::regex is deliberately not used: it proved dramatically slower.
    size_t scan = 0;
    nextIdx = 0;
    positive = true;
    if (entry.empty()) return 10;

    // optional sign, which permits expressions like -0xAB3F
    if (entry[scan] == '+' || entry[scan] == '-') {
        positive = (entry[scan] == '+');
        ++scan;
        nextIdx = scan;
    }

    // a prefix needs at least 3 characters after the sign
    if (scan + 2 >= entry.size()) return 10;

    // the early return above guarantees that scan + 2 is a valid index
    const char a = entry[scan];
    const char b = entry[scan + 1];
    const char c = entry[scan + 2];

    // isdigit() has undefined behaviour for negative char values, hence the cast
    const auto isDigit = [](char ch) { return isdigit(static_cast<unsigned char>(ch)) != 0; };
    const auto isBaseMark = [](char ch) { return ch == 'b' || ch == 'B'; };

    if (a == '0') {
        if (b == 'x' || b == 'X') {
            nextIdx = scan + 2;
            return 16;
        }
        if (isBaseMark(b)) {
            // "0b" is admitted as the binary prefix
            nextIdx = scan + 2;
            return 2;
        }
    } else if (isDigit(a)) {
        if (isBaseMark(b)) {
            nextIdx = scan + 2;
            return int(a - '0');
        }
        if (isDigit(b) && isBaseMark(c)) {
            nextIdx = scan + 3;
            return 10 * int(a - '0') + int(b - '0');
        }
    }
    return 10;
}
|
||||
|
||||
bool Lexer::getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal& r, char delim) {
|
||||
bool Lexer::getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal** r, char delim) {
|
||||
stringstream ss;
|
||||
int idxNumber = 0;
|
||||
string token;
|
||||
bool positive = true;
|
||||
|
||||
nextIdx = idx;
|
||||
|
||||
ss.str(entry.substr(idx));
|
||||
if (getline(ss, token, delim)) {
|
||||
size_t numberIdx;
|
||||
nextIdx = token.size() + idx + 1;
|
||||
base = getBaseAt(token, idx, positive);
|
||||
if (base < BASE_MIN || base > BASE_MAX) return false;
|
||||
trim(token);
|
||||
if (mpfr_set_str(r.mpfr_ptr(), token.c_str(), base, mpreal::get_default_rnd()) == 0) {
|
||||
if (!positive) r = -r;
|
||||
base = getBaseAt(token, numberIdx, positive);
|
||||
if (base < BASE_MIN || base > BASE_MAX) return false;
|
||||
if (numberIdx != 0) token = token.substr(numberIdx);
|
||||
*r = new mpreal;
|
||||
if (mpfr_set_str((*r)->mpfr_ptr(), token.c_str(), base, mpreal::get_default_rnd()) == 0) {
|
||||
if (!positive) *(*r) = -*(*r);
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
|
@ -150,9 +178,9 @@ bool Lexer::getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, m
|
|||
|
||||
bool Lexer::parseNumber(string& entry, size_t idx, size_t& nextIdx, vector<SynError>& errors,
|
||||
vector<SynElement>& elements) {
|
||||
mpreal r;
|
||||
mpreal* r = nullptr;
|
||||
int base = 10;
|
||||
if (getNumberAt(entry, idx, nextIdx, base, r)) {
|
||||
if (getNumberAt(entry, idx, nextIdx, base, &r)) {
|
||||
elements.push_back({cmd_number, .re = r, .reBase = base});
|
||||
return true;
|
||||
} else {
|
||||
|
@ -163,14 +191,15 @@ bool Lexer::parseNumber(string& entry, size_t idx, size_t& nextIdx, vector<SynEr
|
|||
|
||||
bool Lexer::parseComplex(string& entry, size_t idx, size_t& nextIdx, vector<SynError>& errors,
|
||||
vector<SynElement>& elements) {
|
||||
mpreal re, im;
|
||||
mpreal* re = nullptr;
|
||||
mpreal* im = nullptr;
|
||||
int reBase, imBase = 10;
|
||||
if (idx + 1 == entry.size()) {
|
||||
elements.push_back({cmd_symbol, .value = entry.substr(idx, entry.size() - idx)});
|
||||
nextIdx = entry.size();
|
||||
return true; // complex format error, return a symbol
|
||||
}
|
||||
if (!getNumberAt(entry, idx + 1, nextIdx, reBase, re, ',')) {
|
||||
if (!getNumberAt(entry, idx + 1, nextIdx, reBase, &re, ',')) {
|
||||
elements.push_back({cmd_symbol, .value = entry.substr(idx, entry.size() - idx)});
|
||||
nextIdx = entry.size();
|
||||
return true; // complex format error, return a symbol
|
||||
|
@ -184,7 +213,7 @@ bool Lexer::parseComplex(string& entry, size_t idx, size_t& nextIdx, vector<SynE
|
|||
return true; // complex format error, return a symbol
|
||||
}
|
||||
|
||||
if (!getNumberAt(entry, i, nextIdx, imBase, im, ')')) {
|
||||
if (!getNumberAt(entry, i, nextIdx, imBase, &im, ')')) {
|
||||
elements.push_back({cmd_symbol, .value = entry.substr(idx, entry.size() - idx)});
|
||||
nextIdx = entry.size();
|
||||
return true; // complex format error, return a symbol
|
||||
|
|
|
@ -19,8 +19,8 @@ class Lexer {
|
|||
struct SynElement {
|
||||
cmd_type_t type;
|
||||
string value;
|
||||
mpreal re;
|
||||
mpreal im;
|
||||
mpreal* re;
|
||||
mpreal* im;
|
||||
int reBase;
|
||||
int imBase;
|
||||
program_fn_t fn;
|
||||
|
@ -66,8 +66,8 @@ class Lexer {
|
|||
bool parseUnknown(string& entry, size_t idx, size_t& nextIdx, vector<SynElement>& elements);
|
||||
|
||||
void trim(string& s);
|
||||
int getBaseAt(string& entry, int idxStart, bool& positive);
|
||||
bool getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal& r, char delim = ' ');
|
||||
int getBaseAt(string& entry, size_t& nextIdx, bool& positive);
|
||||
bool getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal** r, char delim = ' ');
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <pwd.h>
|
||||
#include <signal.h>
|
||||
#include <csignal>
|
||||
|
||||
#include <cerrno>
|
||||
#include <iostream>
|
||||
|
|
|
@ -590,10 +590,10 @@ ret_value program::parse(string& entry) {
|
|||
for (Lexer::SynElement& element : elements) {
|
||||
switch (element.type) {
|
||||
case cmd_number:
|
||||
push_back(new number(element.re, element.reBase));
|
||||
push_back(new number(*element.re, element.reBase));
|
||||
break;
|
||||
case cmd_complex:
|
||||
push_back(new ocomplex(element.re, element.im, element.reBase, element.imBase));
|
||||
push_back(new ocomplex(*element.re, *element.im, element.reBase, element.imBase));
|
||||
break;
|
||||
case cmd_string:
|
||||
push_back(new ostring(element.value));
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include <stdio.h>
|
||||
#include <cstdio>
|
||||
#include "linenoise.h"
|
||||
|
||||
#include "escape.h"
|
||||
#include "linenoise.h"
|
||||
#include "program.hpp"
|
||||
#include "version.h"
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include <time.h>
|
||||
#include <ctime>
|
||||
|
||||
#include "program.hpp"
|
||||
|
||||
|
|
|
@ -10,6 +10,38 @@
|
|||
|
||||
`del`
|
||||
|
||||
## 1-shot entries
|
||||
|
||||
```
|
||||
0
|
||||
0.
|
||||
.0
|
||||
0.0
|
||||
-0
|
||||
+0
|
||||
+.0
|
||||
+0.
|
||||
0.1
|
||||
+0.1
|
||||
-0.1
|
||||
-.1
|
||||
+.1
|
||||
```
|
||||
|
||||
-> stack should be 0, 0, 0, 0, -0, 0, 0, 0, 0.1, 0.1, -0.1, -0.1, 0.1
|
||||
|
||||
`del`
|
||||
|
||||
## n-shot entries
|
||||
|
||||
```
|
||||
0 0. .0 0.0 -0 +0 +.0 +0. 0.1 +0.1 -0.1 -.1 +.1
|
||||
```
|
||||
|
||||
-> stack should be 0, 0, 0, 0, -0, 0, 0, 0, 0.1, 0.1, -0.1, -0.1, 0.1
|
||||
|
||||
`del`
|
||||
|
||||
## numb 1
|
||||
|
||||
`3.14 +3.14 -3.14`
|
||||
|
|
Loading…
Reference in a new issue