Merge pull request #230 from louisrubet/#225/lexer-perf-issue

#225 lexer perf issue
This commit is contained in:
Louis Rubet 2022-02-22 16:48:06 +01:00 committed by GitHub
commit 800dc3fc68
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 101 additions and 39 deletions

View file

@ -28,7 +28,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
set(BASE_COMPILER_OPTIONS "-std=c++14 -Wl,--no-as-needed")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${BASE_COMPILER_OPTIONS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${BASE_COMPILER_OPTIONS} -O0 -g")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${BASE_COMPILER_OPTIONS} -O3 -fomit-frame-pointer -s")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${BASE_COMPILER_OPTIONS} -O3 -s")
endif()
# custom linenoise-ng
@ -75,6 +75,7 @@ add_executable(
${PROJECT_SOURCE_DIR}/linenoise-ng/src/wcwidth.cpp
)
target_link_libraries(rpn mpfr)
target_link_libraries(rpn gmp)
# man
add_custom_target(man ALL)

View file

@ -1,6 +1,3 @@
#include <regex>
using namespace std;
#include "lexer.hpp"
bool Lexer::lexer(string& entry, map<string, ReservedWord>& keywords, vector<SynElement>& elements,
@ -38,6 +35,8 @@ bool Lexer::lexer(string& entry, map<string, ReservedWord>& keywords, vector<Syn
if (parseUnknown(entry, i, jump, elements))
// last chance, this unknown entry is treated as a symbol
i = jump - 1;
else
return false; // no object of any type could be found, this is a lexer error
}
return true;
@ -106,40 +105,69 @@ bool Lexer::parseProgram(string& entry, size_t idx, size_t& nextIdx, vector<SynE
return true;
}
int Lexer::getBaseAt(string& entry, int idxStart, bool& positive) {
regex baseRegex("([+-])?((0[xX])|([0-9][0-9]?[bB]))");
smatch match;
if (regex_search(entry, match, baseRegex) && match.size() >= 5) {
string sign = match[1].str();
string base = match[2].str();
// sign out, permits expressions like -0xAB3F
positive = sign.size() > 0 && sign[0] == '-' ? false : true;
// base
entry = entry.substr(base.size() + sign.size());
if (base[1] == 'X' || base[1] == 'x') return 16;
if (base.size() > 0) {
int b = stoi(base.substr(0, base.size() - 1));
if (b == 0) b = 2; // admit "0b" as binary suffix
return b;
/// Detects an optional leading sign and an optional base prefix at the very
/// beginning of `entry`.
///
/// This is a hand-written scanner used instead of a regex such as
/// "([+-])?((0[xX])|([0-9][0-9]?[bB]))", because the regex implementation
/// is dramatically slower.
///
/// Recognized prefixes (after the optional '+' or '-'):
///   - "0x" / "0X"  -> base 16
///   - "0b" / "0B"  -> base 2
///   - "<d>b"       -> base d        (d is a single digit 1..9)
///   - "<d><d>b"    -> base dd       (first digit 1..9, 'b' or 'B' accepted)
/// A prefix is only looked for when at least three characters remain after
/// the sign; otherwise the entry is considered to be in base 10.
///
/// @param entry    text to inspect; it is only read, never modified
/// @param nextIdx  [out] index in `entry` of the first character following
///                 the sign and the base prefix (0 when neither is present)
/// @param positive [out] false only when `entry` starts with '-', else true
/// @return the detected base, or 10 when no base prefix is found; the value
///         is not range-checked here
int Lexer::getBaseAt(string& entry, size_t& nextIdx, bool& positive) {
    // Plain range comparisons are used rather than isdigit(): passing a
    // negative char (e.g. a byte of a non-ASCII character) to isdigit() is
    // undefined behaviour.
    const auto isDecimalDigit = [](char ch) { return ch >= '0' && ch <= '9'; };
    const auto isBaseMark = [](char ch) { return ch == 'b' || ch == 'B'; };

    size_t scan = 0;
    nextIdx = 0;
    positive = true;
    if (entry.empty()) return 10;

    // optional sign, permits expressions like -0xAB3F
    if (entry[0] == '+' || entry[0] == '-') {
        positive = (entry[0] != '-');
        scan = 1;
        nextIdx = scan;
    }

    // fewer than 3 characters left: there is no room for a prefix and a digit
    if (scan + 2 >= entry.size()) return 10;

    // the guard above guarantees that these three reads are in range
    const char a = entry[scan];
    const char b = entry[scan + 1];
    const char c = entry[scan + 2];

    if (a == '0') {
        if (b == 'x' || b == 'X') {
            nextIdx = scan + 2;
            return 16;
        }
        if (isBaseMark(b)) {
            // "0b" is admitted as the binary prefix
            nextIdx = scan + 2;
            return 2;
        }
    } else if (isDecimalDigit(a)) {
        if (isBaseMark(b)) {
            nextIdx = scan + 2;
            return int(a - '0');
        }
        if (isDecimalDigit(b) && isBaseMark(c)) {
            nextIdx = scan + 3;
            return 10 * int(a - '0') + int(b - '0');
        }
    }
    return 10;
}
bool Lexer::getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal& r, char delim) {
bool Lexer::getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal** r, char delim) {
stringstream ss;
int idxNumber = 0;
string token;
bool positive = true;
nextIdx = idx;
ss.str(entry.substr(idx));
if (getline(ss, token, delim)) {
size_t numberIdx;
nextIdx = token.size() + idx + 1;
base = getBaseAt(token, idx, positive);
if (base < BASE_MIN || base > BASE_MAX) return false;
trim(token);
if (mpfr_set_str(r.mpfr_ptr(), token.c_str(), base, mpreal::get_default_rnd()) == 0) {
if (!positive) r = -r;
base = getBaseAt(token, numberIdx, positive);
if (base < BASE_MIN || base > BASE_MAX) return false;
if (numberIdx != 0) token = token.substr(numberIdx);
*r = new mpreal;
if (mpfr_set_str((*r)->mpfr_ptr(), token.c_str(), base, mpreal::get_default_rnd()) == 0) {
if (!positive) *(*r) = -*(*r);
return true;
} else
return false;
@ -150,9 +178,9 @@ bool Lexer::getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, m
bool Lexer::parseNumber(string& entry, size_t idx, size_t& nextIdx, vector<SynError>& errors,
vector<SynElement>& elements) {
mpreal r;
mpreal* r = nullptr;
int base = 10;
if (getNumberAt(entry, idx, nextIdx, base, r)) {
if (getNumberAt(entry, idx, nextIdx, base, &r)) {
elements.push_back({cmd_number, .re = r, .reBase = base});
return true;
} else {
@ -163,14 +191,15 @@ bool Lexer::parseNumber(string& entry, size_t idx, size_t& nextIdx, vector<SynEr
bool Lexer::parseComplex(string& entry, size_t idx, size_t& nextIdx, vector<SynError>& errors,
vector<SynElement>& elements) {
mpreal re, im;
mpreal* re = nullptr;
mpreal* im = nullptr;
int reBase, imBase = 10;
if (idx + 1 == entry.size()) {
elements.push_back({cmd_symbol, .value = entry.substr(idx, entry.size() - idx)});
nextIdx = entry.size();
return true; // complex format error, return a symbol
}
if (!getNumberAt(entry, idx + 1, nextIdx, reBase, re, ',')) {
if (!getNumberAt(entry, idx + 1, nextIdx, reBase, &re, ',')) {
elements.push_back({cmd_symbol, .value = entry.substr(idx, entry.size() - idx)});
nextIdx = entry.size();
return true; // complex format error, return a symbol
@ -184,7 +213,7 @@ bool Lexer::parseComplex(string& entry, size_t idx, size_t& nextIdx, vector<SynE
return true; // complex format error, return a symbol
}
if (!getNumberAt(entry, i, nextIdx, imBase, im, ')')) {
if (!getNumberAt(entry, i, nextIdx, imBase, &im, ')')) {
elements.push_back({cmd_symbol, .value = entry.substr(idx, entry.size() - idx)});
nextIdx = entry.size();
return true; // complex format error, return a symbol

View file

@ -19,8 +19,8 @@ class Lexer {
struct SynElement {
cmd_type_t type;
string value;
mpreal re;
mpreal im;
mpreal* re;
mpreal* im;
int reBase;
int imBase;
program_fn_t fn;
@ -66,8 +66,8 @@ class Lexer {
bool parseUnknown(string& entry, size_t idx, size_t& nextIdx, vector<SynElement>& elements);
void trim(string& s);
int getBaseAt(string& entry, int idxStart, bool& positive);
bool getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal& r, char delim = ' ');
int getBaseAt(string& entry, size_t& nextIdx, bool& positive);
bool getNumberAt(string& entry, size_t idx, size_t& nextIdx, int& base, mpreal** r, char delim = ' ');
};
#endif

View file

@ -1,5 +1,5 @@
#include <pwd.h>
#include <signal.h>
#include <csignal>
#include <cerrno>
#include <iostream>

View file

@ -590,10 +590,10 @@ ret_value program::parse(string& entry) {
for (Lexer::SynElement& element : elements) {
switch (element.type) {
case cmd_number:
push_back(new number(element.re, element.reBase));
push_back(new number(*element.re, element.reBase));
break;
case cmd_complex:
push_back(new ocomplex(element.re, element.im, element.reBase, element.imBase));
push_back(new ocomplex(*element.re, *element.im, element.reBase, element.imBase));
break;
case cmd_string:
push_back(new ostring(element.value));

View file

@ -1,7 +1,7 @@
#include <stdio.h>
#include <cstdio>
#include "linenoise.h"
#include "escape.h"
#include "linenoise.h"
#include "program.hpp"
#include "version.h"

View file

@ -1,4 +1,4 @@
#include <time.h>
#include <ctime>
#include "program.hpp"

View file

@ -10,6 +10,38 @@
`del`
## 1-shot entries
```
0
0.
.0
0.0
-0
+0
+.0
+0.
0.1
+0.1
-0.1
-.1
+.1
```
-> stack should be 0, 0, 0, 0, -0, 0, 0, 0, 0.1, 0.1, -0.1, -0.1, 0.1
`del`
## n-shot entries
```
0 0. .0 0.0 -0 +0 +.0 +0. 0.1 +0.1 -0.1 -.1 +.1
```
-> stack should be 0, 0, 0, 0, -0, 0, 0, 0, 0.1, 0.1, -0.1, -0.1, 0.1
`del`
## numb 1
`3.14 +3.14 -3.14`