mirror of
https://gitlab.com/c3d/db48x.git
synced 2024-09-29 05:36:58 +02:00
performance: Implement some highly-focused optimizations
Made a few additional focused optimizations that gain back another 100ms on the `NQueens` benchmark on the DM42. This comes at the expense of about 4K of additional generated code, which is probably as much as is reasonable to dedicate to this. The numbers for the DM32 are now the best we have ever had. Fixes: #533 Signed-off-by: Christophe de Dinechin <christophe@dinechin.org>
This commit is contained in:
parent
044352cc4c
commit
ebb6c8a377
7 changed files with 62 additions and 18 deletions
|
@ -12,6 +12,7 @@ all times in milliseconds, best of 5 runs, on USB power, with presumably no GC.
|
|||
|
||||
| Version | Time | PGM Size | QSPI Size | Note |
|
||||
|---------|---------|-----------|-----------|-------------------------|
|
||||
| 0.4.9+ | 1070 | 650116 | | Focused optimizations |
|
||||
| 0.4.9+ | 1175 | | | Range-based type checks |
|
||||
| 0.4.9+ | 1215 | | | Remove busy animation |
|
||||
| 0.4.9 | 1447 | 646028 | 1531868 | No LastArgs in progs |
|
||||
|
@ -45,6 +46,7 @@ is not there.
|
|||
|
||||
| Version | Time | PGM Size | QSPI Size | Note |
|
||||
|---------|---------|-----------|-----------|-------------------------|
|
||||
| 0.4.9+ | 1803 | 731052 | | Focused optimizations |
|
||||
| 0.4.9 | 2156 | 772732 | 1534316 | No LastArg in progs |
|
||||
| 0.4.8 | 2201 | 749892 | 1534316 | |
|
||||
| 0.4.7 | 2209 | 742868 | 1534316 | |
|
||||
|
|
|
@ -2704,6 +2704,7 @@ all times in milliseconds, best of 5 runs, on USB power, with presumably no GC.
|
|||
|
||||
| Version | Time | PGM Size | QSPI Size | Note |
|
||||
|---------|---------|-----------|-----------|-------------------------|
|
||||
| 0.4.9+ | 1070 | 650116 | | Focused optimizations |
|
||||
| 0.4.9+ | 1175 | | | Range-based type checks |
|
||||
| 0.4.9+ | 1215 | | | Remove busy animation |
|
||||
| 0.4.9 | 1447 | 646028 | 1531868 | No LastArgs in progs |
|
||||
|
@ -2737,6 +2738,7 @@ is not there.
|
|||
|
||||
| Version | Time | PGM Size | QSPI Size | Note |
|
||||
|---------|---------|-----------|-----------|-------------------------|
|
||||
| 0.4.9+ | 1803 | 731052 | | Focused optimizations |
|
||||
| 0.4.9 | 2156 | 772732 | 1534316 | No LastArg in progs |
|
||||
| 0.4.8 | 2201 | 749892 | 1534316 | |
|
||||
| 0.4.7 | 2209 | 742868 | 1534316 | |
|
||||
|
|
|
@ -2704,6 +2704,7 @@ all times in milliseconds, best of 5 runs, on USB power, with presumably no GC.
|
|||
|
||||
| Version | Time | PGM Size | QSPI Size | Note |
|
||||
|---------|---------|-----------|-----------|-------------------------|
|
||||
| 0.4.9+ | 1070 | 650116 | | Focused optimizations |
|
||||
| 0.4.9+ | 1175 | | | Range-based type checks |
|
||||
| 0.4.9+ | 1215 | | | Remove busy animation |
|
||||
| 0.4.9 | 1447 | 646028 | 1531868 | No LastArgs in progs |
|
||||
|
@ -2737,6 +2738,7 @@ is not there.
|
|||
|
||||
| Version | Time | PGM Size | QSPI Size | Note |
|
||||
|---------|---------|-----------|-----------|-------------------------|
|
||||
| 0.4.9+ | 1803 | 731052 | | Focused optimizations |
|
||||
| 0.4.9 | 2156 | 772732 | 1534316 | No LastArg in progs |
|
||||
| 0.4.8 | 2201 | 749892 | 1534316 | |
|
||||
| 0.4.7 | 2209 | 742868 | 1534316 | |
|
||||
|
|
|
@ -122,3 +122,5 @@ freebsd: LIBS += -lthr -liconv
|
|||
macx: LIBS += -framework CoreFoundation -framework IOKit
|
||||
macx: QMAKE_CFLAGS += -fsanitize=address
|
||||
macx: LIBS += -fsanitize=address
|
||||
clang: QMAKE_CFLAGS += -Wno-unknown-pragmas
|
||||
clang: QMAKE_CXXFLAGS += -Wno-unknown-pragmas
|
||||
|
|
22
src/leb128.h
22
src/leb128.h
|
@ -32,6 +32,12 @@
|
|||
#include "types.h"
|
||||
#include <cstdint>
|
||||
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC push_options
|
||||
# pragma GCC optimize("-O3")
|
||||
#endif // DM42
|
||||
|
||||
template <typename Int = uint, typename Data>
|
||||
inline Int leb128(Data *&p)
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -55,17 +61,15 @@ inline Int leb128(Data *&p)
|
|||
}
|
||||
|
||||
|
||||
template<>
|
||||
inline uint16_t leb128<uint16_t, byte>(byte *&bp)
|
||||
inline INLINE uint16_t leb128_u16(byte *bp)
|
||||
// ----------------------------------------------------------------------------
|
||||
// Return the leb128 value at pointer
|
||||
// ----------------------------------------------------------------------------
|
||||
{
|
||||
if (bp[0] < 0x80)
|
||||
return *bp++;
|
||||
uint16_t b1 = *bp++ & 0x7F;
|
||||
uint16_t b2 = *bp++ << 7;
|
||||
return b1 | b2;
|
||||
uint16_t b1 = *bp;
|
||||
if (b1 < 0x80)
|
||||
return b1;
|
||||
return (b1 & 0x7F) | (uint16_t(bp[1]) << 7);
|
||||
}
|
||||
|
||||
|
||||
|
@ -127,4 +131,8 @@ inline Data *leb128skip(Data *ptr)
|
|||
return (Data *) p;
|
||||
}
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC pop_options
|
||||
#endif // DM42
|
||||
|
||||
#endif // LEB128_H
|
||||
|
|
40
src/object.h
40
src/object.h
|
@ -225,13 +225,18 @@ struct object
|
|||
}
|
||||
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC push_options
|
||||
# pragma GCC optimize("-O3")
|
||||
#endif // DM42
|
||||
|
||||
id type() const
|
||||
// ------------------------------------------------------------------------
|
||||
// Return the type of the object
|
||||
// ------------------------------------------------------------------------
|
||||
{
|
||||
byte *ptr = (byte *) this;
|
||||
id ty = (id) leb128<uint16_t>(ptr);
|
||||
id ty = (id) leb128_u16(ptr);
|
||||
if (ty > NUM_IDS)
|
||||
{
|
||||
object_error(ty, this);
|
||||
|
@ -330,6 +335,20 @@ struct object
|
|||
}
|
||||
|
||||
|
||||
grob_p graph(grapher &g) const
|
||||
// ------------------------------------------------------------------------
|
||||
// Render the object into an existing grapher
|
||||
// ------------------------------------------------------------------------
|
||||
{
|
||||
record(render, "Graphing %+s %p into %p", name(), this, &g);
|
||||
return ops().graph(this, g);
|
||||
}
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
|
||||
size_t render(char *output, size_t length) const;
|
||||
// ------------------------------------------------------------------------
|
||||
// Render the object into a static buffer
|
||||
|
@ -342,16 +361,6 @@ struct object
|
|||
// ------------------------------------------------------------------------
|
||||
|
||||
|
||||
grob_p graph(grapher &g) const
|
||||
// ------------------------------------------------------------------------
|
||||
// Render the object into an existing grapher
|
||||
// ------------------------------------------------------------------------
|
||||
{
|
||||
record(render, "Graphing %+s %p into %p", name(), this, &g);
|
||||
return ops().graph(this, g);
|
||||
}
|
||||
|
||||
|
||||
text_p as_text(bool edit = true, bool eq = false) const;
|
||||
// ------------------------------------------------------------------------
|
||||
// Return the object as text
|
||||
|
@ -481,6 +490,11 @@ struct object
|
|||
//
|
||||
// ========================================================================
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC push_options
|
||||
# pragma GCC optimize("-O3")
|
||||
#endif
|
||||
|
||||
struct id_map
|
||||
// ------------------------------------------------------------------------
|
||||
// Used to isolate the type range checking names
|
||||
|
@ -679,6 +693,10 @@ struct object
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC pop_options
|
||||
#endif
|
||||
|
||||
|
||||
object_p as_quoted(id ty = ID_symbol) const;
|
||||
template<typename T>
|
||||
|
|
|
@ -44,6 +44,12 @@ struct program : list
|
|||
{
|
||||
program(id type, gcbytes bytes, size_t len): list(type, bytes, len) {}
|
||||
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC push_options
|
||||
# pragma GCC optimize("-O3")
|
||||
#endif // DM42
|
||||
|
||||
template<bool saving_last_args>
|
||||
result execute_program() const
|
||||
// ------------------------------------------------------------------------
|
||||
|
@ -65,6 +71,10 @@ struct program : list
|
|||
return r;
|
||||
}
|
||||
|
||||
#ifdef DM42
|
||||
# pragma GCC pop_options
|
||||
#endif // DM42
|
||||
|
||||
static bool interrupted(); // Program interrupted e.g. by EXIT key
|
||||
static program_p parse(utf8 source, size_t size);
|
||||
|
||||
|
|
Loading…
Reference in a new issue