performance: Implement some highly-focused optimizations

Implemented a few additional focused optimizations that save another
100ms on the `NQueens` benchmark on the DM42.

This comes at the expense of about 4K of additional generated code,
which is probably as much as is reasonable to dedicate to this.

The numbers for the DM32 are now the best we have ever had.

Fixes: #533

Signed-off-by: Christophe de Dinechin <christophe@dinechin.org>
This commit is contained in:
Christophe de Dinechin 2023-11-03 23:05:09 +01:00
parent 044352cc4c
commit ebb6c8a377
7 changed files with 62 additions and 18 deletions

View file

@ -12,6 +12,7 @@ all times in milliseconds, best of 5 runs, on USB power, with presumably no GC.
| Version | Time | PGM Size | QSPI Size | Note |
|---------|---------|-----------|-----------|-------------------------|
| 0.4.9+ | 1070 | 650116 | | Focused optimizations |
| 0.4.9+ | 1175 | | | Range-based type checks |
| 0.4.9+ | 1215 | | | Remove busy animation |
| 0.4.9 | 1447 | 646028 | 1531868 | No LastArgs in progs |
@ -45,6 +46,7 @@ is not there.
| Version | Time | PGM Size | QSPI Size | Note |
|---------|---------|-----------|-----------|-------------------------|
| 0.4.9+ | 1803 | 731052 | | Focused optimizations |
| 0.4.9 | 2156 | 772732 | 1534316 | No LastArg in progs |
| 0.4.8 | 2201 | 749892 | 1534316 | |
| 0.4.7 | 2209 | 742868 | 1534316 | |

View file

@ -2704,6 +2704,7 @@ all times in milliseconds, best of 5 runs, on USB power, with presumably no GC.
| Version | Time | PGM Size | QSPI Size | Note |
|---------|---------|-----------|-----------|-------------------------|
| 0.4.9+ | 1070 | 650116 | | Focused optimizations |
| 0.4.9+ | 1175 | | | Range-based type checks |
| 0.4.9+ | 1215 | | | Remove busy animation |
| 0.4.9 | 1447 | 646028 | 1531868 | No LastArgs in progs |
@ -2737,6 +2738,7 @@ is not there.
| Version | Time | PGM Size | QSPI Size | Note |
|---------|---------|-----------|-----------|-------------------------|
| 0.4.9+ | 1803 | 731052 | | Focused optimizations |
| 0.4.9 | 2156 | 772732 | 1534316 | No LastArg in progs |
| 0.4.8 | 2201 | 749892 | 1534316 | |
| 0.4.7 | 2209 | 742868 | 1534316 | |

View file

@ -2704,6 +2704,7 @@ all times in milliseconds, best of 5 runs, on USB power, with presumably no GC.
| Version | Time | PGM Size | QSPI Size | Note |
|---------|---------|-----------|-----------|-------------------------|
| 0.4.9+ | 1070 | 650116 | | Focused optimizations |
| 0.4.9+ | 1175 | | | Range-based type checks |
| 0.4.9+ | 1215 | | | Remove busy animation |
| 0.4.9 | 1447 | 646028 | 1531868 | No LastArgs in progs |
@ -2737,6 +2738,7 @@ is not there.
| Version | Time | PGM Size | QSPI Size | Note |
|---------|---------|-----------|-----------|-------------------------|
| 0.4.9+ | 1803 | 731052 | | Focused optimizations |
| 0.4.9 | 2156 | 772732 | 1534316 | No LastArg in progs |
| 0.4.8 | 2201 | 749892 | 1534316 | |
| 0.4.7 | 2209 | 742868 | 1534316 | |

View file

@ -122,3 +122,5 @@ freebsd: LIBS += -lthr -liconv
macx: LIBS += -framework CoreFoundation -framework IOKit
macx: QMAKE_CFLAGS += -fsanitize=address
macx: LIBS += -fsanitize=address
clang: QMAKE_CFLAGS += -Wno-unknown-pragmas
clang: QMAKE_CXXFLAGS += -Wno-unknown-pragmas

View file

@ -32,6 +32,12 @@
#include "types.h"
#include <cstdint>
#ifdef DM42
# pragma GCC push_options
# pragma GCC optimize("-O3")
#endif // DM42
template <typename Int = uint, typename Data>
inline Int leb128(Data *&p)
// ----------------------------------------------------------------------------
@ -55,17 +61,15 @@ inline Int leb128(Data *&p)
}
template<>
inline uint16_t leb128<uint16_t, byte>(byte *&bp)
inline INLINE uint16_t leb128_u16(byte *bp)
// ----------------------------------------------------------------------------
// Return the leb128 value at pointer
// ----------------------------------------------------------------------------
{
if (bp[0] < 0x80)
return *bp++;
uint16_t b1 = *bp++ & 0x7F;
uint16_t b2 = *bp++ << 7;
return b1 | b2;
uint16_t b1 = *bp;
if (b1 < 0x80)
return b1;
return (b1 & 0x7F) | (uint16_t(bp[1]) << 7);
}
@ -127,4 +131,8 @@ inline Data *leb128skip(Data *ptr)
return (Data *) p;
}
#ifdef DM42
# pragma GCC pop_options
#endif // DM42
#endif // LEB128_H

View file

@ -225,13 +225,18 @@ struct object
}
#ifdef DM42
# pragma GCC push_options
# pragma GCC optimize("-O3")
#endif // DM42
id type() const
// ------------------------------------------------------------------------
// Return the type of the object
// ------------------------------------------------------------------------
{
byte *ptr = (byte *) this;
id ty = (id) leb128<uint16_t>(ptr);
id ty = (id) leb128_u16(ptr);
if (ty > NUM_IDS)
{
object_error(ty, this);
@ -330,6 +335,20 @@ struct object
}
grob_p graph(grapher &g) const
// ------------------------------------------------------------------------
// Render the object into an existing grapher
//
// Passes the object's name, its address and the grapher's address to
// `record` under `render`, then forwards to the `graph` entry of ops()
// and returns whatever that entry returns.
// ------------------------------------------------------------------------
{
record(render, "Graphing %+s %p into %p", name(), this, &g);
return ops().graph(this, g);
}
#ifdef DM42
# pragma GCC pop_options
#endif
size_t render(char *output, size_t length) const;
// ------------------------------------------------------------------------
// Render the object into a static buffer
@ -342,16 +361,6 @@ struct object
// ------------------------------------------------------------------------
grob_p graph(grapher &g) const
// ------------------------------------------------------------------------
// Render the object into an existing grapher
//
// Passes the object's name, its address and the grapher's address to
// `record` under `render`, then forwards to the `graph` entry of ops()
// and returns whatever that entry returns.
// ------------------------------------------------------------------------
{
record(render, "Graphing %+s %p into %p", name(), this, &g);
return ops().graph(this, g);
}
text_p as_text(bool edit = true, bool eq = false) const;
// ------------------------------------------------------------------------
// Return the object as text
@ -481,6 +490,11 @@ struct object
//
// ========================================================================
#ifdef DM42
# pragma GCC push_options
# pragma GCC optimize("-O3")
#endif
struct id_map
// ------------------------------------------------------------------------
// Used to isolate the type range checking names
@ -679,6 +693,10 @@ struct object
return nullptr;
}
#ifdef DM42
# pragma GCC pop_options
#endif
object_p as_quoted(id ty = ID_symbol) const;
template<typename T>

View file

@ -44,6 +44,12 @@ struct program : list
{
// Build a program by forwarding type, bytes and len unchanged to the list base
program(id type, gcbytes bytes, size_t len): list(type, bytes, len) {}
#ifdef DM42
# pragma GCC push_options
# pragma GCC optimize("-O3")
#endif // DM42
template<bool saving_last_args>
result execute_program() const
// ------------------------------------------------------------------------
@ -65,6 +71,10 @@ struct program : list
return r;
}
#ifdef DM42
# pragma GCC pop_options
#endif // DM42
static bool interrupted(); // Program interrupted e.g. by EXIT key
static program_p parse(utf8 source, size_t size);