3 ms speedup for ST7920 and delay for BOARD_3DRAG

and saving ~1k memory
by limiting the `#pragma GCC optimize (3)` optimisation to `ultralcd_st7920_u8glib_rrd.h`. This optimisation was not, and is not, applied for any of the other displays. It was the reason for the large additional memory use, because the complete 'ultralcd.cpp' and 'dogm_lcd_implementation.h' were optimised as well (sadly I did not observe a change in speed).

Unrolling the loop in `ST7920_SWSPI_SND_8BIT()` by hand (something I expected the optimiser to do) gained some speed by eliminating the compares and increments of the loop variable (i). Every CPU cycle in this loop costs at least 0.5ms per display update because it's executed more than 1k times/s.

The delays are now pre-filled with the calculated values for 4.5V driven ST7920.
A simple way to add __your__ own timing to the configuration was added.

At 4.5V
1.) The CLK signal needs to be at least 200ns high and 200ns low.
2.) The DAT pin needs to be set at least 40ns before CLK goes high and must stay at this value until 40ns after CLK went high.

A nop takes one processor cycle.
For 16MHz one nop lasts 62.5ns.
For 20MHz one nop lasts 50ns.

To fulfill condition 1.) we need 200/62.5 = 3.2 => 4 cycles (200/50 = 4 => 4). For the low phase, setting the pin takes much longer. For the high phase we (theoretically) have to throw in 2 nops, because changing the CLK takes only 2 cycles.

Condition 2.) is always fulfilled because the processor needs two cycles (100 - 125ns) for switching the CLK pin.


Needs tests and feedback.
In particular, I can't test 20MHz, 3DRAG, or displays supplied with less than 5V.
Are the delays right? Please experiment with longer or shorter delays. And give feedback.

Already tested are 5 displays with 4.9V - 5.1V at 16MHz where no delays are needed.
This commit is contained in:
AnHardt 2016-06-04 14:54:20 +02:00 committed by Scott Lahteine
parent ca184d3d72
commit 56c42b572c

View file

@ -27,9 +27,6 @@
#if ENABLED(U8GLIB_ST7920) #if ENABLED(U8GLIB_ST7920)
//set optimization so ARDUINO optimizes this file
#pragma GCC optimize (3)
#define ST7920_CLK_PIN LCD_PINS_D4 #define ST7920_CLK_PIN LCD_PINS_D4
#define ST7920_DAT_PIN LCD_PINS_ENABLE #define ST7920_DAT_PIN LCD_PINS_ENABLE
#define ST7920_CS_PIN LCD_PINS_RS #define ST7920_CS_PIN LCD_PINS_RS
@ -43,20 +40,119 @@
#include <U8glib.h> #include <U8glib.h>
// Request GCC optimisation level 3 for the code that follows.
// push_options saves the current option state first; the matching
// '#pragma GCC pop_options' near the end of this file restores it.
#pragma GCC push_options
#pragma GCC optimize (3)
// Busy-wait building blocks: DELAY_n_NOP expands to a statement made of
// n 'nop' instructions emitted via inline asm (DELAY_0_NOP is an empty
// statement). Each macro already carries its own trailing ';', so it is
// used without one.
#define DELAY_0_NOP ;
#define DELAY_1_NOP __asm__("nop\n\t");
#define DELAY_2_NOP __asm__("nop\n\t" "nop\n\t");
#define DELAY_3_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t");
#define DELAY_4_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t");
// The three ST7920_DELAY_x hooks are inserted into every bit sent by
// ST7920_SWSPI_SND_8BIT():
//   ST7920_DELAY_1 - after CLK is driven low, before DAT is written
//   ST7920_DELAY_2 - after DAT is written, before CLK is driven high
//   ST7920_DELAY_3 - after CLK is driven high, before the next CLK low
// Every default below is wrapped in #ifndef, so a value defined earlier
// takes precedence over the defaults chosen here.
// If you want you can define your own set of delays in Configuration.h
//#define ST7920_DELAY_1 DELAY_0_NOP
//#define ST7920_DELAY_2 DELAY_0_NOP
//#define ST7920_DELAY_3 DELAY_0_NOP
// Defaults for CPU clocks of 20 MHz and above.
#if F_CPU >= 20000000
#ifndef ST7920_DELAY_1
#define ST7920_DELAY_1 DELAY_0_NOP
#endif
#ifndef ST7920_DELAY_2
#define ST7920_DELAY_2 DELAY_0_NOP
#endif
#ifndef ST7920_DELAY_3
#define ST7920_DELAY_3 DELAY_2_NOP
#endif
// Defaults for the 3DRAG board (only reached when F_CPU < 20 MHz).
// NOTE(review): this branch currently selects exactly the same values as
// the other two branches - confirm whether 3DRAG needs longer delays.
#elif MOTHERBOARD == BOARD_3DRAG
#ifndef ST7920_DELAY_1
#define ST7920_DELAY_1 DELAY_0_NOP
#endif
#ifndef ST7920_DELAY_2
#define ST7920_DELAY_2 DELAY_0_NOP
#endif
#ifndef ST7920_DELAY_3
#define ST7920_DELAY_3 DELAY_2_NOP
#endif
// Defaults for a CPU clock of exactly 16 MHz.
#elif F_CPU == 16000000
#ifndef ST7920_DELAY_1
#define ST7920_DELAY_1 DELAY_0_NOP
#endif
#ifndef ST7920_DELAY_2
#define ST7920_DELAY_2 DELAY_0_NOP
#endif
#ifndef ST7920_DELAY_3
#define ST7920_DELAY_3 DELAY_2_NOP
#endif
// Any other clock/board combination has no default timing: stop the build
// rather than guess.
#else
#error "No valid condition for delays in 'ultralcd_st7920_u8glib_rrd.h'"
#endif
static void ST7920_SWSPI_SND_8BIT(uint8_t val) { static void ST7920_SWSPI_SND_8BIT(uint8_t val) {
uint8_t i;
for (i = 0; i < 8; i++) {
WRITE(ST7920_CLK_PIN,0); WRITE(ST7920_CLK_PIN,0);
#if F_CPU == 20000000 ST7920_DELAY_1
__asm__("nop\n\t");
#endif
WRITE(ST7920_DAT_PIN,val&0x80); WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1; val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1);
ST7920_DELAY_3
WRITE(ST7920_CLK_PIN,0);
ST7920_DELAY_1
WRITE(ST7920_DAT_PIN,val&0x80);
val<<=1;
ST7920_DELAY_2
WRITE(ST7920_CLK_PIN,1); WRITE(ST7920_CLK_PIN,1);
#if F_CPU == 20000000
__asm__("nop\n\t""nop\n\t");
#endif
}
} }
#define ST7920_CS() {WRITE(ST7920_CS_PIN,1);u8g_10MicroDelay();} #define ST7920_CS() {WRITE(ST7920_CS_PIN,1);u8g_10MicroDelay();}
@ -138,6 +234,7 @@ class U8GLIB_ST7920_128X64_RRD : public U8GLIB {
U8GLIB_ST7920_128X64_RRD(uint8_t dummy) : U8GLIB(&u8g_dev_st7920_128x64_rrd_sw_spi) { UNUSED(dummy); } U8GLIB_ST7920_128X64_RRD(uint8_t dummy) : U8GLIB(&u8g_dev_st7920_128x64_rrd_sw_spi) { UNUSED(dummy); }
}; };
#pragma GCC pop_options
#endif //U8GLIB_ST7920 #endif //U8GLIB_ST7920
#endif //ULCDST7920_H #endif //ULCDST7920_H