From 56c42b572c5f94b11b1a2f9edabfcda84003cbef Mon Sep 17 00:00:00 2001 From: AnHardt Date: Sat, 4 Jun 2016 14:54:20 +0200 Subject: [PATCH 1/4] 3 ms speedup for ST7920 and delay for BOARD_3DRAG and saving ~1k memory by limiting the `#pragma GCC optimize (3)` optimisation to `ultralcd_st7920_u8glib_rrd.h`. This optimisation was and is not done for the other displays, and it is the reason for the large additional use of memory, because the complete 'ultralcd.cpp' and 'dogm_lcd_implementation.h' were optimised (sadly I did not observe a change in speed). Unrolling the loop in `ST7920_SWSPI_SND_8BIT()` by hand, which I expected the optimiser to do, saved some time by eliminating the compares and increments of the loop variable (i). Every CPU cycle in this loop costs at least 0.5ms per display update because it's executed more than 1k times/s. The delays are now pre-filled with the calculated values for a 4.5V driven ST7920. A simple way to add __your__ own timing to the configuration was added. At 4.5V 1.) The CLK signal needs to be at least 200ns high and 200ns low. 2.) The DAT pin needs to be set at least 40ns before CLK goes high and must stay at this value until 40ns after CLK went high. A nop takes one processor cycle. For 16MHz one nop lasts 62.5ns. For 20MHz one nop lasts 50ns. To fulfill condition 1.) we need 200/62.5 = 3.2 => 4 cycles (200/50 = 4 => 4). For the low phase, setting the pin takes much longer. For the high phase we (theoretically) have to throw in 2 nops, because changing the CLK takes only 2 cycles. Condition 2.) is always fulfilled because the processor needs two cycles (100 - 125ns) for switching the CLK pin. Needs tests and feedback. In particular, I can't test 20MHz, 3DRAG and displays supplied with less than 5V. Are the delays right? Please experiment with longer or shorter delays and give feedback. Already tested are 5 displays with 4.9V - 5.1V at 16MHz, where no delays are needed. 
--- Marlin/ultralcd_st7920_u8glib_rrd.h | 121 +++++++++++++++++++++++++--- 1 file changed, 109 insertions(+), 12 deletions(-) diff --git a/Marlin/ultralcd_st7920_u8glib_rrd.h b/Marlin/ultralcd_st7920_u8glib_rrd.h index cc88b5256..1df6343ed 100644 --- a/Marlin/ultralcd_st7920_u8glib_rrd.h +++ b/Marlin/ultralcd_st7920_u8glib_rrd.h @@ -27,9 +27,6 @@ #if ENABLED(U8GLIB_ST7920) -//set optimization so ARDUINO optimizes this file -#pragma GCC optimize (3) - #define ST7920_CLK_PIN LCD_PINS_D4 #define ST7920_DAT_PIN LCD_PINS_ENABLE #define ST7920_CS_PIN LCD_PINS_RS @@ -43,20 +40,119 @@ #include +//set optimization so ARDUINO optimizes this file +#pragma GCC push_options +#pragma GCC optimize (3) + +#define DELAY_0_NOP ; +#define DELAY_1_NOP __asm__("nop\n\t"); +#define DELAY_2_NOP __asm__("nop\n\t" "nop\n\t"); +#define DELAY_3_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t"); +#define DELAY_4_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t"); + + +// If you want you can define your own set of delays in Configuration.h +//#define ST7920_DELAY_1 DELAY_0_NOP +//#define ST7920_DELAY_2 DELAY_0_NOP +//#define ST7920_DELAY_3 DELAY_0_NOP + +#if F_CPU >= 20000000 + #ifndef ST7920_DELAY_1 + #define ST7920_DELAY_1 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_2 + #define ST7920_DELAY_2 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_3 + #define ST7920_DELAY_3 DELAY_2_NOP + #endif +#elif MOTHERBOARD == BOARD_3DRAG + #ifndef ST7920_DELAY_1 + #define ST7920_DELAY_1 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_2 + #define ST7920_DELAY_2 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_3 + #define ST7920_DELAY_3 DELAY_2_NOP + #endif +#elif F_CPU == 16000000 + #ifndef ST7920_DELAY_1 + #define ST7920_DELAY_1 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_2 + #define ST7920_DELAY_2 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_3 + #define ST7920_DELAY_3 DELAY_2_NOP + #endif +#else + #error "No valid condition for delays in 'ultralcd_st7920_u8glib_rrd.h'" +#endif + static void ST7920_SWSPI_SND_8BIT(uint8_t val) { 
- uint8_t i; - for (i = 0; i < 8; i++) { WRITE(ST7920_CLK_PIN,0); - #if F_CPU == 20000000 - __asm__("nop\n\t"); - #endif + ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 + WRITE(ST7920_CLK_PIN,1); + ST7920_DELAY_3 + + WRITE(ST7920_CLK_PIN,0); + ST7920_DELAY_1 + WRITE(ST7920_DAT_PIN,val&0x80); + val<<=1; + ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); - #if F_CPU == 20000000 - __asm__("nop\n\t""nop\n\t"); - #endif - } } #define ST7920_CS() {WRITE(ST7920_CS_PIN,1);u8g_10MicroDelay();} @@ -138,6 +234,7 @@ class U8GLIB_ST7920_128X64_RRD : public U8GLIB { U8GLIB_ST7920_128X64_RRD(uint8_t dummy) : U8GLIB(&u8g_dev_st7920_128x64_rrd_sw_spi) { UNUSED(dummy); } }; +#pragma GCC pop_options #endif //U8GLIB_ST7920 #endif //ULCDST7920_H From 4454d80276f4097f2e9713848df3efd9e5b431f0 Mon Sep 17 00:00:00 2001 From: AnHardt Date: Tue, 7 Jun 2016 13:45:35 +0200 Subject: [PATCH 2/4] Decrease the needed nops to 1 by shifting the left shift into the high phase. 
``` 2 cbi 0x2,1 ;set CLK // 1 in r18,__SREG__ //1 1-3 sbrc r24,7 //2-4 2 rjmp .L19 //4 1 cli .L19: //5 2 lds r25,258 lds r25,258 //7 1 andi r25,lo8(-2) ori r25,lo8(1) //8 2 sts 258,r25 sts 258,r25 //10 1 out __SREG__,r18 out __SREG__,r18 //11 2 .L3: rjmp .L3 //13 //2 2 sbi 0x2,1 ;reset CLK // //13-15 //2-4 1 lsl r24 ; val //1 1 nop //2 2 cbi 0x2,1 ;set CLK //4 ... ``` --- Marlin/ultralcd_st7920_u8glib_rrd.h | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/Marlin/ultralcd_st7920_u8glib_rrd.h b/Marlin/ultralcd_st7920_u8glib_rrd.h index 1df6343ed..9611efe6c 100644 --- a/Marlin/ultralcd_st7920_u8glib_rrd.h +++ b/Marlin/ultralcd_st7920_u8glib_rrd.h @@ -41,7 +41,6 @@ #include //set optimization so ARDUINO optimizes this file -#pragma GCC push_options #pragma GCC optimize (3) #define DELAY_0_NOP ; @@ -64,9 +63,9 @@ #define ST7920_DELAY_2 DELAY_0_NOP #endif #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_2_NOP + #define ST7920_DELAY_3 DELAY_1_NOP #endif -#elif MOTHERBOARD == BOARD_3DRAG +#elif (MOTHERBOARD == BOARD_3DRAG) || (MOTHERBOARD == BOARD_K8200) #ifndef ST7920_DELAY_1 #define ST7920_DELAY_1 DELAY_0_NOP #endif @@ -74,7 +73,7 @@ #define ST7920_DELAY_2 DELAY_0_NOP #endif #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_2_NOP + #define ST7920_DELAY_3 DELAY_1_NOP #endif #elif F_CPU == 16000000 #ifndef ST7920_DELAY_1 @@ -84,7 +83,7 @@ #define ST7920_DELAY_2 DELAY_0_NOP #endif #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_2_NOP + #define ST7920_DELAY_3 DELAY_1_NOP #endif #else #error "No valid condition for delays in 'ultralcd_st7920_u8glib_rrd.h'" @@ -94,63 +93,62 @@ static void ST7920_SWSPI_SND_8BIT(uint8_t val) { WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 
WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); + val<<=1; ST7920_DELAY_3 WRITE(ST7920_CLK_PIN,0); ST7920_DELAY_1 WRITE(ST7920_DAT_PIN,val&0x80); - val<<=1; ST7920_DELAY_2 WRITE(ST7920_CLK_PIN,1); } @@ -234,7 +232,7 @@ class U8GLIB_ST7920_128X64_RRD : public U8GLIB { U8GLIB_ST7920_128X64_RRD(uint8_t dummy) : U8GLIB(&u8g_dev_st7920_128x64_rrd_sw_spi) { UNUSED(dummy); } }; -#pragma GCC pop_options +#pragma GCC reset_options #endif //U8GLIB_ST7920 #endif //ULCDST7920_H From b4076883ccd611e1e0e1de6e863803f3b43c4215 Mon Sep 17 00:00:00 2001 From: AnHardt Date: Tue, 14 Jun 2016 11:08:28 +0200 Subject: [PATCH 3/4] Set tested delays for K8200, RAMBO, MINIRAMBO and ST7920 --- Marlin/ultralcd_st7920_u8glib_rrd.h | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Marlin/ultralcd_st7920_u8glib_rrd.h b/Marlin/ultralcd_st7920_u8glib_rrd.h index 9611efe6c..699934cef 100644 --- a/Marlin/ultralcd_st7920_u8glib_rrd.h +++ b/Marlin/ultralcd_st7920_u8glib_rrd.h @@ -66,6 +66,26 @@ #define ST7920_DELAY_3 DELAY_1_NOP #endif #elif (MOTHERBOARD == BOARD_3DRAG) || (MOTHERBOARD == BOARD_K8200) + #ifndef ST7920_DELAY_1 + #define ST7920_DELAY_1 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_2 + #define ST7920_DELAY_2 DELAY_3_NOP + #endif + #ifndef ST7920_DELAY_3 + #define ST7920_DELAY_3 DELAY_0_NOP + 
#endif +#elif (MOTHERBOARD == BOARD_MINIRAMBO) + #ifndef ST7920_DELAY_1 + #define ST7920_DELAY_1 DELAY_0_NOP + #endif + #ifndef ST7920_DELAY_2 + #define ST7920_DELAY_2 DELAY_4_NOP + #endif + #ifndef ST7920_DELAY_3 + #define ST7920_DELAY_3 DELAY_0_NOP + #endif +#elif (MOTHERBOARD == BOARD_RAMBO) #ifndef ST7920_DELAY_1 #define ST7920_DELAY_1 DELAY_0_NOP #endif @@ -73,7 +93,7 @@ #define ST7920_DELAY_2 DELAY_0_NOP #endif #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_1_NOP + #define ST7920_DELAY_3 DELAY_0_NOP #endif #elif F_CPU == 16000000 #ifndef ST7920_DELAY_1 From a7684eef7a214b47ff59ae94e902fe896efb66c5 Mon Sep 17 00:00:00 2001 From: Scott Lahteine Date: Sat, 25 Jun 2016 18:32:13 -0700 Subject: [PATCH 4/4] Squish code in ST7920 --- Marlin/ultralcd_st7920_u8glib_rrd.h | 155 ++++++++-------------------- 1 file changed, 44 insertions(+), 111 deletions(-) diff --git a/Marlin/ultralcd_st7920_u8glib_rrd.h b/Marlin/ultralcd_st7920_u8glib_rrd.h index 699934cef..a87c19e8e 100644 --- a/Marlin/ultralcd_st7920_u8glib_rrd.h +++ b/Marlin/ultralcd_st7920_u8glib_rrd.h @@ -43,11 +43,11 @@ //set optimization so ARDUINO optimizes this file #pragma GCC optimize (3) -#define DELAY_0_NOP ; -#define DELAY_1_NOP __asm__("nop\n\t"); -#define DELAY_2_NOP __asm__("nop\n\t" "nop\n\t"); -#define DELAY_3_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t"); -#define DELAY_4_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t"); +#define DELAY_0_NOP NOOP +#define DELAY_1_NOP __asm__("nop\n\t") +#define DELAY_2_NOP __asm__("nop\n\t" "nop\n\t") +#define DELAY_3_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t") +#define DELAY_4_NOP __asm__("nop\n\t" "nop\n\t" "nop\n\t" "nop\n\t") // If you want you can define your own set of delays in Configuration.h @@ -56,121 +56,54 @@ //#define ST7920_DELAY_3 DELAY_0_NOP #if F_CPU >= 20000000 - #ifndef ST7920_DELAY_1 - #define ST7920_DELAY_1 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_2 - #define ST7920_DELAY_2 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_3 - #define 
ST7920_DELAY_3 DELAY_1_NOP - #endif + #define CPU_ST7920_DELAY_1 DELAY_0_NOP + #define CPU_ST7920_DELAY_2 DELAY_0_NOP + #define CPU_ST7920_DELAY_3 DELAY_1_NOP #elif (MOTHERBOARD == BOARD_3DRAG) || (MOTHERBOARD == BOARD_K8200) - #ifndef ST7920_DELAY_1 - #define ST7920_DELAY_1 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_2 - #define ST7920_DELAY_2 DELAY_3_NOP - #endif - #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_0_NOP - #endif + #define CPU_ST7920_DELAY_1 DELAY_0_NOP + #define CPU_ST7920_DELAY_2 DELAY_3_NOP + #define CPU_ST7920_DELAY_3 DELAY_0_NOP #elif (MOTHERBOARD == BOARD_MINIRAMBO) - #ifndef ST7920_DELAY_1 - #define ST7920_DELAY_1 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_2 - #define ST7920_DELAY_2 DELAY_4_NOP - #endif - #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_0_NOP - #endif + #define CPU_ST7920_DELAY_1 DELAY_0_NOP + #define CPU_ST7920_DELAY_2 DELAY_4_NOP + #define CPU_ST7920_DELAY_3 DELAY_0_NOP #elif (MOTHERBOARD == BOARD_RAMBO) - #ifndef ST7920_DELAY_1 - #define ST7920_DELAY_1 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_2 - #define ST7920_DELAY_2 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_0_NOP - #endif + #define CPU_ST7920_DELAY_1 DELAY_0_NOP + #define CPU_ST7920_DELAY_2 DELAY_0_NOP + #define CPU_ST7920_DELAY_3 DELAY_0_NOP #elif F_CPU == 16000000 - #ifndef ST7920_DELAY_1 - #define ST7920_DELAY_1 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_2 - #define ST7920_DELAY_2 DELAY_0_NOP - #endif - #ifndef ST7920_DELAY_3 - #define ST7920_DELAY_3 DELAY_1_NOP - #endif + #define CPU_ST7920_DELAY_1 DELAY_0_NOP + #define CPU_ST7920_DELAY_2 DELAY_0_NOP + #define CPU_ST7920_DELAY_3 DELAY_1_NOP #else #error "No valid condition for delays in 'ultralcd_st7920_u8glib_rrd.h'" #endif +#ifndef ST7920_DELAY_1 + #define ST7920_DELAY_1 CPU_ST7920_DELAY_1 +#endif +#ifndef ST7920_DELAY_2 + #define ST7920_DELAY_2 CPU_ST7920_DELAY_2 +#endif +#ifndef ST7920_DELAY_3 + #define ST7920_DELAY_3 CPU_ST7920_DELAY_3 +#endif + +#define 
ST7920_SND_BIT \ + WRITE(ST7920_CLK_PIN, LOW); ST7920_DELAY_1; \ + WRITE(ST7920_DAT_PIN, val & 0x80); ST7920_DELAY_2; \ + WRITE(ST7920_CLK_PIN, HIGH); ST7920_DELAY_3; \ + val <<= 1 + static void ST7920_SWSPI_SND_8BIT(uint8_t val) { - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); - val<<=1; - ST7920_DELAY_3 - - WRITE(ST7920_CLK_PIN,0); - ST7920_DELAY_1 - WRITE(ST7920_DAT_PIN,val&0x80); - ST7920_DELAY_2 - WRITE(ST7920_CLK_PIN,1); + ST7920_SND_BIT; // 1 + ST7920_SND_BIT; // 2 + ST7920_SND_BIT; // 3 + ST7920_SND_BIT; // 4 + ST7920_SND_BIT; // 5 + ST7920_SND_BIT; // 6 + ST7920_SND_BIT; // 7 + ST7920_SND_BIT; // 8 } #define ST7920_CS() {WRITE(ST7920_CS_PIN,1);u8g_10MicroDelay();}