Implement DELAY_NS with CYCCNT on Cortex-M7 (#12283)
This commit is contained in:
parent
f5498168ae
commit
cafabf2055
4 changed files with 74 additions and 47 deletions
|
@ -30,6 +30,7 @@
|
||||||
#include "HAL.h"
|
#include "HAL.h"
|
||||||
|
|
||||||
#include "../../inc/MarlinConfig.h"
|
#include "../../inc/MarlinConfig.h"
|
||||||
|
#include "../shared/Delay.h"
|
||||||
|
|
||||||
#if ENABLED(EEPROM_EMULATED_WITH_SRAM)
|
#if ENABLED(EEPROM_EMULATED_WITH_SRAM)
|
||||||
#if STM32F7xx
|
#if STM32F7xx
|
||||||
|
@ -80,6 +81,11 @@ uint16_t HAL_adc_result;
|
||||||
// HAL initialization task
|
// HAL initialization task
|
||||||
void HAL_init(void) {
|
void HAL_init(void) {
|
||||||
|
|
||||||
|
// Needed for DELAY_NS() / DELAY_US() on CORTEX-M7
|
||||||
|
#if (defined(__arm__) || defined(__thumb__)) && __CORTEX_M == 7
|
||||||
|
enableCycleCounter();
|
||||||
|
#endif
|
||||||
|
|
||||||
FastIO_init();
|
FastIO_init();
|
||||||
|
|
||||||
#if ENABLED(SDSUPPORT)
|
#if ENABLED(SDSUPPORT)
|
||||||
|
|
|
@ -153,8 +153,6 @@ extern uint16_t HAL_adc_result;
|
||||||
// Public functions
|
// Public functions
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Memory related
|
// Memory related
|
||||||
#define __bss_end __bss_end__
|
#define __bss_end __bss_end__
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
#ifdef __MK20DX256__
|
#ifdef __MK20DX256__
|
||||||
|
|
||||||
#include "HAL.h"
|
#include "HAL.h"
|
||||||
#include "../Delay.h"
|
#include "../shared/Delay.h"
|
||||||
|
|
||||||
#include <Wire.h>
|
#include <Wire.h>
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Busy wait delay cycles routines:
|
* Busy wait delay cycles routines:
|
||||||
|
@ -28,57 +29,81 @@
|
||||||
* DELAY_US(count): Delay execution in microseconds
|
* DELAY_US(count): Delay execution in microseconds
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef MARLIN_DELAY_H
|
|
||||||
#define MARLIN_DELAY_H
|
|
||||||
|
|
||||||
#include "../../core/macros.h"
|
#include "../../core/macros.h"
|
||||||
|
#include "../../core/millis_t.h"
|
||||||
|
|
||||||
#if defined(__arm__) || defined(__thumb__)
|
#if defined(__arm__) || defined(__thumb__)
|
||||||
|
|
||||||
// https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
|
#if __CORTEX_M == 7
|
||||||
|
|
||||||
#define nop() __asm__ __volatile__("nop;\n\t":::)
|
// Cortex-M7 can use the cycle counter of the DWT unit
|
||||||
|
// http://www.anthonyvh.com/2017/05/18/cortex_m-cycle_counter/
|
||||||
|
|
||||||
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
|
FORCE_INLINE static void enableCycleCounter() {
|
||||||
#if ARCH_PIPELINE_RELOAD_CYCLES < 2
|
CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
|
||||||
#define EXTRA_NOP_CYCLES A("nop")
|
|
||||||
#else
|
|
||||||
#define EXTRA_NOP_CYCLES ""
|
|
||||||
#endif
|
|
||||||
|
|
||||||
__asm__ __volatile__(
|
// Unlock DWT.
|
||||||
A(".syntax unified") // is to prevent CM0,CM1 non-unified syntax
|
DWT->LAR = 0xC5ACCE55;
|
||||||
L("1")
|
|
||||||
A("subs %[cnt],#1")
|
|
||||||
EXTRA_NOP_CYCLES
|
|
||||||
A("bne 1b")
|
|
||||||
: [cnt]"+r"(cy) // output: +r means input+output
|
|
||||||
: // input:
|
|
||||||
: "cc" // clobbers:
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delay in cycles
|
DWT->CYCCNT = 0;
|
||||||
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
|
DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
|
||||||
|
|
||||||
if (__builtin_constant_p(x)) {
|
|
||||||
#define MAXNOPS 4
|
|
||||||
|
|
||||||
if (x <= (MAXNOPS)) {
|
|
||||||
switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
|
|
||||||
}
|
|
||||||
else { // because of +1 cycle inside delay_4cycles
|
|
||||||
const uint32_t rem = (x - 1) % (MAXNOPS);
|
|
||||||
switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
|
|
||||||
if ((x = (x - 1) / (MAXNOPS)))
|
|
||||||
__delay_4cycles(x); // if need more then 4 nop loop is more optimal
|
|
||||||
}
|
|
||||||
#undef MAXNOPS
|
|
||||||
}
|
}
|
||||||
else if ((x >>= 2))
|
|
||||||
__delay_4cycles(x);
|
// Return the current value of the DWT cycle counter register (CYCCNT).
// The counter is reset and switched on by enableCycleCounter(); callers are
// expected to have run that first (HAL_init does so on Cortex-M7).
// Returned by value, so no 'volatile' on the return type: a qualifier there
// has no effect and only triggers -Wignored-qualifiers. 'static' matches the
// other FORCE_INLINE helpers in this header.
FORCE_INLINE static uint32_t getCycleCount() { return DWT->CYCCNT; }
|
||||||
}
|
|
||||||
#undef nop
|
// Busy-wait for x CPU cycles on Cortex-M7 by polling the DWT cycle counter.
// The target count is computed once on entry; the loop then re-reads the
// counter until PENDING() no longer reports the target as outstanding.
// NOTE(review): PENDING() is defined outside this file (presumably in
// millis_t.h, as a wrap-tolerant "now is before target" test) - confirm.
FORCE_INLINE static void DELAY_CYCLES(const uint32_t x) {
  const uint32_t deadline = getCycleCount() + x;
  for (;;) {
    if (!PENDING(getCycleCount(), deadline)) break;
  }
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
|
||||||
|
|
||||||
|
#define nop() __asm__ __volatile__("nop;\n\t":::)
|
||||||
|
|
||||||
|
// Busy-wait loop written in inline assembly: each iteration decrements the
// counter ("subs") and branches back ("bne") until it reaches zero.
// cy is the number of loop iterations, not the number of cycles.
// cy must be non-zero: the decrement happens before the test, so passing 0
// would wrap around and spin for a full 32-bit count.
// NOTE(review): the name implies 4 cycles per iteration; the optional nop
// below presumably pads cores with a short pipeline reload to reach that
// figure - confirm against the target core's timing.
// A() and L() are assembler-string helper macros defined outside this file.
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
|
||||||
|
// Add one nop per iteration when the branch costs fewer than 2 reload cycles.
#if ARCH_PIPELINE_RELOAD_CYCLES < 2
|
||||||
|
#define EXTRA_NOP_CYCLES A("nop")
|
||||||
|
#else
|
||||||
|
#define EXTRA_NOP_CYCLES ""
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__asm__ __volatile__(
|
||||||
|
A(".syntax unified") // prevents CM0, CM1 from using non-unified syntax
|
||||||
|
L("1")
|
||||||
|
A("subs %[cnt],#1")
|
||||||
|
EXTRA_NOP_CYCLES
|
||||||
|
A("bne 1b")
|
||||||
|
: [cnt]"+r"(cy) // output: +r means input+output
|
||||||
|
: // input:
|
||||||
|
: "cc" // clobbers:
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delay in cycles (generic ARM path, used when the core is not a Cortex-M7).
//
// x is the requested number of CPU cycles; each nop() is counted as one cycle.
//  - x is a compile-time constant and x <= 4: emits exactly x nops via the
//    intentional switch fall-through (x == 0 emits nothing).
//  - x is a compile-time constant and x > 4: emits (x - 1) % 4 nops, then
//    loops (x - 1) / 4 times in __delay_4cycles(); the "- 1" accounts for the
//    +1 cycle noted on __delay_4cycles().
//  - x is not a compile-time constant: loops x / 4 times, so the delay is
//    rounded down to a multiple of 4 and values below 4 do not delay at all.
// Both calls to __delay_4cycles() are guarded so it never receives 0.
|
||||||
|
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
|
||||||
|
|
||||||
|
if (__builtin_constant_p(x)) {
|
||||||
|
#define MAXNOPS 4
|
||||||
|
|
||||||
|
if (x <= (MAXNOPS)) {
|
||||||
|
// Cases fall through on purpose: entering at case N runs N nops.
switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
|
||||||
|
}
|
||||||
|
else { // because of +1 cycle inside delay_4cycles
|
||||||
|
const uint32_t rem = (x - 1) % (MAXNOPS);
|
||||||
|
// Fall-through again: burns the remainder that the 4-cycle loop cannot.
switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
|
||||||
|
if ((x = (x - 1) / (MAXNOPS)))
|
||||||
|
__delay_4cycles(x); // if more than 4 nops are needed, a loop is more optimal
|
||||||
|
}
|
||||||
|
#undef MAXNOPS
|
||||||
|
}
|
||||||
|
// Runtime value: convert cycles to loop iterations; skip the loop when zero.
else if ((x >>= 2))
|
||||||
|
__delay_4cycles(x);
|
||||||
|
}
|
||||||
|
#undef nop
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#elif defined(__AVR__)
|
#elif defined(__AVR__)
|
||||||
|
|
||||||
|
@ -144,5 +169,3 @@
|
||||||
|
|
||||||
// Delay in microseconds
|
// Delay in microseconds
|
||||||
#define DELAY_US(x) DELAY_CYCLES( (x) * (F_CPU / 1000000UL) )
|
#define DELAY_US(x) DELAY_CYCLES( (x) * (F_CPU / 1000000UL) )
|
||||||
|
|
||||||
#endif // MARLIN_DELAY_H
|
|
||||||
|
|
Reference in a new issue