Fix ARM delay function (#20901)
This commit is contained in:
parent
6054052248
commit
77966135e8
|
@ -240,7 +240,7 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
// all the others
|
// all the others
|
||||||
static uint32_t spiDelayCyclesX4 = (F_CPU) / 1000000; // 4µs => 125khz
|
static uint32_t spiDelayCyclesX4 = 4 * (F_CPU) / 1000000; // 4µs => 125khz
|
||||||
|
|
||||||
static uint8_t spiTransferX(uint8_t b) { // using Mode 0
|
static uint8_t spiTransferX(uint8_t b) { // using Mode 0
|
||||||
int bits = 8;
|
int bits = 8;
|
||||||
|
@ -249,12 +249,12 @@
|
||||||
b <<= 1; // little setup time
|
b <<= 1; // little setup time
|
||||||
|
|
||||||
WRITE(SD_SCK_PIN, HIGH);
|
WRITE(SD_SCK_PIN, HIGH);
|
||||||
__delay_4cycles(spiDelayCyclesX4);
|
DELAY_CYCLES(spiDelayCyclesX4);
|
||||||
|
|
||||||
b |= (READ(SD_MISO_PIN) != 0);
|
b |= (READ(SD_MISO_PIN) != 0);
|
||||||
|
|
||||||
WRITE(SD_SCK_PIN, LOW);
|
WRITE(SD_SCK_PIN, LOW);
|
||||||
__delay_4cycles(spiDelayCyclesX4);
|
DELAY_CYCLES(spiDelayCyclesX4);
|
||||||
} while (--bits);
|
} while (--bits);
|
||||||
return b;
|
return b;
|
||||||
}
|
}
|
||||||
|
@ -510,7 +510,7 @@
|
||||||
spiRxBlock = (pfnSpiRxBlock)spiRxBlockX;
|
spiRxBlock = (pfnSpiRxBlock)spiRxBlockX;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
spiDelayCyclesX4 = ((F_CPU) / 1000000) >> (6 - spiRate);
|
spiDelayCyclesX4 = ((F_CPU) / 1000000) >> (6 - spiRate) << 2; // spiRate of 2 gives the maximum error with current CPU
|
||||||
spiTransferTx = (pfnSpiTransfer)spiTransferX;
|
spiTransferTx = (pfnSpiTransfer)spiTransferX;
|
||||||
spiTransferRx = (pfnSpiTransfer)spiTransferX;
|
spiTransferRx = (pfnSpiTransfer)spiTransferX;
|
||||||
spiTxBlock = (pfnSpiTxBlock)spiTxBlockX;
|
spiTxBlock = (pfnSpiTxBlock)spiTxBlockX;
|
||||||
|
|
|
@ -59,6 +59,7 @@
|
||||||
|
|
||||||
#if ENABLED(U8GLIB_ST7920)
|
#if ENABLED(U8GLIB_ST7920)
|
||||||
|
|
||||||
|
#include "../../../inc/MarlinConfig.h"
|
||||||
#include "../../shared/Delay.h"
|
#include "../../shared/Delay.h"
|
||||||
|
|
||||||
#include <U8glib.h>
|
#include <U8glib.h>
|
||||||
|
|
|
@ -59,6 +59,7 @@
|
||||||
|
|
||||||
#if HAS_MARLINUI_U8GLIB
|
#if HAS_MARLINUI_U8GLIB
|
||||||
|
|
||||||
|
#include "../../../inc/MarlinConfig.h"
|
||||||
#include "../../shared/Delay.h"
|
#include "../../shared/Delay.h"
|
||||||
|
|
||||||
#include <U8glib.h>
|
#include <U8glib.h>
|
||||||
|
|
|
@ -57,17 +57,6 @@ uint16_t HAL_adc_result;
|
||||||
// Public functions
|
// Public functions
|
||||||
// ------------------------
|
// ------------------------
|
||||||
|
|
||||||
// Needed for DELAY_NS() / DELAY_US() on CORTEX-M7
|
|
||||||
#if (defined(__arm__) || defined(__thumb__)) && __CORTEX_M == 7
|
|
||||||
// HAL pre-initialization task
|
|
||||||
// Force the preinit function to run between the premain() and main() function
|
|
||||||
// of the STM32 arduino core
|
|
||||||
__attribute__((constructor (102)))
|
|
||||||
void HAL_preinit() {
|
|
||||||
enableCycleCounter();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// HAL initialization task
|
// HAL initialization task
|
||||||
void HAL_init() {
|
void HAL_init() {
|
||||||
FastIO_init();
|
FastIO_init();
|
||||||
|
|
|
@ -194,6 +194,7 @@ void flashFirmware(const int16_t);
|
||||||
typedef void (*systickCallback_t)(void);
|
typedef void (*systickCallback_t)(void);
|
||||||
void systick_attach_callback(systickCallback_t cb);
|
void systick_attach_callback(systickCallback_t cb);
|
||||||
void HAL_SYSTICK_Callback();
|
void HAL_SYSTICK_Callback();
|
||||||
|
|
||||||
extern volatile uint32_t systick_uptime_millis;
|
extern volatile uint32_t systick_uptime_millis;
|
||||||
|
|
||||||
#define HAL_CAN_SET_PWM_FREQ // This HAL supports PWM Frequency adjustment
|
#define HAL_CAN_SET_PWM_FREQ // This HAL supports PWM Frequency adjustment
|
||||||
|
|
|
@ -51,18 +51,28 @@ static SPISettings spiConfig;
|
||||||
OUT_WRITE(SD_MOSI_PIN, HIGH);
|
OUT_WRITE(SD_MOSI_PIN, HIGH);
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint16_t delay_STM32_soft_spi;
|
// Use function with compile-time value so we can actually reach the desired frequency
|
||||||
|
// Need to adjust this a little bit: on a 72MHz clock, we have 14ns/clock
|
||||||
|
// and we'll use ~3 cycles to jump to the method and going back, so it'll take ~40ns from the given clock here
|
||||||
|
#define CALLING_COST_NS (3U * 1000000000U) / (F_CPU)
|
||||||
|
void (*delaySPIFunc)();
|
||||||
|
void delaySPI_125() { DELAY_NS(125 - CALLING_COST_NS); }
|
||||||
|
void delaySPI_250() { DELAY_NS(250 - CALLING_COST_NS); }
|
||||||
|
void delaySPI_500() { DELAY_NS(500 - CALLING_COST_NS); }
|
||||||
|
void delaySPI_1000() { DELAY_NS(1000 - CALLING_COST_NS); }
|
||||||
|
void delaySPI_2000() { DELAY_NS(2000 - CALLING_COST_NS); }
|
||||||
|
void delaySPI_4000() { DELAY_NS(4000 - CALLING_COST_NS); }
|
||||||
|
|
||||||
void spiInit(uint8_t spiRate) {
|
void spiInit(uint8_t spiRate) {
|
||||||
// Use datarates Marlin uses
|
// Use datarates Marlin uses
|
||||||
switch (spiRate) {
|
switch (spiRate) {
|
||||||
case SPI_FULL_SPEED: delay_STM32_soft_spi = 125; break; // desired: 8,000,000 actual: ~1.1M
|
case SPI_FULL_SPEED: delaySPIFunc = &delaySPI_125; break; // desired: 8,000,000 actual: ~1.1M
|
||||||
case SPI_HALF_SPEED: delay_STM32_soft_spi = 125; break; // desired: 4,000,000 actual: ~1.1M
|
case SPI_HALF_SPEED: delaySPIFunc = &delaySPI_125; break; // desired: 4,000,000 actual: ~1.1M
|
||||||
case SPI_QUARTER_SPEED:delay_STM32_soft_spi = 250; break; // desired: 2,000,000 actual: ~890K
|
case SPI_QUARTER_SPEED:delaySPIFunc = &delaySPI_250; break; // desired: 2,000,000 actual: ~890K
|
||||||
case SPI_EIGHTH_SPEED: delay_STM32_soft_spi = 500; break; // desired: 1,000,000 actual: ~590K
|
case SPI_EIGHTH_SPEED: delaySPIFunc = &delaySPI_500; break; // desired: 1,000,000 actual: ~590K
|
||||||
case SPI_SPEED_5: delay_STM32_soft_spi = 1000; break; // desired: 500,000 actual: ~360K
|
case SPI_SPEED_5: delaySPIFunc = &delaySPI_1000; break; // desired: 500,000 actual: ~360K
|
||||||
case SPI_SPEED_6: delay_STM32_soft_spi = 2000; break; // desired: 250,000 actual: ~210K
|
case SPI_SPEED_6: delaySPIFunc = &delaySPI_2000; break; // desired: 250,000 actual: ~210K
|
||||||
default: delay_STM32_soft_spi = 4000; break; // desired: 125,000 actual: ~123K
|
default: delaySPIFunc = &delaySPI_4000; break; // desired: 125,000 actual: ~123K
|
||||||
}
|
}
|
||||||
SPI.begin();
|
SPI.begin();
|
||||||
}
|
}
|
||||||
|
@ -75,9 +85,9 @@ static SPISettings spiConfig;
|
||||||
WRITE(SD_SCK_PIN, LOW);
|
WRITE(SD_SCK_PIN, LOW);
|
||||||
WRITE(SD_MOSI_PIN, b & 0x80);
|
WRITE(SD_MOSI_PIN, b & 0x80);
|
||||||
|
|
||||||
DELAY_NS(delay_STM32_soft_spi);
|
delaySPIFunc();
|
||||||
WRITE(SD_SCK_PIN, HIGH);
|
WRITE(SD_SCK_PIN, HIGH);
|
||||||
DELAY_NS(delay_STM32_soft_spi);
|
delaySPIFunc();
|
||||||
|
|
||||||
b <<= 1; // little setup time
|
b <<= 1; // little setup time
|
||||||
b |= (READ(SD_MISO_PIN) != 0);
|
b |= (READ(SD_MISO_PIN) != 0);
|
||||||
|
|
176
Marlin/src/HAL/shared/Delay.cpp
Normal file
176
Marlin/src/HAL/shared/Delay.cpp
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
/**
|
||||||
|
* Marlin 3D Printer Firmware
|
||||||
|
* Copyright (c) 2020 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
|
||||||
|
*
|
||||||
|
* Based on Sprinter and grbl.
|
||||||
|
* Copyright (c) 2011 Camiel Gubbels / Erik van der Zalm
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "Delay.h"
|
||||||
|
|
||||||
|
#include "../../inc/MarlinConfig.h"
|
||||||
|
|
||||||
|
#if defined(__arm__) || defined(__thumb__)
|
||||||
|
|
||||||
|
static uint32_t ASM_CYCLES_PER_ITERATION = 4; // Initial bet which will be adjusted in calibrate_delay_loop
|
||||||
|
|
||||||
|
// Simple assembler loop counting down
|
||||||
|
void delay_asm(uint32_t cy) {
|
||||||
|
cy = _MAX(cy / ASM_CYCLES_PER_ITERATION, 1U); // Zero is forbidden here
|
||||||
|
__asm__ __volatile__(
|
||||||
|
A(".syntax unified") // is to prevent CM0,CM1 non-unified syntax
|
||||||
|
L("1")
|
||||||
|
A("subs %[cnt],#1")
|
||||||
|
A("bne 1b")
|
||||||
|
: [cnt]"+r"(cy) // output: +r means input+output
|
||||||
|
: // input:
|
||||||
|
: "cc" // clobbers:
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can't use CMSIS since it's not available on all platform, so fallback to hardcoded register values
|
||||||
|
#define HW_REG(X) *(volatile uint32_t *)(X)
|
||||||
|
#define _DWT_CTRL 0xE0001000
|
||||||
|
#define _DWT_CYCCNT 0xE0001004 // CYCCNT is 32bits, takes 37s or so to wrap.
|
||||||
|
#define _DEM_CR 0xE000EDFC
|
||||||
|
#define _LAR 0xE0001FB0
|
||||||
|
|
||||||
|
// Use hardware cycle counter instead, it's much safer
|
||||||
|
void delay_dwt(uint32_t count) {
|
||||||
|
// Reuse the ASM_CYCLES_PER_ITERATION variable to avoid wasting another useless variable
|
||||||
|
register uint32_t start = HW_REG(_DWT_CYCCNT) - ASM_CYCLES_PER_ITERATION, elapsed;
|
||||||
|
do {
|
||||||
|
elapsed = HW_REG(_DWT_CYCCNT) - start;
|
||||||
|
} while (elapsed < count);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pointer to asm function, calling the functions has a ~20 cycles overhead
|
||||||
|
DelayImpl DelayCycleFnc = delay_asm;
|
||||||
|
|
||||||
|
void calibrate_delay_loop() {
|
||||||
|
// Check if we have a working DWT implementation in the CPU (see https://developer.arm.com/documentation/ddi0439/b/Data-Watchpoint-and-Trace-Unit/DWT-Programmers-Model)
|
||||||
|
if (!HW_REG(_DWT_CTRL)) {
|
||||||
|
// No DWT present, so fallback to plain old ASM nop counting
|
||||||
|
// Unfortunately, we don't exactly know how many iteration it'll take to decrement a counter in a loop
|
||||||
|
// It depends on the CPU architecture, the code current position (flash vs SRAM)
|
||||||
|
// So, instead of wild guessing and making mistake, instead
|
||||||
|
// compute it once for all
|
||||||
|
ASM_CYCLES_PER_ITERATION = 1;
|
||||||
|
// We need to fetch some reference clock before waiting
|
||||||
|
cli();
|
||||||
|
uint32_t start = micros();
|
||||||
|
delay_asm(1000); // On a typical CPU running in MHz, waiting 1000 "unknown cycles" means it'll take between 1ms to 6ms, that's perfectly acceptable
|
||||||
|
uint32_t end = micros();
|
||||||
|
sei();
|
||||||
|
uint32_t expectedCycles = (end - start) * ((F_CPU) / 1000000UL); // Convert microseconds to cycles
|
||||||
|
// Finally compute the right scale
|
||||||
|
ASM_CYCLES_PER_ITERATION = (uint32_t)(expectedCycles / 1000);
|
||||||
|
|
||||||
|
// No DWT present, likely a Cortex M0 so NOP counting is our best bet here
|
||||||
|
DelayCycleFnc = delay_asm;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Enable DWT counter
|
||||||
|
// From https://stackoverflow.com/a/41188674/1469714
|
||||||
|
HW_REG(_DEM_CR) = HW_REG(_DEM_CR) | 0x01000000; // Enable trace
|
||||||
|
#if __CORTEX_M == 7
|
||||||
|
HW_REG(_LAR) = 0xC5ACCE55; // Unlock access to DWT registers, see https://developer.arm.com/documentation/ihi0029/e/ section B2.3.10
|
||||||
|
#endif
|
||||||
|
HW_REG(_DWT_CYCCNT) = 0; // Clear DWT cycle counter
|
||||||
|
HW_REG(_DWT_CTRL) = HW_REG(_DWT_CTRL) | 1; // Enable DWT cycle counter
|
||||||
|
|
||||||
|
// Then calibrate the constant offset from the counter
|
||||||
|
ASM_CYCLES_PER_ITERATION = 0;
|
||||||
|
uint32_t s = HW_REG(_DWT_CYCCNT);
|
||||||
|
uint32_t e = HW_REG(_DWT_CYCCNT); // (e - s) contains the number of cycle required to read the cycle counter
|
||||||
|
delay_dwt(0);
|
||||||
|
uint32_t f = HW_REG(_DWT_CYCCNT); // (f - e) contains the delay to call the delay function + the time to read the cycle counter
|
||||||
|
ASM_CYCLES_PER_ITERATION = (f - e) - (e - s);
|
||||||
|
|
||||||
|
// Use safer DWT function
|
||||||
|
DelayCycleFnc = delay_dwt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if ENABLED(MARLIN_DEV_MODE)
|
||||||
|
void dump_delay_accuracy_check()
|
||||||
|
{
|
||||||
|
auto report_call_time = [](PGM_P const name, const uint32_t cycles, const uint32_t total, const bool do_flush=true) {
|
||||||
|
SERIAL_ECHOPGM("Calling ");
|
||||||
|
serialprintPGM(name);
|
||||||
|
SERIAL_ECHOLNPAIR(" for ", cycles, "cycles took: ", total, "cycles");
|
||||||
|
if (do_flush) SERIAL_FLUSH();
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t s, e;
|
||||||
|
|
||||||
|
SERIAL_ECHOLNPAIR("Computed delay calibration value: ", ASM_CYCLES_PER_ITERATION);
|
||||||
|
SERIAL_FLUSH();
|
||||||
|
// Display the results of the calibration above
|
||||||
|
constexpr uint32_t testValues[] = { 1, 5, 10, 20, 50, 100, 150, 200, 350, 500, 750, 1000 };
|
||||||
|
for (auto i : testValues) {
|
||||||
|
s = micros(); DELAY_US(i); e = micros();
|
||||||
|
report_call_time(PSTR("delay"), i, e - s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (HW_REG(_DWT_CTRL)) {
|
||||||
|
for (auto i : testValues) {
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES(i); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(PSTR("delay"), i, e - s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure the delay to call a real function compared to a function pointer
|
||||||
|
s = HW_REG(_DWT_CYCCNT); delay_dwt(1); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(PSTR("delay_dwt"), 1, e - s);
|
||||||
|
|
||||||
|
static PGMSTR(dcd, "DELAY_CYCLES directly ");
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES( 1); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 1, e - s, false);
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES( 5); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 5, e - s, false);
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES(10); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 10, e - s, false);
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES(20); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 20, e - s, false);
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES(50); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 50, e - s, false);
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES(100); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 100, e - s, false);
|
||||||
|
|
||||||
|
s = HW_REG(_DWT_CYCCNT); DELAY_CYCLES(200); e = HW_REG(_DWT_CYCCNT);
|
||||||
|
report_call_time(dcd, 200, e - s, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif // MARLIN_DEV_MODE
|
||||||
|
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
void calibrate_delay_loop() {}
|
||||||
|
#if ENABLED(MARLIN_DEV_MODE)
|
||||||
|
void dump_delay_accuracy_check() {
|
||||||
|
static PGMSTR(none, "N/A on this platform");
|
||||||
|
serialprintPGM(none);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -21,6 +21,8 @@
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "../../inc/MarlinConfigPre.h"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Busy wait delay cycles routines:
|
* Busy wait delay cycles routines:
|
||||||
*
|
*
|
||||||
|
@ -31,79 +33,68 @@
|
||||||
|
|
||||||
#include "../../core/macros.h"
|
#include "../../core/macros.h"
|
||||||
|
|
||||||
|
void calibrate_delay_loop();
|
||||||
|
|
||||||
#if defined(__arm__) || defined(__thumb__)
|
#if defined(__arm__) || defined(__thumb__)
|
||||||
|
|
||||||
#if __CORTEX_M == 7
|
// We want to have delay_cycle function with the lowest possible overhead, so we adjust at the function at runtime based on the current CPU best feature
|
||||||
|
typedef void (*DelayImpl)(uint32_t);
|
||||||
|
extern DelayImpl DelayCycleFnc;
|
||||||
|
|
||||||
// Cortex-M3 through M7 can use the cycle counter of the DWT unit
|
// I've measured 36 cycles on my system to call the cycle waiting method, but it shouldn't change much to have a bit more margin, it only consume a bit more flash
|
||||||
// https://www.anthonyvh.com/2017/05/18/cortex_m-cycle_counter/
|
#define TRIP_POINT_FOR_CALLING_FUNCTION 40
|
||||||
|
|
||||||
FORCE_INLINE static void enableCycleCounter() {
|
// A simple recursive template class that output exactly one 'nop' of code per recursion
|
||||||
CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
|
template <int N> struct NopWriter {
|
||||||
|
FORCE_INLINE static void build() {
|
||||||
#if __CORTEX_M == 7
|
__asm__ __volatile__("nop");
|
||||||
DWT->LAR = 0xC5ACCE55; // Unlock DWT on the M7
|
NopWriter<N-1>::build();
|
||||||
#endif
|
|
||||||
|
|
||||||
DWT->CYCCNT = 0;
|
|
||||||
DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
// End the loop
|
||||||
|
template <> struct NopWriter<0> { FORCE_INLINE static void build() {} };
|
||||||
|
|
||||||
FORCE_INLINE volatile uint32_t getCycleCount() { return DWT->CYCCNT; }
|
namespace Private {
|
||||||
|
// Split recursing template in 2 different class so we don't reach the maximum template instantiation depth limit
|
||||||
FORCE_INLINE static void DELAY_CYCLES(const uint32_t x) {
|
template <bool belowTP, int N> struct Helper {
|
||||||
const uint32_t endCycles = getCycleCount() + x;
|
FORCE_INLINE static void build() {
|
||||||
while (PENDING(getCycleCount(), endCycles)) {}
|
DelayCycleFnc(N - 2); // Approximative cost of calling the function (might be off by one or 2 cycles)
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
// https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
|
|
||||||
|
|
||||||
#define nop() __asm__ __volatile__("nop;\n\t":::)
|
|
||||||
|
|
||||||
FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
|
|
||||||
#if ARCH_PIPELINE_RELOAD_CYCLES < 2
|
|
||||||
#define EXTRA_NOP_CYCLES A("nop")
|
|
||||||
#else
|
|
||||||
#define EXTRA_NOP_CYCLES ""
|
|
||||||
#endif
|
|
||||||
|
|
||||||
__asm__ __volatile__(
|
|
||||||
A(".syntax unified") // is to prevent CM0,CM1 non-unified syntax
|
|
||||||
L("1")
|
|
||||||
A("subs %[cnt],#1")
|
|
||||||
EXTRA_NOP_CYCLES
|
|
||||||
A("bne 1b")
|
|
||||||
: [cnt]"+r"(cy) // output: +r means input+output
|
|
||||||
: // input:
|
|
||||||
: "cc" // clobbers:
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Delay in cycles
|
|
||||||
FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
|
|
||||||
|
|
||||||
if (__builtin_constant_p(x)) {
|
|
||||||
#define MAXNOPS 4
|
|
||||||
|
|
||||||
if (x <= (MAXNOPS)) {
|
|
||||||
switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
|
|
||||||
}
|
|
||||||
else { // because of +1 cycle inside delay_4cycles
|
|
||||||
const uint32_t rem = (x - 1) % (MAXNOPS);
|
|
||||||
switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
|
|
||||||
if ((x = (x - 1) / (MAXNOPS)))
|
|
||||||
__delay_4cycles(x); // if need more then 4 nop loop is more optimal
|
|
||||||
}
|
|
||||||
#undef MAXNOPS
|
|
||||||
}
|
}
|
||||||
else if ((x >>= 2))
|
};
|
||||||
__delay_4cycles(x);
|
|
||||||
}
|
|
||||||
#undef nop
|
|
||||||
|
|
||||||
#endif
|
template <int N> struct Helper<true, N> {
|
||||||
|
FORCE_INLINE static void build() {
|
||||||
|
NopWriter<N - 1>::build();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <> struct Helper<true, 0> {
|
||||||
|
FORCE_INLINE static void build() {}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
// Select a behavior based on the constexpr'ness of the parameter
|
||||||
|
// If called with a compile-time parameter, then write as many NOP as required to reach the asked cycle count
|
||||||
|
// (there is some tripping point here to start looping when it's more profitable than gruntly executing NOPs)
|
||||||
|
// If not called from a compile-time parameter, fallback to a runtime loop counting version instead
|
||||||
|
template <bool compileTime, int Cycles>
|
||||||
|
struct SmartDelay {
|
||||||
|
FORCE_INLINE SmartDelay(int) {
|
||||||
|
if (Cycles == 0) return;
|
||||||
|
Private::Helper<Cycles < TRIP_POINT_FOR_CALLING_FUNCTION, Cycles>::build();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// Runtime version below. There is no way this would run under less than ~TRIP_POINT_FOR_CALLING_FUNCTION cycles
|
||||||
|
template <int T>
|
||||||
|
struct SmartDelay<false, T> {
|
||||||
|
FORCE_INLINE SmartDelay(int v) { DelayCycleFnc(v); }
|
||||||
|
};
|
||||||
|
|
||||||
|
#define DELAY_CYCLES(X) do { SmartDelay<IS_CONSTEXPR(X), IS_CONSTEXPR(X) ? X : 0> _smrtdly_X(X); } while(0)
|
||||||
|
|
||||||
|
// For delay in microseconds, no smart delay selection is required, directly call the delay function
|
||||||
|
// Teensy compiler is too old and does not accept smart delay compile-time / run-time selection correctly
|
||||||
|
#define DELAY_US(x) DelayCycleFnc((x) * ((F_CPU) / 1000000UL))
|
||||||
|
|
||||||
#elif defined(__AVR__)
|
#elif defined(__AVR__)
|
||||||
|
|
||||||
|
@ -144,10 +135,15 @@
|
||||||
}
|
}
|
||||||
#undef nop
|
#undef nop
|
||||||
|
|
||||||
|
// Delay in microseconds
|
||||||
|
#define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
|
||||||
|
|
||||||
#elif defined(__PLAT_LINUX__) || defined(ESP32)
|
#elif defined(__PLAT_LINUX__) || defined(ESP32)
|
||||||
|
|
||||||
// specified inside platform
|
// DELAY_CYCLES specified inside platform
|
||||||
|
|
||||||
|
// Delay in microseconds
|
||||||
|
#define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#error "Unsupported MCU architecture"
|
#error "Unsupported MCU architecture"
|
||||||
|
@ -157,5 +153,5 @@
|
||||||
// Delay in nanoseconds
|
// Delay in nanoseconds
|
||||||
#define DELAY_NS(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL) / 1000UL)
|
#define DELAY_NS(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL) / 1000UL)
|
||||||
|
|
||||||
// Delay in microseconds
|
|
||||||
#define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))
|
|
||||||
|
|
|
@ -1005,6 +1005,9 @@ void setup() {
|
||||||
SERIAL_ECHO_MSG("Compiled: " __DATE__);
|
SERIAL_ECHO_MSG("Compiled: " __DATE__);
|
||||||
SERIAL_ECHO_MSG(STR_FREE_MEMORY, freeMemory(), STR_PLANNER_BUFFER_BYTES, (int)sizeof(block_t) * (BLOCK_BUFFER_SIZE));
|
SERIAL_ECHO_MSG(STR_FREE_MEMORY, freeMemory(), STR_PLANNER_BUFFER_BYTES, (int)sizeof(block_t) * (BLOCK_BUFFER_SIZE));
|
||||||
|
|
||||||
|
// Some HAL need precise delay adjustment
|
||||||
|
calibrate_delay_loop();
|
||||||
|
|
||||||
// Init buzzer pin(s)
|
// Init buzzer pin(s)
|
||||||
#if USE_BEEPER
|
#if USE_BEEPER
|
||||||
SETUP_RUN(buzzer.init());
|
SETUP_RUN(buzzer.init());
|
||||||
|
|
|
@ -61,6 +61,8 @@
|
||||||
#define _O2 __attribute__((optimize("O2")))
|
#define _O2 __attribute__((optimize("O2")))
|
||||||
#define _O3 __attribute__((optimize("O3")))
|
#define _O3 __attribute__((optimize("O3")))
|
||||||
|
|
||||||
|
#define IS_CONSTEXPR(...) __builtin_constant_p(__VA_ARGS__) // Only valid solution with C++14. Should use std::is_constant_evaluated() in C++20 instead
|
||||||
|
|
||||||
#ifndef UNUSED
|
#ifndef UNUSED
|
||||||
#define UNUSED(x) ((void)(x))
|
#define UNUSED(x) ((void)(x))
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -30,6 +30,8 @@
|
||||||
#include "../HAL/shared/eeprom_if.h"
|
#include "../HAL/shared/eeprom_if.h"
|
||||||
#include "../HAL/shared/Delay.h"
|
#include "../HAL/shared/Delay.h"
|
||||||
|
|
||||||
|
extern void dump_delay_accuracy_check();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dn: G-code for development and testing
|
* Dn: G-code for development and testing
|
||||||
*
|
*
|
||||||
|
@ -141,7 +143,7 @@
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case 5: { // D4 Read / Write onboard Flash
|
case 5: { // D5 Read / Write onboard Flash
|
||||||
#define FLASH_SIZE 1024
|
#define FLASH_SIZE 1024
|
||||||
uint8_t *pointer = parser.hex_adr_val('A');
|
uint8_t *pointer = parser.hex_adr_val('A');
|
||||||
uint16_t len = parser.ushortval('C', 1);
|
uint16_t len = parser.ushortval('C', 1);
|
||||||
|
@ -162,6 +164,10 @@
|
||||||
}
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
|
case 6: // D6 Check delay loop accuracy
|
||||||
|
dump_delay_accuracy_check();
|
||||||
|
break;
|
||||||
|
|
||||||
case 100: { // D100 Disable heaters and attempt a hard hang (Watchdog Test)
|
case 100: { // D100 Disable heaters and attempt a hard hang (Watchdog Test)
|
||||||
SERIAL_ECHOLNPGM("Disabling heaters and attempting to trigger Watchdog");
|
SERIAL_ECHOLNPGM("Disabling heaters and attempting to trigger Watchdog");
|
||||||
SERIAL_ECHOLNPGM("(USE_WATCHDOG " TERN(USE_WATCHDOG, "ENABLED", "DISABLED") ")");
|
SERIAL_ECHOLNPGM("(USE_WATCHDOG " TERN(USE_WATCHDOG, "ENABLED", "DISABLED") ")");
|
||||||
|
|
Loading…
Reference in a new issue