Improve JD acosx implementation (#17817)

This commit is contained in:
Štěpán Dalecký 2020-05-03 06:59:09 +02:00 committed by GitHub
parent 9a4a768ee3
commit 0c68794fa9
Signed by: GitHub
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 93 additions and 22 deletions

View file

@ -2304,28 +2304,87 @@ bool Planner::_populate_block(block_t * const block, bool split_move,
const float junction_acceleration = limit_value_by_axis_maximum(block->acceleration, junction_unit_vec),
sin_theta_d2 = SQRT(0.5f * (1.0f - junction_cos_theta)); // Trig half angle identity. Always positive.
vmax_junction_sqr = (junction_acceleration * junction_deviation_mm * sin_theta_d2) / (1.0f - sin_theta_d2);
vmax_junction_sqr = JUNC_SQ(junction_acceleration, sin_theta_d2);
if (block->millimeters < 1) {
// Fast acos approximation (max. error +-0.033 rads)
// Based on MinMax polynomial published by W. Randolph Franklin, see
// https://wrf.ecse.rpi.edu/Research/Short_Notes/arcsin/onlyelem.html
// (acos(x) = pi / 2 - asin(x))
const float neg = junction_cos_theta < 0 ? -1 : 1,
t = neg * junction_cos_theta,
asinx = 0.032843707f
+ t * (-1.451838349f
+ t * ( 29.66153956f
+ t * (-131.1123477f
+ t * ( 262.8130562f
+ t * (-242.7199627f + t * 84.31466202f) )))),
junction_theta = RADIANS(90) - neg * asinx;
t = neg * junction_cos_theta;
// If angle is greater than 135 degrees (octagon), find speed for approximate arc
if (junction_theta > RADIANS(135)) {
// NOTE: MinMax acos approximation and thereby also junction_theta top out at pi-0.033, which avoids division by 0
const float limit_sqr = block->millimeters / (RADIANS(180) - junction_theta) * junction_acceleration;
if (t < -0.7071067812f) {
#if ENABLED(JD_USE_MATH_ACOS)
#error "TODO: Inline maths with the MCU / FPU."
#elif ENABLED(JD_USE_LOOKUP_TABLE)
// Fast acos approximation (max. error +-0.01 rads)
// Based on LUT table and linear interpolation
/**
* // Generate the JD Lookup Table
* constexpr float c = 1.00751317f; // Correction factor to center error around 0
* for (int i = 0; i < jd_lut_count - 1; ++i) {
* const float x0 = (sq(i) - 1) / sq(i),
* y0 = acos(x0) * (i ? c : 1),
* x1 = 0.5 * x0 + 0.5,
* y1 = acos(x1) * c;
* jd_lut_k[i] = (y0 - y1) / (x0 - x1);
* jd_lut_b[i] = (y1 * x0 - y0 * x1) / (x0 - x1);
* }
* jd_lut_k[jd_lut_count - 1] = jd_lut_b[jd_lut_count - 1] = 0;
*
* // Compute correction factor (Set c to 1.0f first!)
* float min = INFINITY, max = -min;
* for (float t = 0; t <= 1; t += 0.0003f) {
* const float e = acos(t) / approx(t);
* if (isfinite(e)) {
* NOMORE(min, e);
* NOLESS(max, e);
* }
* }
* fprintf(stderr, "%.9gf, ", (min + max) / 2);
*/
static constexpr int16_t jd_lut_count = 15;
static constexpr uint16_t jd_lut_tll = 1 << jd_lut_count;
static constexpr int16_t jd_lut_tll0 = __builtin_clz(jd_lut_tll) + 1; // i.e., 16 - jd_lut_count
static constexpr float jd_lut_k[jd_lut_count] PROGMEM = {
-1.03146219f, -1.30760407f, -1.75205469f, -2.41705418f, -3.37768555f,
-4.74888229f, -6.69648552f, -9.45659828f, -13.3640289f, -18.8927879f,
-26.7136307f, -37.7754059f, -53.4200745f, -75.5457306f, 0.0f };
static constexpr float jd_lut_b[jd_lut_count] PROGMEM = {
1.57079637f, 1.70886743f, 2.04220533f, 2.62408018f, 3.52467203f,
4.85301876f, 6.77019119f, 9.50873947f, 13.4009094f, 18.9188652f,
26.7320709f, 37.7884521f, 53.4292908f, 75.5522461f, 0.0f };
const int16_t idx = (t == 0.0f) ? 0 : __builtin_clz(int16_t((1.0f - t) * jd_lut_tll)) - jd_lut_tll0;
float junction_theta = t * pgm_read_float(&jd_lut_k[idx]) + pgm_read_float(&jd_lut_b[idx]);
if (neg > 0) junction_theta = RADIANS(180) - junction_theta;
#else
// Fast acos(-t) approximation (max. error +-0.033rad = 1.89°)
// Based on MinMax polynomial published by W. Randolph Franklin, see
// https://wrf.ecse.rpi.edu/Research/Short_Notes/arcsin/onlyelem.html
// acos( t) = pi / 2 - asin(x)
// acos(-t) = pi - acos(t) ... pi / 2 + asin(x)
const float asinx = 0.032843707f
+ t * (-1.451838349f
+ t * ( 29.66153956f
+ t * (-131.1123477f
+ t * ( 262.8130562f
+ t * (-242.7199627f
+ t * ( 84.31466202f ) ))))),
junction_theta = RADIANS(90) + neg * asinx; // acos(-t)
// NOTE: junction_theta bottoms out at 0.033 which avoids divide by 0.
#endif
const float limit_sqr = (block->millimeters * junction_acceleration) / junction_theta;
NOMORE(vmax_junction_sqr, limit_sqr);
}
}
@ -2363,11 +2422,10 @@ bool Planner::_populate_block(block_t * const block, bool split_move,
// Start with a safe speed (from which the machine may halt to stop immediately).
float safe_speed = nominal_speed;
#ifdef TRAVEL_EXTRA_XYJERK
const float extra_xyjerk = (de <= 0) ? TRAVEL_EXTRA_XYJERK : 0;
#else
constexpr float extra_xyjerk = 0;
#ifndef TRAVEL_EXTRA_XYJERK
#define TRAVEL_EXTRA_XYJERK 0
#endif
const float extra_xyjerk = (de <= 0) ? TRAVEL_EXTRA_XYJERK : 0;
uint8_t limited = 0;
TERN(HAS_LINEAR_E_JERK, LOOP_XYZ, LOOP_XYZE)(i) {

View file

@ -32,6 +32,17 @@
#include "../MarlinCore.h"
#if HAS_JUNCTION_DEVIATION
// Enable this option for perfect accuracy but maximum
// computation. Should be fine on ARM processors.
//#define JD_USE_MATH_ACOS
// Disable this option to save 120 bytes of PROGMEM,
// but incur increased computation and a reduction
// in accuracy.
#define JD_USE_LOOKUP_TABLE
#endif
#include "motion.h"
#include "../gcode/queue.h"
@ -827,9 +838,11 @@ class Planner {
static void autotemp_update();
#endif
#define JUNC_SQ(N,ST) (junction_deviation_mm * (N) * (ST) / (1.0f - (ST)))
#if HAS_LINEAR_E_JERK
FORCE_INLINE static void recalculate_max_e_jerk() {
#define GET_MAX_E_JERK(N) SQRT(SQRT(0.5) * junction_deviation_mm * (N) * RECIPROCAL(1.0 - SQRT(0.5)))
#define GET_MAX_E_JERK(N) SQRT(JUNC_SQ(N,SQRT(0.5)))
#if ENABLED(DISTINCT_E_FACTORS)
LOOP_L_N(i, EXTRUDERS)
max_e_jerk[i] = GET_MAX_E_JERK(settings.max_acceleration_mm_per_s2[E_AXIS_N(i)]);