From 0c68794fa920838c289373314c9e1b172bbfa676 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0t=C4=9Bp=C3=A1n=20Daleck=C3=BD?= <36531759+daleckystepan@users.noreply.github.com> Date: Sun, 3 May 2020 06:59:09 +0200 Subject: [PATCH] Improve JD acosx implementation (#17817) --- Marlin/src/module/planner.cpp | 100 +++++++++++++++++++++++++++------- Marlin/src/module/planner.h | 15 ++++- 2 files changed, 93 insertions(+), 22 deletions(-) diff --git a/Marlin/src/module/planner.cpp b/Marlin/src/module/planner.cpp index 117a8b6fd..54714715f 100644 --- a/Marlin/src/module/planner.cpp +++ b/Marlin/src/module/planner.cpp @@ -2304,28 +2304,87 @@ bool Planner::_populate_block(block_t * const block, bool split_move, const float junction_acceleration = limit_value_by_axis_maximum(block->acceleration, junction_unit_vec), sin_theta_d2 = SQRT(0.5f * (1.0f - junction_cos_theta)); // Trig half angle identity. Always positive. - vmax_junction_sqr = (junction_acceleration * junction_deviation_mm * sin_theta_d2) / (1.0f - sin_theta_d2); + vmax_junction_sqr = JUNC_SQ(junction_acceleration, sin_theta_d2); if (block->millimeters < 1) { - // Fast acos approximation (max. error +-0.033 rads) - // Based on MinMax polynomial published by W. Randolph Franklin, see - // https://wrf.ecse.rpi.edu/Research/Short_Notes/arcsin/onlyelem.html - // (acos(x) = pi / 2 - asin(x)) - const float neg = junction_cos_theta < 0 ? -1 : 1, - t = neg * junction_cos_theta, - asinx = 0.032843707f - + t * (-1.451838349f - + t * ( 29.66153956f - + t * (-131.1123477f - + t * ( 262.8130562f - + t * (-242.7199627f + t * 84.31466202f) )))), - junction_theta = RADIANS(90) - neg * asinx; + t = neg * junction_cos_theta; // If angle is greater than 135 degrees (octagon), find speed for approximate arc - if (junction_theta > RADIANS(135)) { - // NOTE: MinMax acos approximation and thereby also junction_theta top out at pi-0.033, which avoids division by 0 - const float limit_sqr = block->millimeters / (RADIANS(180) - junction_theta) * junction_acceleration; + if (t < -0.7071067812f) { + + #if ENABLED(JD_USE_MATH_ACOS) + + #error "TODO: Inline maths with the MCU / FPU." + + #elif ENABLED(JD_USE_LOOKUP_TABLE) + + // Fast acos approximation (max. error +-0.01 rads) + // Based on LUT table and linear interpolation + + /** + * // Generate the JD Lookup Table + * constexpr float c = 1.00751317f; // Correction factor to center error around 0 + * for (int i = 0; i < jd_lut_count - 1; ++i) { + * const float x0 = (sq(i) - 1) / sq(i), + * y0 = acos(x0) * (i ? c : 1), + * x1 = 0.5 * x0 + 0.5, + * y1 = acos(x1) * c; + * jd_lut_k[i] = (y0 - y1) / (x0 - x1); + * jd_lut_b[i] = (y1 * x0 - y0 * x1) / (x0 - x1); + * } + * jd_lut_k[jd_lut_count - 1] = jd_lut_b[jd_lut_count - 1] = 0; + * + * // Compute correction factor (Set c to 1.0f first!) + * float min = INFINITY, max = -min; + * for (float t = 0; t <= 1; t += 0.0003f) { + * const float e = acos(t) / approx(t); + * if (isfinite(e)) { + * NOMORE(min, e); + * NOLESS(max, e); + * } + * } + * fprintf(stderr, "%.9gf, ", (min + max) / 2); + */ + static constexpr int16_t jd_lut_count = 15; + static constexpr uint16_t jd_lut_tll = 1 << jd_lut_count; + static constexpr int16_t jd_lut_tll0 = __builtin_clz(jd_lut_tll) + 1; // i.e., 16 - jd_lut_count + static constexpr float jd_lut_k[jd_lut_count] PROGMEM = { + -1.03146219f, -1.30760407f, -1.75205469f, -2.41705418f, -3.37768555f, + -4.74888229f, -6.69648552f, -9.45659828f, -13.3640289f, -18.8927879f, + -26.7136307f, -37.7754059f, -53.4200745f, -75.5457306f, 0.0f }; + static constexpr float jd_lut_b[jd_lut_count] PROGMEM = { + 1.57079637f, 1.70886743f, 2.04220533f, 2.62408018f, 3.52467203f, + 4.85301876f, 6.77019119f, 9.50873947f, 13.4009094f, 18.9188652f, + 26.7320709f, 37.7884521f, 53.4292908f, 75.5522461f, 0.0f }; + + const int16_t idx = (t == 0.0f) ? 0 : __builtin_clz(int16_t((1.0f - t) * jd_lut_tll)) - jd_lut_tll0; + + float junction_theta = t * pgm_read_float(&jd_lut_k[idx]) + pgm_read_float(&jd_lut_b[idx]); + if (neg > 0) junction_theta = RADIANS(180) - junction_theta; + + #else + + // Fast acos(-t) approximation (max. error +-0.033rad = 1.89°) + // Based on MinMax polynomial published by W. Randolph Franklin, see + // https://wrf.ecse.rpi.edu/Research/Short_Notes/arcsin/onlyelem.html + // acos( t) = pi / 2 - asin(x) + // acos(-t) = pi - acos(t) ... pi / 2 + asin(x) + + const float asinx = 0.032843707f + + t * (-1.451838349f + + t * ( 29.66153956f + + t * (-131.1123477f + + t * ( 262.8130562f + + t * (-242.7199627f + + t * ( 84.31466202f ) ))))), + junction_theta = RADIANS(90) + neg * asinx; // acos(-t) + + // NOTE: junction_theta bottoms out at 0.033 which avoids divide by 0. + + #endif + + const float limit_sqr = (block->millimeters * junction_acceleration) / junction_theta; NOMORE(vmax_junction_sqr, limit_sqr); } } @@ -2363,11 +2422,10 @@ bool Planner::_populate_block(block_t * const block, bool split_move, // Start with a safe speed (from which the machine may halt to stop immediately). float safe_speed = nominal_speed; - #ifdef TRAVEL_EXTRA_XYJERK - const float extra_xyjerk = (de <= 0) ? TRAVEL_EXTRA_XYJERK : 0; - #else - constexpr float extra_xyjerk = 0; + #ifndef TRAVEL_EXTRA_XYJERK + #define TRAVEL_EXTRA_XYJERK 0 #endif + const float extra_xyjerk = (de <= 0) ? TRAVEL_EXTRA_XYJERK : 0; uint8_t limited = 0; TERN(HAS_LINEAR_E_JERK, LOOP_XYZ, LOOP_XYZE)(i) { diff --git a/Marlin/src/module/planner.h b/Marlin/src/module/planner.h index 7b029cba0..ccc7faf70 100644 --- a/Marlin/src/module/planner.h +++ b/Marlin/src/module/planner.h @@ -32,6 +32,17 @@ #include "../MarlinCore.h" +#if HAS_JUNCTION_DEVIATION + // Enable this option for perfect accuracy but maximum + // computation. Should be fine on ARM processors. + //#define JD_USE_MATH_ACOS + + // Disable this option to save 120 bytes of PROGMEM, + // but incur increased computation and a reduction + // in accuracy. + #define JD_USE_LOOKUP_TABLE +#endif + #include "motion.h" #include "../gcode/queue.h" @@ -827,9 +838,11 @@ class Planner { static void autotemp_update(); #endif + #define JUNC_SQ(N,ST) (junction_deviation_mm * (N) * (ST) / (1.0f - (ST))) + #if HAS_LINEAR_E_JERK FORCE_INLINE static void recalculate_max_e_jerk() { - #define GET_MAX_E_JERK(N) SQRT(SQRT(0.5) * junction_deviation_mm * (N) * RECIPROCAL(1.0 - SQRT(0.5))) + #define GET_MAX_E_JERK(N) SQRT(JUNC_SQ(N,SQRT(0.5))) #if ENABLED(DISTINCT_E_FACTORS) LOOP_L_N(i, EXTRUDERS) max_e_jerk[i] = GET_MAX_E_JERK(settings.max_acceleration_mm_per_s2[E_AXIS_N(i)]);