libm: optimistic pow calling.
Call optimized pow optimistically and revert to full range
implementation if we detect an out-of-range input.
Change-Id: I6f3aa734adbf99484b7ff70736ef83a41e5815b8
(cherry picked from commit e7fd29685a75f1d26061ac511e7a29479fd871b5)
(cherry picked from commit 7916899916973ca5695526ae45283e54709df32a)
(cherry picked from commit d39a7f744c78409949482e3cb3c9710e8f281a33)
diff --git a/libm/arm/e_pow.S b/libm/arm/e_pow.S
index 0e922db..0e6240d 100644
--- a/libm/arm/e_pow.S
+++ b/libm/arm/e_pow.S
@@ -33,6 +33,7 @@
#define int_1 d29
#define double_1 d28
@ sign and 2^int_n fixup:
+#define maxrange r12
#define expadjustment d7
#define literals r10
@ Values which exist within both polynomial implementations:
@@ -99,13 +100,18 @@
#define vmov_f32 fconsts
#define vmovne_f64 fconstdne
-ENTRY(pow_neon)
+#define KRAIT_NO_AAPCS_VFP_MODE
+
+ENTRY(pow)
#if defined(KRAIT_NO_AAPCS_VFP_MODE)
@ ARM ABI has inputs coming in via r registers, lets move to a d register
vmov x, x_lw, x_hw
#endif
push {r4, r5, r6, r7, r8, r9, r10, lr}
+ movw maxrange, #0x0000
+ movt maxrange, #0x4010
+
@ pre-staged bp values
vldr bpa, .LbpA
vldr bpb, .LbpB
@@ -175,6 +181,11 @@
.Lxle2to3over5:
+ cmp x_hw, maxrange
+ cmpls y_hw, maxrange
+ movt maxrange, #0x3f00
+ cmpls maxrange, x_hw
+
@ load log2 polynomial series constants
vldm literals!, {k4, k3, k2, k1}
@@ -186,6 +197,8 @@
vsub.f64 u, ss, bp
vadd.f64 v, ss, bp
+ bhi .LuseFullImpl
+
@ s = (x-1)/(x+1)
vdiv.f64 ss, u, v
@@ -392,6 +405,10 @@
@ leave directly returning whatever is in Return_lw and Return_hw
pop {r4, r5, r6, r7, r8, r9, r10, pc}
+.LuseFullImpl:
+ pop {r4, r5, r6, r7, r8, r9, r10, lr}
+ b __full_ieee754_pow
+
.align 6
.LliteralTable:
@ Least-sqares tuned constants for 11th order (log2((1+s)/(1-s)):
@@ -439,4 +456,4 @@
.Ltwoto1o4: @ 2^1/4
.long 0x0a31b715, 0x3ff306fe
-END(pow_neon)
+END(pow)
diff --git a/libm/upstream-freebsd/lib/msun/src/e_pow.c b/libm/upstream-freebsd/lib/msun/src/e_pow.c
index 10a0cf0..917e80b 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_pow.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_pow.c
@@ -60,14 +60,6 @@
#include "math.h"
#include "math_private.h"
-#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION)
-#if defined(KRAIT_NO_AAPCS_VFP_MODE)
-double pow_neon(double x, double y);
-#else
-double pow_neon(double x, double y, int32_t lx, int32_t hx) __attribute__((pcs("aapcs-vfp")));
-#endif
-#endif
-
static const double
bp[] = {1.0, 1.5,},
dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
@@ -102,7 +94,11 @@
ivln2_l = 1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
double
+#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION)
+__full_ieee754_pow(double x, double y)
+#else
__ieee754_pow(double x, double y)
+#endif
{
double z,ax,z_h,z_l,p_h,p_l;
double y1,t1,t2,r,s,t,u,v,w;
@@ -228,14 +224,6 @@
t1 = u+v;
SET_LOW_WORD(t1,0);
t2 = v-(t1-u);
-#if defined(KRAIT_NEON_OPTIMIZATION)
- } else if (ix <= 0x40100000 && iy <= 0x40100000 && hy > 0 && hx > 0) {
-#if defined(KRAIT_NO_AAPCS_VFP_MODE)
- return pow_neon(x,y);
-#else
- return pow_neon(x,y,lx,hx);
-#endif
-#endif
} else {
double ss,s2,s_h,s_l,t_h,t_l;
n = 0;
diff --git a/libm/upstream-freebsd/lib/msun/src/math_private.h b/libm/upstream-freebsd/lib/msun/src/math_private.h
index a28344e..a079cc0 100644
--- a/libm/upstream-freebsd/lib/msun/src/math_private.h
+++ b/libm/upstream-freebsd/lib/msun/src/math_private.h
@@ -725,6 +725,7 @@
#if defined(KRAIT_NEON_OPTIMIZATION)
int __kernel_rem_pio2(double*,double*,int,int,int) __attribute__((pcs("aapcs-vfp")));
+double __full_ieee754_pow(double,double);
#ifndef INLINE_REM_PIO2
int __ieee754_rem_pio2(double,double*) __attribute__((pcs("aapcs-vfp")));
#endif