libm: optimistic pow calling.

Call the optimized pow implementation optimistically and fall back to
the full-range implementation if an out-of-range input is detected.

Change-Id: I6f3aa734adbf99484b7ff70736ef83a41e5815b8
(cherry picked from commit e7fd29685a75f1d26061ac511e7a29479fd871b5)
(cherry picked from commit 7916899916973ca5695526ae45283e54709df32a)
(cherry picked from commit d39a7f744c78409949482e3cb3c9710e8f281a33)
diff --git a/libm/arm/e_pow.S b/libm/arm/e_pow.S
index 0e922db..0e6240d 100644
--- a/libm/arm/e_pow.S
+++ b/libm/arm/e_pow.S
@@ -33,6 +33,7 @@
 #define int_1               d29
 #define double_1            d28
 @ sign and 2^int_n fixup:
+#define maxrange            r12
 #define expadjustment       d7
 #define literals            r10
 @ Values which exist within both polynomial implementations:
@@ -99,13 +100,18 @@
 #define vmov_f32            fconsts
 #define vmovne_f64          fconstdne
 
-ENTRY(pow_neon)
+#define KRAIT_NO_AAPCS_VFP_MODE
+
+ENTRY(pow)
 #if defined(KRAIT_NO_AAPCS_VFP_MODE)
      @ ARM ABI has inputs coming in via r registers, lets move to a d register
     vmov            x, x_lw, x_hw
 #endif
     push            {r4, r5, r6, r7, r8, r9, r10, lr}
 
+    movw            maxrange, #0x0000
+    movt            maxrange, #0x4010
+
     @ pre-staged bp values
     vldr            bpa, .LbpA
     vldr            bpb, .LbpB
@@ -175,6 +181,11 @@
 
 .Lxle2to3over5:
 
+    cmp             x_hw, maxrange
+    cmpls           y_hw, maxrange
+    movt            maxrange, #0x3f00
+    cmpls           maxrange, x_hw
+
     @ load log2 polynomial series constants
     vldm            literals!, {k4, k3, k2, k1}
 
@@ -186,6 +197,8 @@
     vsub.f64        u, ss, bp
     vadd.f64        v, ss, bp
 
+    bhi             .LuseFullImpl
+
     @ s = (x-1)/(x+1)
     vdiv.f64        ss, u, v
 
@@ -392,6 +405,10 @@
     @ leave directly returning whatever is in Return_lw and Return_hw
     pop             {r4, r5, r6, r7, r8, r9, r10, pc}
 
+.LuseFullImpl:
+    pop             {r4, r5, r6, r7, r8, r9, r10, lr}
+    b               __full_ieee754_pow
+
 .align 6
 .LliteralTable:
 @ Least-sqares tuned constants for 11th order (log2((1+s)/(1-s)):
@@ -439,4 +456,4 @@
 
 .Ltwoto1o4: @ 2^1/4
     .long       0x0a31b715, 0x3ff306fe
-END(pow_neon)
+END(pow)
diff --git a/libm/upstream-freebsd/lib/msun/src/e_pow.c b/libm/upstream-freebsd/lib/msun/src/e_pow.c
index 10a0cf0..917e80b 100644
--- a/libm/upstream-freebsd/lib/msun/src/e_pow.c
+++ b/libm/upstream-freebsd/lib/msun/src/e_pow.c
@@ -60,14 +60,6 @@
 #include "math.h"
 #include "math_private.h"
 
-#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION)
-#if defined(KRAIT_NO_AAPCS_VFP_MODE)
-double pow_neon(double x, double y);
-#else
-double pow_neon(double x, double y, int32_t lx, int32_t hx) __attribute__((pcs("aapcs-vfp")));
-#endif
-#endif
-
 static const double
 bp[] = {1.0, 1.5,},
 dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
@@ -102,7 +94,11 @@
 ivln2_l  =  1.92596299112661746887e-08; /* 0x3E54AE0B, 0xF85DDF44 =1/ln2 tail*/
 
 double
+#if defined(KRAIT_NEON_OPTIMIZATION) || defined(SPARROW_NEON_OPTIMIZATION)
+__full_ieee754_pow(double x, double y)
+#else
 __ieee754_pow(double x, double y)
+#endif
 {
 	double z,ax,z_h,z_l,p_h,p_l;
 	double y1,t1,t2,r,s,t,u,v,w;
@@ -228,14 +224,6 @@
 	    t1 = u+v;
 	    SET_LOW_WORD(t1,0);
 	    t2 = v-(t1-u);
-#if defined(KRAIT_NEON_OPTIMIZATION)
-	} else if (ix <= 0x40100000 && iy <= 0x40100000 && hy > 0 && hx > 0) {
-#if defined(KRAIT_NO_AAPCS_VFP_MODE)
-		return pow_neon(x,y);
-#else
-		return pow_neon(x,y,lx,hx);
-#endif
-#endif
 	} else {
 	    double ss,s2,s_h,s_l,t_h,t_l;
 	    n = 0;
diff --git a/libm/upstream-freebsd/lib/msun/src/math_private.h b/libm/upstream-freebsd/lib/msun/src/math_private.h
index a28344e..a079cc0 100644
--- a/libm/upstream-freebsd/lib/msun/src/math_private.h
+++ b/libm/upstream-freebsd/lib/msun/src/math_private.h
@@ -725,6 +725,7 @@
 
 #if defined(KRAIT_NEON_OPTIMIZATION)
 int	__kernel_rem_pio2(double*,double*,int,int,int) __attribute__((pcs("aapcs-vfp")));
+double	__full_ieee754_pow(double,double);
 #ifndef INLINE_REM_PIO2
 int	__ieee754_rem_pio2(double,double*) __attribute__((pcs("aapcs-vfp")));
 #endif