/* Copyright (c) 2010 CodeSourcery, Inc.
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of CodeSourcery nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY CODESOURCERY, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL CODESOURCERY BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef __NO_FPRS__
#error Need a hard-float machine.
#endif

/* Overlay of a float with an unsigned int so that the same storage can be
   accessed either as a floating-point value or as its raw bit pattern.
   In this file it is used to build float constants from hexadecimal
   encodings.  Assumes unsigned int and float have the same size -- TODO
   confirm for any new target.  */
typedef union {
  unsigned int word;	/* The storage viewed as an integer bit pattern.  */
  float value;		/* The same storage viewed as a float.  */
} ieee_float_shape_type;

/* Overlay of a double with two unsigned int words so that the high and
   low halves of its bit pattern can be accessed individually.

   parts.msw is the word holding the most significant bits of the double
   and parts.lsw the word holding the least significant bits; their order
   in memory is selected at compile time from the target's endianness.

   The target-specific __BIG_ENDIAN__ / __LITTLE_ENDIAN__ macros are
   honoured first.  If neither is defined, fall back to GCC's common
   predefined macro __FLOAT_WORD_ORDER__, which describes the word layout
   of multi-word floating-point values, so that the type is also usable
   with compilers that do not provide the target-specific macros.

   Assumes unsigned int is half the size of double -- TODO confirm for any
   new target.  */
typedef union {
  struct {
#if defined(__BIG_ENDIAN__)
    unsigned int msw;
    unsigned int lsw;
#elif defined(__LITTLE_ENDIAN__)
    unsigned int lsw;
    unsigned int msw;
#elif defined(__FLOAT_WORD_ORDER__) && defined(__ORDER_BIG_ENDIAN__) \
      && __FLOAT_WORD_ORDER__ == __ORDER_BIG_ENDIAN__
    unsigned int msw;
    unsigned int lsw;
#elif defined(__FLOAT_WORD_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \
      && __FLOAT_WORD_ORDER__ == __ORDER_LITTLE_ENDIAN__
    unsigned int lsw;
    unsigned int msw;
#else
#error Must define endianness appropriately.
#endif
  } parts;
  double value;
} ieee_double_shape_type;

/* Plain single-precision constants used by the square-root code.  */
static const float zero = 0.0f;
static const float one = 1.0f;
static const float threehalf = 1.5f;

/* Special values spelled as raw single-precision bit patterns:
   0x7f800000 has all exponent bits set and a zero fraction (+infinity);
   0x7fc00000 additionally sets the top fraction bit (a quiet NaN).  */
static const ieee_float_shape_type a_inf = { .word = 0x7f800000u };
static const ieee_float_shape_type a_nan = { .word = 0x7fc00000u };

/* Evaluate to X multiplied by 1.0 using the PowerPC single-precision
   multiply instruction `fmuls'.  The asm is volatile, so the compiler
   must actually emit the multiply rather than folding it away.
   NOTE(review): presumably the point is to let the FPU apply its normal
   handling of special operands to the returned value -- confirm.

   Relies on the file-scope constant `one'.

   Uses the __asm__/__volatile__ spellings so the macro also compiles in
   strict ISO modes (-std=c99, -std=c11), where plain `asm' is not a
   keyword.  The temporary has a macro-specific name so that an argument
   that happens to be called `f' is not captured by it.  */
#define f_washf(x)                                              \
  ({ float f_washf_result_;                                     \
     __asm__ __volatile__ ("fmuls %[r],%[a],%[b]"               \
                           : [r] "=f" (f_washf_result_)         \
                           : [a] "f" (x), [b] "f" (one));       \
     f_washf_result_; })

/* The method is based on the descriptions in:

   _The Handbook of Floating-Point Arithmetic_ by Muller et al., chapter 5;
   _IA-64 and Elementary Functions: Speed and Precision_ by Markstein, chapter 9

   We find the reciprocal square root and use that to compute the actual
   square root.  */

/* Return A * C - B, computed with the PowerPC double-precision fused
   multiply-subtract instruction `fmsub'.  */
static __inline__ double
sqrtf_fmsub (double a, double c, double b)
{
  double r;

  __asm__ ("fmsub %[r], %[a], %[c], %[b]\n"
           : [r] "=f" (r) : [a] "f" (a), [c] "f" (c), [b] "f" (b));
  return r;
}

/* Return B - A * C, i.e. the negation of A * C - B, computed with the
   PowerPC double-precision instruction `fnmsub'.  */
static __inline__ double
sqrtf_fnmsub (double a, double c, double b)
{
  double r;

  __asm__ ("fnmsub %[r], %[a], %[c], %[b]\n"
           : [r] "=f" (r) : [a] "f" (a), [c] "f" (c), [b] "f" (b));
  return r;
}

/* Single-precision square root of B.

   For finite B > 0 the result is computed in double precision as
   B * (1 / sqrt (B)): the hardware estimate from `frsqrte' is refined by
   three Newton-Raphson steps and the product is converted to float on
   return.

   Every other input is returned through f_washf:
     - B < 0 is first replaced by the quiet NaN a_nan;
     - zero, +infinity and NaN inputs are passed through as given.

   NOTE(review): nothing in this function explicitly raises a
   floating-point exception for negative inputs -- confirm whether
   callers rely on one being signalled.  */
float
__ieee754_sqrtf (float b)
{
  if (__builtin_expect (b > zero, 1))
    {
      if (__builtin_expect (b != a_inf.value, 1))
        {
          double y, x;

          /* Compute y = 1.5 * b - b.  Uses fewer constants than
             y = 0.5 * b.  */
          y = sqrtf_fmsub (threehalf, b, b);

          /* Initial estimate of 1 / sqrt (b).  */
          __asm__ ("frsqrte %[x], %[b]\n" : [x] "=f" (x) : [b] "f" (b));

          /* Iterate.  x_{n+1} = x_n * (1.5 - y * (x_n * x_n)).  */
          x = x * sqrtf_fnmsub (y, x * x, threehalf);
          x = x * sqrtf_fnmsub (y, x * x, threehalf);
          x = x * sqrtf_fnmsub (y, x * x, threehalf);

          /* sqrt (b) = b * (1 / sqrt (b)).  */
          return x * b;
        }
      /* b is +infinity: fall through and return it unchanged.  */
    }
  else if (b < zero)
    {
      /* The square root of a negative number is not a number.  */
      b = a_nan.value;
    }

  return f_washf (b);
}
