fastfloat.c

Documentation: github.com/bytedance/sonic/native

     1/* Copyright 2020 Alexander Bolz
     2 * 
     3 * Boost Software License - Version 1.0 - August 17th, 2003
     4 * 
     5 * Permission is hereby granted, free of charge, to any person or organization
     6 * obtaining a copy of the software and accompanying documentation covered by
     7 * this license (the "Software") to use, reproduce, display, distribute,
     8 * execute, and transmit the Software, and to prepare derivative works of the
     9 * Software, and to permit third-parties to whom the Software is furnished to
    10 * do so, all subject to the following:
    11 * 
    12 * The copyright notices in the Software and this entire statement, including
    13 * the above license grant, this restriction and the following disclaimer,
    14 * must be included in all copies of the Software, in whole or in part, and
    15 * all derivative works of the Software, unless such copies or derivative
    16 * works are solely in the form of machine-executable object code generated by
    17 * a source language processor.
    18 *
    19 * Unless required by applicable law or agreed to in writing, this software
    20 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    21 * KIND, either express or implied.
    22 * 
    23 * This file may have been modified by ByteDance authors. All ByteDance
    24 * Modifications are Copyright 2022 ByteDance Authors.
    25 */
    26
    27#include "native.h"
    28#include "tab.h"
    29#include "test/xassert.h"
    30
    31#define F64_BITS         64
    32#define F64_EXP_BITS     11
    33#define F64_SIG_BITS     52
    34#define F64_EXP_MASK     0x7FF0000000000000ull // middle 11 bits
    35#define F64_SIG_MASK     0x000FFFFFFFFFFFFFull // lower 52 bits
    36#define F64_EXP_BIAS     1023
    37#define F64_INF_NAN_EXP  0x7FF
    38#define F64_HIDDEN_BIT   0x0010000000000000ull
    39
    40struct f64_dec {
    41    uint64_t sig;
    42    int64_t exp;
    43};
    44typedef struct f64_dec f64_dec;
    45
    46typedef __uint128_t uint128_t;
    47
    48static inline unsigned ctz10(const uint64_t v) {
    49    xassert(0 <= v && v < 100000000000000000ull);
    50    if (v >= 10000000000ull) {
    51        if (v <      100000000000ull) return 11;
    52        if (v <     1000000000000ull) return 12;
    53        if (v <    10000000000000ull) return 13;
    54        if (v <   100000000000000ull) return 14;
    55        if (v <  1000000000000000ull) return 15;
    56        if (v < 10000000000000000ull) return 16;
    57        else                          return 17;
    58    }
    59        if (v <                10ull) return 1;
    60        if (v <               100ull) return 2;
    61        if (v <              1000ull) return 3;
    62        if (v <             10000ull) return 4;
    63        if (v <            100000ull) return 5;
    64        if (v <           1000000ull) return 6;
    65        if (v <          10000000ull) return 7;
    66        if (v <         100000000ull) return 8;
    67        if (v <        1000000000ull) return 9;
    68        else                          return 10;
    69
    70}
    71
    72static inline char* format_significand(uint64_t sig, char *out, int cnt) {
    73    char *p = out + cnt;
    74    int ctz = 0;
    75
    76    if ((sig >> 32) != 0) {
    77        uint64_t q = sig / 100000000;
    78        uint32_t r = ((uint32_t)sig) - 100000000 * ((uint32_t) q);
    79        sig = q;
    80        if (r != 0) {
    81            uint32_t c  = r % 10000;
    82            r /= 10000;
    83            uint32_t d  = r % 10000;
    84            uint32_t c0 = (c % 100) << 1;
    85            uint32_t c1 = (c / 100) << 1;
    86            uint32_t d0 = (d % 100) << 1;
    87            uint32_t d1 = (d / 100) << 1;
    88            copy_two_digs(p - 2, Digits + c0);
    89            copy_two_digs(p - 4, Digits + c1);
    90            copy_two_digs(p - 6, Digits + d0);
    91            copy_two_digs(p - 8, Digits + d1);
    92        } else {
    93            ctz += 8;
    94        }
    95        p -= 8;
    96    }
    97
    98    uint32_t sig2 = (uint32_t)sig;
    99    while (sig2 >= 10000) {
   100        uint32_t c = sig2 - 10000 * (sig2 / 10000);
   101        sig2 /= 10000;
   102        uint32_t c0 = (c % 100) << 1;
   103        uint32_t c1 = (c / 100) << 1;
   104        copy_two_digs(p - 2, Digits + c0);
   105        copy_two_digs(p - 4, Digits + c1);
   106        p -= 4;
   107    }
   108    if (sig2 >= 100) {
   109        uint32_t c = (sig2 % 100) << 1;
   110        sig2 /= 100;
   111        copy_two_digs(p - 2, Digits + c);
   112        p -= 2;
   113    }
   114    if (sig2 >= 10) {
   115        uint32_t c = sig2 << 1;
   116        copy_two_digs(p - 2, Digits + c);
   117    } else {
   118        *out = (char) ('0' + sig2);
   119    }
   120    return out + cnt - ctz;
   121}
   122
   123static inline char* format_integer(uint64_t sig, char *out, unsigned cnt) {
   124    char *p = out + cnt;
   125    if ((sig >> 32) != 0) {
   126        uint64_t q = sig / 100000000;
   127        uint32_t r = ((uint32_t)sig) - 100000000 * ((uint32_t) q);
   128        sig = q;
   129        uint32_t c  = r % 10000;
   130        r /= 10000;
   131        uint32_t d  = r % 10000;
   132        uint32_t c0 = (c % 100) << 1;
   133        uint32_t c1 = (c / 100) << 1;
   134        uint32_t d0 = (d % 100) << 1;
   135        uint32_t d1 = (d / 100) << 1;
   136        copy_two_digs(p - 2, Digits + c0);
   137        copy_two_digs(p - 4, Digits + c1);
   138        copy_two_digs(p - 6, Digits + d0);
   139        copy_two_digs(p - 8, Digits + d1);
   140        p -= 8;
   141    }
   142
   143    uint32_t sig2 = (uint32_t)sig;
   144    while (sig2 >= 10000) {
   145        uint32_t c = sig2 - 10000 * (sig2 / 10000);
   146        sig2 /= 10000;
   147        uint32_t c0 = (c % 100) << 1;
   148        uint32_t c1 = (c / 100) << 1;
   149        copy_two_digs(p - 2, Digits + c0);
   150        copy_two_digs(p - 4, Digits + c1);
   151        p -= 4;
   152    }
   153    if (sig2 >= 100) {
   154        uint32_t c = (sig2 % 100) << 1;
   155        sig2 /= 100;
   156        copy_two_digs(p - 2, Digits + c);
   157        p -= 2;
   158    }
   159    if (sig2 >= 10) {
   160        uint32_t c = sig2 << 1;
   161        copy_two_digs(p - 2, Digits + c);
   162    } else {
   163        *out = (char) ('0' + sig2);
   164    }
   165    return out + cnt;
   166}
   167
   168static inline char* format_exponent(f64_dec v, char *out, unsigned cnt) {
   169    char* p = out + 1;
   170    char* end = format_significand(v.sig, p, cnt);
   171    while (*(end - 1) == '0') end--;
   172
   173    /* print decimal point if needed */
   174    *out = *p;
   175    if (end - p > 1) {
   176        *p = '.';
   177    } else {
   178        end--;
   179    }
   180
   181    /* print the exponent */
   182    *end++ = 'e';
   183    int32_t exp = v.exp + (int32_t) cnt - 1;
   184    if (exp < 0) {
   185        *end++ = '-';
   186        exp = -exp;
   187    } else {
   188        *end++ = '+';
   189    }
   190
   191    if (exp >= 100) {
   192        int32_t c = exp % 10;
   193        copy_two_digs(end, Digits + 2 * (exp / 10));
   194        end[2] = (char) ('0' + c);
   195        end += 3;
   196    } else if (exp >= 10) {
   197        copy_two_digs(end, Digits + 2 * exp);
   198        end += 2;
   199    } else {
   200        *end++ = (char) ('0' + exp);
   201    }
   202    return end;
   203}
   204
   205static inline char* format_decimal(f64_dec v, char* out, unsigned cnt) {
   206    char* p = out;
   207    char* end;
   208    int point = cnt + v.exp;
   209
   210    /* print leading zeros if fp < 1 */
   211    if (point <= 0) {
   212        *p++ = '0', *p++ = '.';
   213        for (int i = 0; i < -point; i++) {
   214            *p++ = '0';
   215        }
   216    }
   217
   218    /* add the remaining digits */
   219    end = format_significand(v.sig, p, cnt);
   220    while (*(end - 1) == '0') end--;
   221    if (point <= 0) {
   222        return end;
   223    }
   224
   225    /* insert point or add trailing zeros */
   226    int digs = end - p;
   227    if (digs > point) {
   228        for (int i = 0; i < digs - point; i++) {
   229            *(end - i) = *(end - i - 1);
   230        }
   231        p[point] = '.';
   232        end++;
   233    } else {
   234        for (int i = 0; i < point - digs; i++) {
   235            *end++ = '0';
   236        }
   237    }
   238    return end;
   239}
   240
   241static inline char* write_dec(f64_dec dec, char* p) {
   242    int cnt = ctz10(dec.sig);
   243    int dot = cnt + dec.exp;
   244    int sci_exp = dot - 1;
   245    bool exp_fmt = sci_exp < -6 || sci_exp > 20;
   246    bool has_dot = dot < cnt;
   247
   248    if (exp_fmt) {
   249        return format_exponent(dec, p, cnt);
   250    }
   251    if (has_dot) {
   252        return format_decimal(dec, p, cnt);
   253    }
   254
   255    char* end = p + dot;
   256    p = format_integer(dec.sig, p, cnt);
   257    while (p < end) *p++ = '0';
   258    return end;
   259}
   260
   261static inline uint64_t f64toraw(double fp) {
   262    union {
   263        uint64_t u64;
   264        double   f64;
   265    } uval;
   266    uval.f64 = fp;
   267    return uval.u64;
   268}
   269
   270static inline uint64_t round_odd(uint64x2 g, uint64_t cp) {
   271    const uint128_t x = ((uint128_t)cp) * g.lo;
   272    const uint128_t y = ((uint128_t)cp) * g.hi + ((uint64_t)(x >> 64));
   273
   274    const uint64_t y0 = ((uint64_t)y);
   275    const uint64_t y1 = ((uint64_t)(y >> 64));
   276    return y1 | (y0 > 1);
   277}
   278
   279/**
   280 Rendering float point number into decimal.
   281 The function used Schubfach algorithm, reference:
   282 The Schubfach way to render doubles, Raffaello Giulietti, 2022-03-20.
   283 https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb
   284 https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html
   285 https://github.com/openjdk/jdk/pull/3402 (Java implementation)
   286 https://github.com/abolz/Drachennest (C++ implementation)
   287 */
   288static inline f64_dec f64todec(uint64_t rsig, int32_t rexp, uint64_t c, int32_t q) {
   289    uint64_t cbl, cb, cbr, vbl, vb, vbr, lower, upper, s;
   290    int32_t k, h;
   291    bool even, irregular, w_inside, u_inside;
   292    f64_dec dec;
   293
   294    even = !(c & 1);
   295    irregular = rsig == 0 && rexp > 1;
   296
   297    cbl = 4 * c - 2 + irregular;
   298    cb = 4 * c;
   299    cbr = 4 * c + 2;
   300
   301    /*  q is in [-1500, 1500]
   302        k = irregular ? floor(log_10(3/4 * 2^q)) : floor(log10(pow(2^q)))
   303        floor(log10(3/4 * 2^q))  = (q * 1262611 - 524031) >> 22
   304        floor(log10(pow(2^q))) = (q * 1262611) >> 22 */
   305    k = (q * 1262611 - (irregular ? 524031 : 0)) >> 22;
   306
   307    /*  k is in [-1233, 1233]
   308        s = floor(V) = floor(c * 2^q * 10^-k)
   309        vb = 4V = 4 * c * 2^q * 10^-k */
   310    h = q + ((-k) * 1741647 >> 19) + 1;
   311    uint64x2 pow10 = pow10_ceil_sig(-k);
   312    vbl = round_odd(pow10, cbl << h);
   313    vb = round_odd(pow10, cb << h);
   314    vbr = round_odd(pow10, cbr << h);
   315
   316    lower = vbl + !even;
   317    upper = vbr - !even;
   318
   319    s = vb / 4;
   320    if (s >= 10) {
   321        /* R_k+1 interval contains at most one: up or wp */
   322        uint64_t sp = s / 10;
   323        bool up_inside = lower <= (40 * sp);
   324        bool wp_inside = (40 * sp + 40) <= upper;
   325        if (up_inside != wp_inside) {
   326            dec.sig = sp + wp_inside;
   327            dec.exp = k + 1;
   328            return dec;
   329        }
   330    }
   331
   332    /* R_k interval contains at least one: u or w */
   333    u_inside = lower <= (4 * s);
   334    w_inside = (4 * s + 4) <= upper;
   335    if (u_inside != w_inside) {
   336        dec.sig = s + w_inside;
   337        dec.exp = k;
   338        return dec;
   339    }
   340
   341    /* R_k interval contains both u or w */
   342    uint64_t mid = 4 * s + 2;
   343    bool round_up = vb > mid || (vb == mid && (s & 1) != 0);
   344    dec.sig = s + round_up;
   345    dec.exp = k;
   346    return dec;
   347}
   348
   349int f64toa(char *out, double fp) {
   350    char* p = out;
   351    uint64_t raw = f64toraw(fp);
   352    bool neg;
   353    uint64_t rsig, c;
   354    int32_t rexp, q;
   355
   356    neg = ((raw >> (F64_BITS - 1)) != 0);
   357    rsig = raw & F64_SIG_MASK;
   358    rexp = (int32_t)((raw & F64_EXP_MASK) >> F64_SIG_BITS);
   359
   360    /* check infinity and nan */
   361    if (unlikely(rexp == F64_INF_NAN_EXP)) {
   362        return 0;
   363    }
   364
   365    /* check negative numbers */
   366    *p = '-';
   367    p += neg;
   368
   369    /* simple case of 0.0 */
   370    if ((raw << 1) ==  0) {
   371        *p++ = '0';
   372        return p - out;
   373    }
   374
   375    /* fp = c * 2^q */
   376    if (likely(rexp != 0)) {
   377        /* double is normal */
   378        c = rsig | F64_HIDDEN_BIT;
   379        q = rexp - F64_EXP_BIAS - F64_SIG_BITS;
   380
   381        /* fast path for integer */
   382        if (q <= 0 && q >= -F64_SIG_BITS && is_div_pow2(c, -q)) {
   383            uint64_t u = c >> -q;
   384            p = format_integer(u, p, ctz10(u));
   385            return p - out;
   386        }
   387
   388    } else {
   389        c = rsig;
   390        q = 1 - F64_EXP_BIAS - F64_SIG_BITS;
   391    }
   392
   393    f64_dec dec = f64todec(rsig, rexp, c, q);
   394    p = write_dec(dec, p);
   395    return p - out;
   396}
   397
   398#undef F64_BITS       
   399#undef F64_EXP_BITS   
   400#undef F64_SIG_BITS   
   401#undef F64_EXP_MASK   
   402#undef F64_SIG_MASK   
   403#undef F64_EXP_BIAS   
   404#undef F64_INF_NAN_EXP
   405#undef F64_HIDDEN_BIT
View as plain text