...

Text file src/github.com/bytedance/sonic/native/f32toa.c

Documentation: github.com/bytedance/sonic/native

     1/* Copyright 2020 Alexander Bolz
     2 * 
     3 * Boost Software License - Version 1.0 - August 17th, 2003
     4 * 
     5 * Permission is hereby granted, free of charge, to any person or organization
     6 * obtaining a copy of the software and accompanying documentation covered by
     7 * this license (the "Software") to use, reproduce, display, distribute,
     8 * execute, and transmit the Software, and to prepare derivative works of the
     9 * Software, and to permit third-parties to whom the Software is furnished to
    10 * do so, all subject to the following:
    11 * 
    12 * The copyright notices in the Software and this entire statement, including
    13 * the above license grant, this restriction and the following disclaimer,
    14 * must be included in all copies of the Software, in whole or in part, and
    15 * all derivative works of the Software, unless such copies or derivative
    16 * works are solely in the form of machine-executable object code generated by
    17 * a source language processor.
    18 *
    19 * Unless required by applicable law or agreed to in writing, this software
    20 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    21 * KIND, either express or implied.
    22 * 
    23 * This file may have been modified by ByteDance authors. All ByteDance
    24 * Modifications are Copyright 2022 ByteDance Authors.
    25 */
    26
    27#include "native.h"
    28#include "tab.h"
    29#include "test/xassert.h"
    30
    31#define F32_BITS         32
    32#define F32_EXP_BITS     8
    33#define F32_SIG_BITS     23
    34#define F32_EXP_MASK     0x7F800000u // middle 8 bits
    35#define F32_SIG_MASK     0x007FFFFFu // lower 23 bits
    36#define F32_EXP_BIAS     127
    37#define F32_INF_NAN_EXP  0xFF
    38#define F32_HIDDEN_BIT   0x00800000u
    39
    40typedef struct {
    41    uint32_t sig;
    42    int32_t exp;
    43} f32_dec;
    44
    45static inline unsigned ctz10_u32(const uint32_t v) {
    46    xassert(0 <= v && v < 1000000000u);
    47    if (v >= 100000) {
    48        if (v <   1000000) return 6;
    49        if (v <  10000000) return 7;
    50        if (v < 100000000) return 8;
    51        else               return 9;
    52    } else {
    53        if (v <        10) return 1;
    54        if (v <       100) return 2;
    55        if (v <      1000) return 3;
    56        if (v <     10000) return 4;
    57        else               return 5;
    58    }
    59}
    60
    61static inline char* format_significand_f32(uint32_t sig, char *out, int cnt) {
    62    char *r = out + cnt;
    63    int ctz = 0;
    64
    65    /* at most 9 digits here */
    66    if  (sig >= 10000) {
    67        uint32_t c = sig - 10000 * (sig / 10000);
    68        sig /= 10000;
    69        if (c != 0) {
    70            uint32_t c0 = (c % 100) << 1;
    71            uint32_t c1 = (c / 100) << 1;
    72            copy_two_digs(r - 2, Digits + c0);
    73            copy_two_digs(r - 4, Digits + c1);
    74        } else {
    75            ctz = 4;
    76        }
    77        r -= 4;
    78    }
    79
    80    while (sig >= 100) {
    81        uint32_t c = (sig % 100) << 1;
    82        sig /= 100;
    83        copy_two_digs(r - 2, Digits + c);
    84        r -= 2;
    85    }
    86
    87    if (sig >= 10) {
    88        uint32_t c = sig << 1;
    89        copy_two_digs(out, Digits + c);
    90    } else {
    91        *out = (char) ('0' + sig);
    92    }
    93
    94    return out + cnt - ctz;
    95}
    96
    97static inline char* format_integer_u32(uint32_t sig, char *out, unsigned cnt) {
    98    char *r = out + cnt;
    99
   100    /* at most 9 digits here */
   101    if  (sig >= 10000) {
   102        uint32_t c = sig - 10000 * (sig / 10000);
   103        sig /= 10000;
   104        uint32_t c0 = (c % 100) << 1;
   105        uint32_t c1 = (c / 100) << 1;
   106        copy_two_digs(r - 2, Digits + c0);
   107        copy_two_digs(r - 4, Digits + c1);
   108        r -= 4;
   109    }
   110
   111    while (sig >= 100) {
   112        uint32_t c = (sig % 100) << 1;
   113        sig /= 100;
   114        copy_two_digs(r - 2, Digits + c);
   115        r -= 2;
   116    }
   117
   118    if (sig >= 10) {
   119        uint32_t c = sig << 1;
   120        copy_two_digs(out, Digits + c);
   121    } else {
   122        *out = (char) ('0' + sig);
   123    }
   124
   125    return out + cnt;
   126}
   127
   128static inline char* format_exponent_f32(f32_dec v, char *out, int cnt) {
   129    char* p = out + 1;
   130    char* end = format_significand_f32(v.sig, p, cnt);
   131    while (*(end - 1) == '0') end--;
   132
   133    /* Print decimal point if needed */
   134    *out = *p;
   135    if (end - p > 1) {
   136        *p = '.';
   137    } else {
   138        end--;
   139    }
   140
   141    /* Print the exponent */
   142    *end++ = 'e';
   143    int32_t exp = v.exp + (int32_t) cnt - 1;
   144    if (exp < 0) {
   145        *end++ = '-';
   146        exp = -exp;
   147    } else {
   148        *end++ = '+';
   149    }
   150
   151    if (exp >= 100) {
   152        int32_t c = exp % 10;
   153        copy_two_digs(end, Digits + 2 * (exp / 10));
   154        end[2] = (char) ('0' + c);
   155        end += 3;
   156    } else if (exp >= 10) {
   157        copy_two_digs(end, Digits + 2 * exp);
   158        end += 2;
   159    } else {
   160        *end++ = (char) ('0' + exp);
   161    }
   162    return end;
   163}
   164
   165static inline char* format_decimal_f32(f32_dec v, char* out, int cnt) {
   166    char* p = out;
   167    char* end;
   168    int point = cnt + v.exp;
   169
   170    /* print leading zeros if fp < 1 */
   171    if (point <= 0) {
   172        *p++ = '0', *p++ = '.';
   173        for (int i = 0; i < -point; i++) {
   174            *p++ = '0';
   175        }
   176    }
   177
   178    /* add the remaining digits */
   179    end = format_significand_f32(v.sig, p, cnt);
   180    while (*(end - 1) == '0') end--;
   181    if (point <= 0) {
   182        return end;
   183    }
   184
   185    /* insert point or add trailing zeros */
   186    int digs = end - p, frac = digs - point;
   187    if (digs > point) {
   188        for (int i = 0; i < frac; i++) {
   189            *(end - i) = *(end - i - 1);
   190        }
   191        p[point] = '.';
   192        end++;
   193    } else {
   194        for (int i = 0; i < point - digs; i++) {
   195            *end++ = '0';
   196        }
   197    }
   198    return end;
   199}
   200
   201static inline char* write_dec_f32(f32_dec dec, char* p) {
   202    int cnt = ctz10_u32(dec.sig);
   203    int dot = cnt + dec.exp;
   204    int sci_exp = dot - 1;
   205    bool exp_fmt = sci_exp < -6 || sci_exp > 20;
   206    bool has_dot = dot < cnt;
   207
   208    if (exp_fmt) {
   209        return format_exponent_f32(dec, p, cnt);
   210    }
   211    if (has_dot) {
   212        return format_decimal_f32(dec, p, cnt);
   213    }
   214
   215    char* end = p + dot;
   216    p = format_integer_u32(dec.sig, p, cnt);
   217    while (p < end) *p++ = '0';
   218    return end;
   219}
   220
   221static inline uint32_t f32toraw(float fp) {
   222    union {
   223        uint32_t u32;
   224        float   f32;
   225    } uval;
   226    uval.f32 = fp;
   227    return uval.u32;
   228}
   229
   230static inline uint64_t pow10_ceil_sig_f32(int32_t k)
   231{
   232    // There are unique beta and r such that 10^k = beta 2^r and
   233    // 2^63 <= beta < 2^64, namely r = floor(log_2 10^k) - 63 and
   234    // beta = 2^-r 10^k.
   235    // Let g = ceil(beta), so (g-1) 2^r < 10^k <= g 2^r, with the latter
   236    // value being a pretty good overestimate for 10^k.
   237
   238    // NB: Since for all the required exponents k, we have g < 2^64,
   239    //     all constants can be stored in 128-bit integers.
   240    // reference from: 
   241    //  https://github.com/abolz/Drachennest/blob/master/src/schubfach_32.cc#L144
   242
   243#define KMAX 45
   244#define KMIN -31
   245    static const uint64_t g[KMAX - KMIN + 1] = {
   246        0x81CEB32C4B43FCF5, // -31
   247        0xA2425FF75E14FC32, // -30
   248        0xCAD2F7F5359A3B3F, // -29
   249        0xFD87B5F28300CA0E, // -28
   250        0x9E74D1B791E07E49, // -27
   251        0xC612062576589DDB, // -26
   252        0xF79687AED3EEC552, // -25
   253        0x9ABE14CD44753B53, // -24
   254        0xC16D9A0095928A28, // -23
   255        0xF1C90080BAF72CB2, // -22
   256        0x971DA05074DA7BEF, // -21
   257        0xBCE5086492111AEB, // -20
   258        0xEC1E4A7DB69561A6, // -19
   259        0x9392EE8E921D5D08, // -18
   260        0xB877AA3236A4B44A, // -17
   261        0xE69594BEC44DE15C, // -16
   262        0x901D7CF73AB0ACDA, // -15
   263        0xB424DC35095CD810, // -14
   264        0xE12E13424BB40E14, // -13
   265        0x8CBCCC096F5088CC, // -12
   266        0xAFEBFF0BCB24AAFF, // -11
   267        0xDBE6FECEBDEDD5BF, // -10
   268        0x89705F4136B4A598, //  -9
   269        0xABCC77118461CEFD, //  -8
   270        0xD6BF94D5E57A42BD, //  -7
   271        0x8637BD05AF6C69B6, //  -6
   272        0xA7C5AC471B478424, //  -5
   273        0xD1B71758E219652C, //  -4
   274        0x83126E978D4FDF3C, //  -3
   275        0xA3D70A3D70A3D70B, //  -2
   276        0xCCCCCCCCCCCCCCCD, //  -1
   277        0x8000000000000000, //   0
   278        0xA000000000000000, //   1
   279        0xC800000000000000, //   2
   280        0xFA00000000000000, //   3
   281        0x9C40000000000000, //   4
   282        0xC350000000000000, //   5
   283        0xF424000000000000, //   6
   284        0x9896800000000000, //   7
   285        0xBEBC200000000000, //   8
   286        0xEE6B280000000000, //   9
   287        0x9502F90000000000, //  10
   288        0xBA43B74000000000, //  11
   289        0xE8D4A51000000000, //  12
   290        0x9184E72A00000000, //  13
   291        0xB5E620F480000000, //  14
   292        0xE35FA931A0000000, //  15
   293        0x8E1BC9BF04000000, //  16
   294        0xB1A2BC2EC5000000, //  17
   295        0xDE0B6B3A76400000, //  18
   296        0x8AC7230489E80000, //  19
   297        0xAD78EBC5AC620000, //  20
   298        0xD8D726B7177A8000, //  21
   299        0x878678326EAC9000, //  22
   300        0xA968163F0A57B400, //  23
   301        0xD3C21BCECCEDA100, //  24
   302        0x84595161401484A0, //  25
   303        0xA56FA5B99019A5C8, //  26
   304        0xCECB8F27F4200F3A, //  27
   305        0x813F3978F8940985, //  28
   306        0xA18F07D736B90BE6, //  29
   307        0xC9F2C9CD04674EDF, //  30
   308        0xFC6F7C4045812297, //  31
   309        0x9DC5ADA82B70B59E, //  32
   310        0xC5371912364CE306, //  33
   311        0xF684DF56C3E01BC7, //  34
   312        0x9A130B963A6C115D, //  35
   313        0xC097CE7BC90715B4, //  36
   314        0xF0BDC21ABB48DB21, //  37
   315        0x96769950B50D88F5, //  38
   316        0xBC143FA4E250EB32, //  39
   317        0xEB194F8E1AE525FE, //  40
   318        0x92EFD1B8D0CF37BF, //  41
   319        0xB7ABC627050305AE, //  42
   320        0xE596B7B0C643C71A, //  43
   321        0x8F7E32CE7BEA5C70, //  44
   322        0xB35DBF821AE4F38C, //  45
   323    };
   324
   325    xassert(k >= KMIN && k <= KMAX);
   326    return g[k - KMIN];
   327#undef KMIN
   328#undef KMAX
   329}
   330
   331static inline uint32_t round_odd_f32(uint64_t g, uint32_t cp) {
   332    const uint128_t p = ((uint128_t)g) * cp;
   333    const uint32_t y1 = (uint64_t)(p >> 64);
   334    const uint32_t y0 = ((uint64_t)(p)) >> 32;
   335    return y1 | (y0 > 1);
   336}
   337
   338/**
   339 Rendering float point number into decimal.
   340 The function used Schubfach algorithm, reference:
   341 The Schubfach way to render doubles, Raffaello Giulietti, 2022-03-20.
   342 https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb
   343 https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html
   344 https://github.com/openjdk/jdk/pull/3402 (Java implementation)
   345 https://github.com/abolz/Drachennest (C++ implementation)
   346 */
   347static inline f32_dec f32todec(uint32_t rsig, int32_t rexp, uint32_t c, int32_t q) {
   348    uint32_t cbl, cb, cbr, vbl, vb, vbr, lower, upper, s;
   349    int32_t k, h;
   350    bool even, irregular, w_inside, u_inside;
   351    f32_dec dec;
   352
   353    even = !(c & 1);
   354    irregular = rsig == 0 && rexp > 1;
   355
   356    cbl = 4 * c - 2 + irregular;
   357    cb = 4 * c;
   358    cbr = 4 * c + 2;
   359
   360    k = (q * 1262611 - (irregular ? 524031 : 0)) >> 22;
   361    h = q + ((-k) * 1741647 >> 19) + 1;
   362    uint64_t pow10 = pow10_ceil_sig_f32(-k);
   363    vbl = round_odd_f32(pow10, cbl << h);
   364    vb = round_odd_f32(pow10, cb << h);
   365    vbr = round_odd_f32(pow10, cbr << h);
   366
   367
   368    lower = vbl + !even;
   369    upper = vbr - !even;
   370
   371    s = vb / 4;
   372    if (s >= 10) {
   373        uint64_t sp = s / 10;
   374        bool up_inside = lower <= (40 * sp);
   375        bool wp_inside = (40 * sp + 40) <= upper;
   376        if (up_inside != wp_inside) {
   377            dec.sig = sp + wp_inside;
   378            dec.exp = k + 1;
   379            return dec;
   380        }
   381    }
   382
   383    u_inside = lower <= (4 * s);
   384    w_inside = (4 * s + 4) <= upper;
   385    if (u_inside != w_inside) {
   386        dec.sig = s + w_inside;
   387        dec.exp = k;
   388        return dec;
   389    }
   390
   391    uint64_t mid = 4 * s + 2;
   392    bool round_up = vb > mid || (vb == mid && (s & 1) != 0);
   393    dec.sig = s + round_up;
   394    dec.exp = k;
   395    return dec;
   396}
   397
   398int f32toa(char *out, float fp) {
   399    char* p = out;
   400    uint32_t raw = f32toraw(fp);
   401    bool neg;
   402    uint32_t rsig, c;
   403    int32_t rexp, q;
   404
   405    neg = ((raw >> (F32_BITS - 1)) != 0);
   406    rsig = raw & F32_SIG_MASK;
   407    rexp = (int32_t)((raw & F32_EXP_MASK) >> F32_SIG_BITS);
   408
   409    /* check infinity and nan */
   410    if (unlikely(rexp == F32_INF_NAN_EXP)) {
   411        return 0;
   412    }
   413
   414    /* check negative numbers */
   415    *p = '-';
   416    p += neg;
   417
   418    /* simple case of 0.0 */
   419    if ((raw << 1) ==  0) {
   420        *p++ = '0';
   421        return p - out;
   422    }
   423
   424    if (likely(rexp != 0)) {
   425        /* double is normal */
   426        c = rsig | F32_HIDDEN_BIT;
   427        q = rexp - F32_EXP_BIAS - F32_SIG_BITS;
   428
   429        /* fast path for integer */
   430        if (q <= 0 && q >= -F32_SIG_BITS && is_div_pow2(c, -q)) {
   431            uint32_t u = c >> -q;
   432            p = format_integer_u32(u, p, ctz10_u32(u));
   433            return p - out;
   434        }
   435    } else {
   436        c = rsig;
   437        q = 1 - F32_EXP_BIAS - F32_SIG_BITS;
   438    }
   439
   440    f32_dec dec = f32todec(rsig, rexp, c, q);
   441    p = write_dec_f32(dec, p);
   442    return p - out;
   443}
   444
   445#undef F32_BITS       
   446#undef F32_EXP_BITS   
   447#undef F32_SIG_BITS   
   448#undef F32_EXP_MASK   
   449#undef F32_SIG_MASK   
   450#undef F32_EXP_BIAS   
   451#undef F32_INF_NAN_EXP
   452#undef F32_HIDDEN_BIT 

View as plain text