...

Text file src/github.com/bytedance/sonic/native/fastint.c

Documentation: github.com/bytedance/sonic/native

     1/*
     2 * Copyright 2021 ByteDance Inc.
     3 *
     4 * Licensed under the Apache License, Version 2.0 (the "License");
     5 * you may not use this file except in compliance with the License.
     6 * You may obtain a copy of the License at
     7 *
     8 *     http://www.apache.org/licenses/LICENSE-2.0
     9 *
    10 * Unless required by applicable law or agreed to in writing, software
    11 * distributed under the License is distributed on an "AS IS" BASIS,
    12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13 * See the License for the specific language governing permissions and
    14 * limitations under the License.
    15 */
    16
    17#include "native.h"
    18#include "tab.h"
    19
/* Sixteen ASCII '0' bytes. Added to a vector of packed digit values to turn
 * them into characters, and also used as the comparison operand when looking
 * for leading '0' characters. */
static const char Vec16xA0[16] __attribute__((aligned(16))) = {
    '0', '0', '0', '0', '0', '0', '0', '0',
    '0', '0', '0', '0', '0', '0', '0', '0',
};

/* Eight 16-bit lanes of 10; itoa8_sse2 multiplies its quotient lanes by this
 * before subtracting, which isolates single decimal digits. */
static const uint16_t Vec8x10[8] __attribute__((aligned(16))) = {
    10, 10, 10, 10,
    10, 10, 10, 10,
};

/* Four 32-bit lanes of 10000; itoa8_sse2 uses it to rebuild (v / 10000) * 10000
 * so the remainder v % 10000 can be obtained by subtraction. */
static const uint32_t Vec4x10k[4] __attribute__((aligned(16))) = {
    10000,
    10000,
    10000,
    10000,
};

/* Reciprocal multiplier for division by 10000: 0xd1b71759 == ceil(2^45 / 10000).
 * itoa8_sse2 multiplies by it and then shifts the 64-bit product right by 45. */
static const uint32_t Vec4xDiv10k[4] __attribute__((aligned(16))) = {
    0xd1b71759,
    0xd1b71759,
    0xd1b71759,
    0xd1b71759,
};

/* Per-lane 16-bit reciprocal multipliers, repeated for both 4-digit groups:
 *   0x20c5 == ceil(2^23 / 1000), 0x147b == ceil(2^19 / 100),
 *   0x3334 == ceil(2^17 / 10),   0x8000 == 2^15.
 * They are applied with a high-half 16-bit multiply in itoa8_sse2. */
static const uint16_t VecDivPowers[8] __attribute__((aligned(16))) = {
    0x20c5, 0x147b,
    0x3334, 0x8000,
    0x20c5, 0x147b,
    0x3334, 0x8000,
};

/* Powers of two (2^7, 2^11, 2^13, 2^15) used with a second high-half 16-bit
 * multiply, which acts as a per-lane right shift by 9, 5, 3 and 1 bits. This
 * finishes the divisions started with VecDivPowers. */
static const uint16_t VecShiftPowers[8] __attribute__((aligned(16))) = {
    0x0080, 0x0800,
    0x2000, 0x8000,
    0x0080, 0x0800,
    0x2000, 0x8000,
};

/* Nine 16-byte byte-shuffle masks. Row k selects source bytes k..15 into
 * positions 0..15-k; the 0xff entries have the top bit set, which makes the
 * byte shuffle write zero into those positions. u64toa_large_sse2 indexes this
 * table with the number of leading '0' characters, so only rows 0..8 exist. */
static const uint8_t VecShiftShuffles[144] __attribute__((aligned(16))) = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff,
    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff,
    0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff,
    0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
    0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};
    69
    70static inline int itoa1(char *out, int n, uint32_t v) {
    71    out[n++] = (char)v + '0';
    72    return n;
    73}
    74
    75static inline int itoa2(char *out, int n, uint32_t v) {
    76    out[n++] = Digits[v];
    77    out[n++] = Digits[v + 1];
    78    return n;
    79}
    80
    81static inline __m128i itoa8_sse2(uint32_t v) {
    82    __m128i v00 = _mm_cvtsi32_si128  (v);
    83    __m128i v01 = _mm_mul_epu32      (v00, as_m128v(Vec4xDiv10k));
    84    __m128i v02 = _mm_srli_epi64     (v01, 45);
    85    __m128i v03 = _mm_mul_epu32      (v02, as_m128v(Vec4x10k));
    86    __m128i v04 = _mm_sub_epi32      (v00, v03);
    87    __m128i v05 = _mm_unpacklo_epi16 (v02, v04);
    88    __m128i v06 = _mm_slli_epi64     (v05, 2);
    89    __m128i v07 = _mm_unpacklo_epi16 (v06, v06);
    90    __m128i v08 = _mm_unpacklo_epi32 (v07, v07);
    91    __m128i v09 = _mm_mulhi_epu16    (v08, as_m128v(VecDivPowers));
    92    __m128i v10 = _mm_mulhi_epu16    (v09, as_m128v(VecShiftPowers));
    93    __m128i v11 = _mm_mullo_epi16    (v10, as_m128v(Vec8x10));
    94    __m128i v12 = _mm_slli_epi64     (v11, 16);
    95    __m128i v13 = _mm_sub_epi16      (v10, v12);
    96    return v13;
    97}
    98
    99static inline int u32toa_small(char *out, uint32_t val) {
   100    int      n  = 0;
   101    uint32_t d1 = (val / 100) << 1;
   102    uint32_t d2 = (val % 100) << 1;
   103
   104    /* 1000-th digit */
   105    if (val >= 1000) {
   106        out[n++] = Digits[d1];
   107    }
   108
   109    /* 100-th digit */
   110    if (val >= 100) {
   111        out[n++] = Digits[d1 + 1];
   112    }
   113
   114    /* 10-th digit */
   115    if (val >= 10) {
   116        out[n++] = Digits[d2];
   117    }
   118
   119    /* last digit */
   120    out[n++] = Digits[d2 + 1];
   121    return n;
   122}
   123
   124static inline int u32toa_medium(char *out, uint32_t val) {
   125    int      n  = 0;
   126    uint32_t b  = val / 10000;
   127    uint32_t c  = val % 10000;
   128    uint32_t d1 = (b / 100) << 1;
   129    uint32_t d2 = (b % 100) << 1;
   130    uint32_t d3 = (c / 100) << 1;
   131    uint32_t d4 = (c % 100) << 1;
   132
   133    /* 10000000-th digit */
   134    if (val >= 10000000) {
   135        out[n++] = Digits[d1];
   136    }
   137
   138    /* 1000000-th digit */
   139    if (val >= 1000000) {
   140        out[n++] = Digits[d1 + 1];
   141    }
   142
   143    /* 100000-th digit */
   144    if (val >= 100000) {
   145        out[n++] = Digits[d2];
   146    }
   147
   148    /* remaining digits */
   149    out[n++] = Digits[d2 + 1];
   150    out[n++] = Digits[d3];
   151    out[n++] = Digits[d3 + 1];
   152    out[n++] = Digits[d4];
   153    out[n++] = Digits[d4 + 1];
   154    return n;
   155}
   156
   157static inline int u64toa_large_sse2(char *out, uint64_t val) {
   158    uint32_t a = (uint32_t)(val / 100000000);
   159    uint32_t b = (uint32_t)(val % 100000000);
   160
   161    /* convert to digits */
   162    __m128i v0 = itoa8_sse2(a);
   163    __m128i v1 = itoa8_sse2(b);
   164
   165    /* convert to bytes, add '0' */
   166    __m128i v2 = _mm_packus_epi16 (v0, v1);
   167    __m128i v3 = _mm_add_epi8     (v2, as_m128v(Vec16xA0));
   168
   169    /* count number of digit */
   170    __m128i  v4 = _mm_cmpeq_epi8    (v3, as_m128v(Vec16xA0));
   171    uint32_t bm = _mm_movemask_epi8 (v4);
   172    uint32_t nd = __builtin_ctz     (~bm | 0x8000);
   173
   174    /* shift digits to the beginning */
   175    __m128i p = _mm_loadu_si128  (as_m128c(&VecShiftShuffles[nd * 16]));
   176    __m128i r = _mm_shuffle_epi8 (v3, p);
   177
   178    /* store the result */
   179    _mm_storeu_si128(as_m128p(out), r);
   180    return 16 - nd;
   181}
   182
   183static inline int u64toa_xlarge_sse2(char *out, uint64_t val) {
   184    int      n = 0;
   185    uint64_t b = val % 10000000000000000;
   186    uint32_t a = (uint32_t)(val / 10000000000000000);
   187
   188    /* the highest 4 digits */
   189    if (a < 10) {
   190        n = itoa1(out, n, a);
   191    } else if (a < 100) {
   192        n = itoa2(out, n, a << 1);
   193    } else if (a < 1000) {
   194        n = itoa1(out, n, a / 100);
   195        n = itoa2(out, n, (a % 100) << 1);
   196    } else {
   197        n = itoa2(out, n, (a / 100) << 1);
   198        n = itoa2(out, n, (a % 100) << 1);
   199    }
   200
   201    /* remaining digits */
   202    __m128i v0 = itoa8_sse2       ((uint32_t)(b / 100000000));
   203    __m128i v1 = itoa8_sse2       ((uint32_t)(b % 100000000));
   204    __m128i v2 = _mm_packus_epi16 (v0, v1);
   205    __m128i v3 = _mm_add_epi8     (v2, as_m128v(Vec16xA0));
   206
   207    /* convert to bytes, add '0' */
   208    _mm_storeu_si128(as_m128p(&out[n]), v3);
   209    return n + 16;
   210}
   211
   212int i64toa(char *out, int64_t val) {
   213    if (likely(val >= 0)) {
   214        return u64toa(out, (uint64_t)val);
   215    } else {
   216        *out = '-';
   217        return u64toa(out + 1, (uint64_t)(-val)) + 1;
   218    }
   219}
   220
   221int u64toa(char *out, uint64_t val) {
   222    if (likely(val < 10000)) {
   223        return u32toa_small(out, (uint32_t)val);
   224    } else if (likely(val < 100000000)) {
   225        return u32toa_medium(out, (uint32_t)val);
   226    } else if (likely(val < 10000000000000000)) {
   227        return u64toa_large_sse2(out, val);
   228    } else {
   229        return u64toa_xlarge_sse2(out, val);
   230    }
   231}

View as plain text