1/* Copyright 2020 Alexander Bolz
2 *
3 * Boost Software License - Version 1.0 - August 17th, 2003
4 *
5 * Permission is hereby granted, free of charge, to any person or organization
6 * obtaining a copy of the software and accompanying documentation covered by
7 * this license (the "Software") to use, reproduce, display, distribute,
8 * execute, and transmit the Software, and to prepare derivative works of the
9 * Software, and to permit third-parties to whom the Software is furnished to
10 * do so, all subject to the following:
11 *
12 * The copyright notices in the Software and this entire statement, including
13 * the above license grant, this restriction and the following disclaimer,
14 * must be included in all copies of the Software, in whole or in part, and
15 * all derivative works of the Software, unless such copies or derivative
16 * works are solely in the form of machine-executable object code generated by
17 * a source language processor.
18 *
19 * Unless required by applicable law or agreed to in writing, this software
20 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
21 * KIND, either express or implied.
22 *
23 * This file may have been modified by ByteDance authors. All ByteDance
24 * Modifications are Copyright 2022 ByteDance Authors.
25 */
26
27#include "native.h"
28#include "tab.h"
29#include "test/xassert.h"
30
31#define F32_BITS 32
32#define F32_EXP_BITS 8
33#define F32_SIG_BITS 23
34#define F32_EXP_MASK 0x7F800000u // middle 8 bits
35#define F32_SIG_MASK 0x007FFFFFu // lower 23 bits
36#define F32_EXP_BIAS 127
37#define F32_INF_NAN_EXP 0xFF
38#define F32_HIDDEN_BIT 0x00800000u
39
40typedef struct {
41 uint32_t sig;
42 int32_t exp;
43} f32_dec;
44
45static inline unsigned ctz10_u32(const uint32_t v) {
46 xassert(0 <= v && v < 1000000000u);
47 if (v >= 100000) {
48 if (v < 1000000) return 6;
49 if (v < 10000000) return 7;
50 if (v < 100000000) return 8;
51 else return 9;
52 } else {
53 if (v < 10) return 1;
54 if (v < 100) return 2;
55 if (v < 1000) return 3;
56 if (v < 10000) return 4;
57 else return 5;
58 }
59}
60
61static inline char* format_significand_f32(uint32_t sig, char *out, int cnt) {
62 char *r = out + cnt;
63 int ctz = 0;
64
65 /* at most 9 digits here */
66 if (sig >= 10000) {
67 uint32_t c = sig - 10000 * (sig / 10000);
68 sig /= 10000;
69 if (c != 0) {
70 uint32_t c0 = (c % 100) << 1;
71 uint32_t c1 = (c / 100) << 1;
72 copy_two_digs(r - 2, Digits + c0);
73 copy_two_digs(r - 4, Digits + c1);
74 } else {
75 ctz = 4;
76 }
77 r -= 4;
78 }
79
80 while (sig >= 100) {
81 uint32_t c = (sig % 100) << 1;
82 sig /= 100;
83 copy_two_digs(r - 2, Digits + c);
84 r -= 2;
85 }
86
87 if (sig >= 10) {
88 uint32_t c = sig << 1;
89 copy_two_digs(out, Digits + c);
90 } else {
91 *out = (char) ('0' + sig);
92 }
93
94 return out + cnt - ctz;
95}
96
97static inline char* format_integer_u32(uint32_t sig, char *out, unsigned cnt) {
98 char *r = out + cnt;
99
100 /* at most 9 digits here */
101 if (sig >= 10000) {
102 uint32_t c = sig - 10000 * (sig / 10000);
103 sig /= 10000;
104 uint32_t c0 = (c % 100) << 1;
105 uint32_t c1 = (c / 100) << 1;
106 copy_two_digs(r - 2, Digits + c0);
107 copy_two_digs(r - 4, Digits + c1);
108 r -= 4;
109 }
110
111 while (sig >= 100) {
112 uint32_t c = (sig % 100) << 1;
113 sig /= 100;
114 copy_two_digs(r - 2, Digits + c);
115 r -= 2;
116 }
117
118 if (sig >= 10) {
119 uint32_t c = sig << 1;
120 copy_two_digs(out, Digits + c);
121 } else {
122 *out = (char) ('0' + sig);
123 }
124
125 return out + cnt;
126}
127
128static inline char* format_exponent_f32(f32_dec v, char *out, int cnt) {
129 char* p = out + 1;
130 char* end = format_significand_f32(v.sig, p, cnt);
131 while (*(end - 1) == '0') end--;
132
133 /* Print decimal point if needed */
134 *out = *p;
135 if (end - p > 1) {
136 *p = '.';
137 } else {
138 end--;
139 }
140
141 /* Print the exponent */
142 *end++ = 'e';
143 int32_t exp = v.exp + (int32_t) cnt - 1;
144 if (exp < 0) {
145 *end++ = '-';
146 exp = -exp;
147 } else {
148 *end++ = '+';
149 }
150
151 if (exp >= 100) {
152 int32_t c = exp % 10;
153 copy_two_digs(end, Digits + 2 * (exp / 10));
154 end[2] = (char) ('0' + c);
155 end += 3;
156 } else if (exp >= 10) {
157 copy_two_digs(end, Digits + 2 * exp);
158 end += 2;
159 } else {
160 *end++ = (char) ('0' + exp);
161 }
162 return end;
163}
164
165static inline char* format_decimal_f32(f32_dec v, char* out, int cnt) {
166 char* p = out;
167 char* end;
168 int point = cnt + v.exp;
169
170 /* print leading zeros if fp < 1 */
171 if (point <= 0) {
172 *p++ = '0', *p++ = '.';
173 for (int i = 0; i < -point; i++) {
174 *p++ = '0';
175 }
176 }
177
178 /* add the remaining digits */
179 end = format_significand_f32(v.sig, p, cnt);
180 while (*(end - 1) == '0') end--;
181 if (point <= 0) {
182 return end;
183 }
184
185 /* insert point or add trailing zeros */
186 int digs = end - p, frac = digs - point;
187 if (digs > point) {
188 for (int i = 0; i < frac; i++) {
189 *(end - i) = *(end - i - 1);
190 }
191 p[point] = '.';
192 end++;
193 } else {
194 for (int i = 0; i < point - digs; i++) {
195 *end++ = '0';
196 }
197 }
198 return end;
199}
200
201static inline char* write_dec_f32(f32_dec dec, char* p) {
202 int cnt = ctz10_u32(dec.sig);
203 int dot = cnt + dec.exp;
204 int sci_exp = dot - 1;
205 bool exp_fmt = sci_exp < -6 || sci_exp > 20;
206 bool has_dot = dot < cnt;
207
208 if (exp_fmt) {
209 return format_exponent_f32(dec, p, cnt);
210 }
211 if (has_dot) {
212 return format_decimal_f32(dec, p, cnt);
213 }
214
215 char* end = p + dot;
216 p = format_integer_u32(dec.sig, p, cnt);
217 while (p < end) *p++ = '0';
218 return end;
219}
220
221static inline uint32_t f32toraw(float fp) {
222 union {
223 uint32_t u32;
224 float f32;
225 } uval;
226 uval.f32 = fp;
227 return uval.u32;
228}
229
230static inline uint64_t pow10_ceil_sig_f32(int32_t k)
231{
232 // There are unique beta and r such that 10^k = beta 2^r and
233 // 2^63 <= beta < 2^64, namely r = floor(log_2 10^k) - 63 and
234 // beta = 2^-r 10^k.
235 // Let g = ceil(beta), so (g-1) 2^r < 10^k <= g 2^r, with the latter
236 // value being a pretty good overestimate for 10^k.
237
238 // NB: Since for all the required exponents k, we have g < 2^64,
239 // all constants can be stored in 128-bit integers.
240 // reference from:
241 // https://github.com/abolz/Drachennest/blob/master/src/schubfach_32.cc#L144
242
243#define KMAX 45
244#define KMIN -31
245 static const uint64_t g[KMAX - KMIN + 1] = {
246 0x81CEB32C4B43FCF5, // -31
247 0xA2425FF75E14FC32, // -30
248 0xCAD2F7F5359A3B3F, // -29
249 0xFD87B5F28300CA0E, // -28
250 0x9E74D1B791E07E49, // -27
251 0xC612062576589DDB, // -26
252 0xF79687AED3EEC552, // -25
253 0x9ABE14CD44753B53, // -24
254 0xC16D9A0095928A28, // -23
255 0xF1C90080BAF72CB2, // -22
256 0x971DA05074DA7BEF, // -21
257 0xBCE5086492111AEB, // -20
258 0xEC1E4A7DB69561A6, // -19
259 0x9392EE8E921D5D08, // -18
260 0xB877AA3236A4B44A, // -17
261 0xE69594BEC44DE15C, // -16
262 0x901D7CF73AB0ACDA, // -15
263 0xB424DC35095CD810, // -14
264 0xE12E13424BB40E14, // -13
265 0x8CBCCC096F5088CC, // -12
266 0xAFEBFF0BCB24AAFF, // -11
267 0xDBE6FECEBDEDD5BF, // -10
268 0x89705F4136B4A598, // -9
269 0xABCC77118461CEFD, // -8
270 0xD6BF94D5E57A42BD, // -7
271 0x8637BD05AF6C69B6, // -6
272 0xA7C5AC471B478424, // -5
273 0xD1B71758E219652C, // -4
274 0x83126E978D4FDF3C, // -3
275 0xA3D70A3D70A3D70B, // -2
276 0xCCCCCCCCCCCCCCCD, // -1
277 0x8000000000000000, // 0
278 0xA000000000000000, // 1
279 0xC800000000000000, // 2
280 0xFA00000000000000, // 3
281 0x9C40000000000000, // 4
282 0xC350000000000000, // 5
283 0xF424000000000000, // 6
284 0x9896800000000000, // 7
285 0xBEBC200000000000, // 8
286 0xEE6B280000000000, // 9
287 0x9502F90000000000, // 10
288 0xBA43B74000000000, // 11
289 0xE8D4A51000000000, // 12
290 0x9184E72A00000000, // 13
291 0xB5E620F480000000, // 14
292 0xE35FA931A0000000, // 15
293 0x8E1BC9BF04000000, // 16
294 0xB1A2BC2EC5000000, // 17
295 0xDE0B6B3A76400000, // 18
296 0x8AC7230489E80000, // 19
297 0xAD78EBC5AC620000, // 20
298 0xD8D726B7177A8000, // 21
299 0x878678326EAC9000, // 22
300 0xA968163F0A57B400, // 23
301 0xD3C21BCECCEDA100, // 24
302 0x84595161401484A0, // 25
303 0xA56FA5B99019A5C8, // 26
304 0xCECB8F27F4200F3A, // 27
305 0x813F3978F8940985, // 28
306 0xA18F07D736B90BE6, // 29
307 0xC9F2C9CD04674EDF, // 30
308 0xFC6F7C4045812297, // 31
309 0x9DC5ADA82B70B59E, // 32
310 0xC5371912364CE306, // 33
311 0xF684DF56C3E01BC7, // 34
312 0x9A130B963A6C115D, // 35
313 0xC097CE7BC90715B4, // 36
314 0xF0BDC21ABB48DB21, // 37
315 0x96769950B50D88F5, // 38
316 0xBC143FA4E250EB32, // 39
317 0xEB194F8E1AE525FE, // 40
318 0x92EFD1B8D0CF37BF, // 41
319 0xB7ABC627050305AE, // 42
320 0xE596B7B0C643C71A, // 43
321 0x8F7E32CE7BEA5C70, // 44
322 0xB35DBF821AE4F38C, // 45
323 };
324
325 xassert(k >= KMIN && k <= KMAX);
326 return g[k - KMIN];
327#undef KMIN
328#undef KMAX
329}
330
331static inline uint32_t round_odd_f32(uint64_t g, uint32_t cp) {
332 const uint128_t p = ((uint128_t)g) * cp;
333 const uint32_t y1 = (uint64_t)(p >> 64);
334 const uint32_t y0 = ((uint64_t)(p)) >> 32;
335 return y1 | (y0 > 1);
336}
337
338/**
339 Rendering float point number into decimal.
340 The function used Schubfach algorithm, reference:
341 The Schubfach way to render doubles, Raffaello Giulietti, 2022-03-20.
342 https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb
343 https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html
344 https://github.com/openjdk/jdk/pull/3402 (Java implementation)
345 https://github.com/abolz/Drachennest (C++ implementation)
346 */
347static inline f32_dec f32todec(uint32_t rsig, int32_t rexp, uint32_t c, int32_t q) {
348 uint32_t cbl, cb, cbr, vbl, vb, vbr, lower, upper, s;
349 int32_t k, h;
350 bool even, irregular, w_inside, u_inside;
351 f32_dec dec;
352
353 even = !(c & 1);
354 irregular = rsig == 0 && rexp > 1;
355
356 cbl = 4 * c - 2 + irregular;
357 cb = 4 * c;
358 cbr = 4 * c + 2;
359
360 k = (q * 1262611 - (irregular ? 524031 : 0)) >> 22;
361 h = q + ((-k) * 1741647 >> 19) + 1;
362 uint64_t pow10 = pow10_ceil_sig_f32(-k);
363 vbl = round_odd_f32(pow10, cbl << h);
364 vb = round_odd_f32(pow10, cb << h);
365 vbr = round_odd_f32(pow10, cbr << h);
366
367
368 lower = vbl + !even;
369 upper = vbr - !even;
370
371 s = vb / 4;
372 if (s >= 10) {
373 uint64_t sp = s / 10;
374 bool up_inside = lower <= (40 * sp);
375 bool wp_inside = (40 * sp + 40) <= upper;
376 if (up_inside != wp_inside) {
377 dec.sig = sp + wp_inside;
378 dec.exp = k + 1;
379 return dec;
380 }
381 }
382
383 u_inside = lower <= (4 * s);
384 w_inside = (4 * s + 4) <= upper;
385 if (u_inside != w_inside) {
386 dec.sig = s + w_inside;
387 dec.exp = k;
388 return dec;
389 }
390
391 uint64_t mid = 4 * s + 2;
392 bool round_up = vb > mid || (vb == mid && (s & 1) != 0);
393 dec.sig = s + round_up;
394 dec.exp = k;
395 return dec;
396}
397
398int f32toa(char *out, float fp) {
399 char* p = out;
400 uint32_t raw = f32toraw(fp);
401 bool neg;
402 uint32_t rsig, c;
403 int32_t rexp, q;
404
405 neg = ((raw >> (F32_BITS - 1)) != 0);
406 rsig = raw & F32_SIG_MASK;
407 rexp = (int32_t)((raw & F32_EXP_MASK) >> F32_SIG_BITS);
408
409 /* check infinity and nan */
410 if (unlikely(rexp == F32_INF_NAN_EXP)) {
411 return 0;
412 }
413
414 /* check negative numbers */
415 *p = '-';
416 p += neg;
417
418 /* simple case of 0.0 */
419 if ((raw << 1) == 0) {
420 *p++ = '0';
421 return p - out;
422 }
423
424 if (likely(rexp != 0)) {
425 /* double is normal */
426 c = rsig | F32_HIDDEN_BIT;
427 q = rexp - F32_EXP_BIAS - F32_SIG_BITS;
428
429 /* fast path for integer */
430 if (q <= 0 && q >= -F32_SIG_BITS && is_div_pow2(c, -q)) {
431 uint32_t u = c >> -q;
432 p = format_integer_u32(u, p, ctz10_u32(u));
433 return p - out;
434 }
435 } else {
436 c = rsig;
437 q = 1 - F32_EXP_BIAS - F32_SIG_BITS;
438 }
439
440 f32_dec dec = f32todec(rsig, rexp, c, q);
441 p = write_dec_f32(dec, p);
442 return p - out;
443}
444
445#undef F32_BITS
446#undef F32_EXP_BITS
447#undef F32_SIG_BITS
448#undef F32_EXP_MASK
449#undef F32_SIG_MASK
450#undef F32_EXP_BIAS
451#undef F32_INF_NAN_EXP
452#undef F32_HIDDEN_BIT
View as plain text