HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
amd_hip_bf16.h
Go to the documentation of this file.
1
85#ifndef _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
86#define _HIP_INCLUDE_HIP_AMD_DETAIL_HIP_BF16_H_
87
88#if !defined(__HIPCC_RTC__)
89#include <hip/amd_detail/amd_hip_common.h>
90#endif // !defined(__HIPCC_RTC__)
91
92#include "amd_hip_vector_types.h" // float2 etc
93#include "device_library_decls.h" // ocml conversion functions
94#include "math_fwd.h" // ocml device functions
95
96#if defined(__HIPCC_RTC__)
97#define __HOST_DEVICE__ __device__
98#else
99#include <climits>
100#define __HOST_DEVICE__ __host__ __device__
101#endif
102
103// Since we are using unsigned short to represent data in bfloat16, it can be of different sizes on
104// different machines. These naive checks should prevent some undefined behavior on systems which
105// have different sizes for basic types.
106#if !defined(__HIPCC_RTC__)
107static_assert(CHAR_BIT == 8, "byte size should be of 8 bits");
108#endif
109static_assert(sizeof(unsigned short) == 2, "size of unsigned short should be 2 bytes");
110
113 unsigned short data;
114};
115
120};
121
126__HOST_DEVICE__ inline float __bfloat162float(__hip_bfloat16 a) {
127 unsigned int uval = 0;
128 uval = a.data << 16;
129 union {
130 unsigned int u32;
131 float fp32;
132 } u = {uval};
133 return u.fp32;
134}
135
140__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f) {
141 __hip_bfloat16 ret;
142 union {
143 float fp32;
144 unsigned int u32;
145 } u = {f};
146 if (~u.u32 & 0x7f800000) {
147 // When the exponent bits are not all 1s, then the value is zero, normal,
148 // or subnormal. We round the bfloat16 mantissa up by adding 0x7FFF, plus
149 // 1 if the least significant bit of the bfloat16 mantissa is 1 (odd).
150 // This causes the bfloat16's mantissa to be incremented by 1 if the 16
151 // least significant bits of the float mantissa are greater than 0x8000,
152 // or if they are equal to 0x8000 and the least significant bit of the
153 // bfloat16 mantissa is 1 (odd). This causes it to be rounded to even when
154 // the lower 16 bits are exactly 0x8000. If the bfloat16 mantissa already
155 // has the value 0x7f, then incrementing it causes it to become 0x00 and
156 // the exponent is incremented by one, which is the next higher FP value
157 // to the unrounded bfloat16 value. When the bfloat16 value is subnormal
158 // with an exponent of 0x00 and a mantissa of 0x7F, it may be rounded up
159 // to a normal value with an exponent of 0x01 and a mantissa of 0x00.
160 // When the bfloat16 value has an exponent of 0xFE and a mantissa of 0x7F,
161 // incrementing it causes it to become an exponent of 0xFF and a mantissa
162 // of 0x00, which is Inf, the next higher value to the unrounded value.
163 u.u32 += 0x7fff + ((u.u32 >> 16) & 1); // Round to nearest, round to even
164 } else if (u.u32 & 0xffff) {
165 // When all of the exponent bits are 1, the value is Inf or NaN.
166 // Inf is indicated by a zero mantissa. NaN is indicated by any nonzero
167 // mantissa bit. Quiet NaN is indicated by the most significant mantissa
168 // bit being 1. Signaling NaN is indicated by the most significant
169 // mantissa bit being 0 but some other bit(s) being 1. If any of the
170 // lower 16 bits of the mantissa are 1, we set the least significant bit
171 // of the bfloat16 mantissa, in order to preserve signaling NaN in case
172 // the bloat16's mantissa bits are all 0.
173 u.u32 |= 0x10000; // Preserve signaling NaN
174 }
175
176 ret.data = (u.u32 >> 16);
177 return ret;
178}
179
184__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a) {
185 return float2{__bfloat162float(a.x), __bfloat162float(a.y)};
186}
187
193 return __hip_bfloat162{a, a};
194}
195
200__device__ short int __bfloat16_as_short(const __hip_bfloat16 h) { return (short)h.data; }
201
206__device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h) { return h.data; }
207
212__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a) {
213 return __float2bfloat16((float)a);
214}
215
222}
223
229 return __hip_bfloat162{a, b};
230}
231
236__device__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a) { return a.y; }
237
243 return __hip_bfloat162{a.y, a.y};
244}
245
250__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a) { return __bfloat162float(a.y); }
251
257 return __hip_bfloat162{a.y, b.y};
258}
259
264__device__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a) { return a.x; }
265
271 return __hip_bfloat162{a.x, a.x};
272}
273
278__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a) { return __bfloat162float(a.x); }
279
285 return __hip_bfloat162{a.y, a.x};
286}
287
293 return __hip_bfloat162{a.x, b.x};
294}
295
300__device__ __hip_bfloat16 __short_as_bfloat16(const short int a) {
301 return __hip_bfloat16{(unsigned short)a};
302}
303
308__device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a) {
309 return __hip_bfloat16{a};
310}
311
312
320
328
336
342 const __hip_bfloat16 c) {
343 return __float2bfloat16(
344 __ocml_fma_f32(__bfloat162float(a), __bfloat162float(b), __bfloat162float(c)));
345}
346
354
360 auto ret = a;
361 ret.data ^= 0x8000;
362 return ret;
363}
364
370 auto ret = a;
371 ret.data &= 0x7FFF;
372 return ret;
373}
374
383
389 return __hip_bfloat162{__habs(a.x), __habs(a.y)};
390}
391
397 return __hip_bfloat162{__hadd(a.x, b.x), __hadd(a.y, b.y)};
398}
399
405 const __hip_bfloat162 c) {
406 return __hip_bfloat162{__hfma(a.x, b.x, c.x), __hfma(a.y, b.y, c.y)};
407}
408
414 return __hip_bfloat162{__hmul(a.x, b.x), __hmul(a.y, b.y)};
415}
416
422 return __hip_bfloat162{__hneg(a.x), __hneg(a.y)};
423}
424
430 return __hip_bfloat162{__hsub(a.x, b.x), __hsub(a.y, b.y)};
431}
432
437__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b) {
438 return __bfloat162float(a) == __bfloat162float(b);
439}
440
445__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b) {
446 return !(__bfloat162float(a) < __bfloat162float(b)) &&
448}
449
454__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
455 return __bfloat162float(a) > __bfloat162float(b);
456}
457
462__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
463 return !(__bfloat162float(a) <= __bfloat162float(b));
464}
465
470__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b) {
471 return __bfloat162float(a) >= __bfloat162float(b);
472}
473
478__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
479 return !(__bfloat162float(a) < __bfloat162float(b));
480}
481
486__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b) {
487 return __bfloat162float(a) != __bfloat162float(b);
488}
489
494__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
495 return !(__bfloat162float(a) == __bfloat162float(b));
496}
497
503 return __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a), __bfloat162float(b)));
504}
505
511 return __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a), __bfloat162float(b)));
512}
513
518__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b) {
519 return __bfloat162float(a) < __bfloat162float(b);
520}
521
526__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
527 return !(__bfloat162float(a) >= __bfloat162float(b));
528}
529
534__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b) {
535 return __bfloat162float(a) <= __bfloat162float(b);
536}
537
542__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b) {
543 return !(__bfloat162float(a) > __bfloat162float(b));
544}
545
550__device__ int __hisinf(const __hip_bfloat16 a) { return __ocml_isinf_f32(__bfloat162float(a)); }
551
556__device__ bool __hisnan(const __hip_bfloat16 a) { return __ocml_isnan_f32(__bfloat162float(a)); }
557
562__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
563 return __heq(a.x, b.x) && __heq(a.y, b.y);
564}
565
570__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
571 return __hequ(a.x, b.x) && __hequ(a.y, b.y);
572}
573
578__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
579 return __hge(a.x, b.x) && __hge(a.y, b.y);
580}
581
586__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
587 return __hgeu(a.x, b.x) && __hgeu(a.y, b.y);
588}
589
594__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
595 return __hgt(a.x, b.x) && __hgt(a.y, b.y);
596}
597
602__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
603 return __hgtu(a.x, b.x) && __hgtu(a.y, b.y);
604}
605
610__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
611 return __hle(a.x, b.x) && __hle(a.y, b.y);
612}
613
618__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
619 return __hleu(a.x, b.x) && __hleu(a.y, b.y);
620}
621
626__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
627 return __hlt(a.x, b.x) && __hlt(a.y, b.y);
628}
629
634__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
635 return __hltu(a.x, b.x) && __hltu(a.y, b.y);
636}
637
642__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
643 return __hne(a.x, b.x) && __hne(a.y, b.y);
644}
645
650__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b) {
651 return __hneu(a.x, b.x) && __hneu(a.y, b.y);
652}
653
659 return __hip_bfloat162{{__heq(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
660 {__heq(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
661}
662
668 return __hip_bfloat162{{__hge(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
669 {__hge(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
670}
671
677 return __hip_bfloat162{{__hgt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
678 {__hgt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
679}
680
686 return __hip_bfloat162{
687 {__ocml_isnan_f32(__bfloat162float(a.x)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
688 {__ocml_isnan_f32(__bfloat162float(a.y)) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
689}
690
696 return __hip_bfloat162{{__hle(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
697 {__hle(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
698}
699
705 return __hip_bfloat162{{__hlt(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
706 {__hlt(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
707}
708
714 return __hip_bfloat162{
715 __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
716 __float2bfloat16(__ocml_fmax_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
717}
718
724 return __hip_bfloat162{
725 __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.x), __bfloat162float(b.x))),
726 __float2bfloat16(__ocml_fmin_f32(__bfloat162float(a.y), __bfloat162float(b.y)))};
727}
728
734 return __hip_bfloat162{{__hne(a.x, b.x) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)},
735 {__hne(a.y, b.y) ? __float2bfloat16(1.0f) : __float2bfloat16(0.0f)}};
736}
737
743 return __float2bfloat16(__ocml_ceil_f32(__bfloat162float(h)));
744}
745
750__device__ __hip_bfloat16 hcos(const __hip_bfloat16 h) {
751 return __float2bfloat16(__ocml_cos_f32(__bfloat162float(h)));
752}
753
758__device__ __hip_bfloat16 hexp(const __hip_bfloat16 h) {
759 return __float2bfloat16(__ocml_exp_f32(__bfloat162float(h)));
760}
761
767 return __float2bfloat16(__ocml_exp10_f32(__bfloat162float(h)));
768}
769
775 return __float2bfloat16(__ocml_exp2_f32(__bfloat162float(h)));
776}
777
783 return __float2bfloat16(__ocml_floor_f32(__bfloat162float(h)));
784}
785
790__device__ __hip_bfloat16 hlog(const __hip_bfloat16 h) {
791 return __float2bfloat16(__ocml_log_f32(__bfloat162float(h)));
792}
793
799 return __float2bfloat16(__ocml_log10_f32(__bfloat162float(h)));
800}
801
807 return __float2bfloat16(__ocml_log2_f32(__bfloat162float(h)));
808}
809
814__device__ __hip_bfloat16 hrcp(const __hip_bfloat16 h) {
815 return __float2bfloat16(1.0f / (__bfloat162float(h)));
816}
817
823 return __float2bfloat16(__ocml_rint_f32(__bfloat162float(h)));
824}
825
831 return __float2bfloat16(__ocml_rsqrt_f32(__bfloat162float(h)));
832}
833
838__device__ __hip_bfloat16 hsin(const __hip_bfloat16 h) {
839 return __float2bfloat16(__ocml_sin_f32(__bfloat162float(h)));
840}
841
847 return __float2bfloat16(__ocml_sqrt_f32(__bfloat162float(h)));
848}
849
855 return __float2bfloat16(__ocml_trunc_f32(__bfloat162float(h)));
856}
857
863 return __hip_bfloat162{hceil(h.x), hceil(h.y)};
864}
865
871 return __hip_bfloat162{hcos(h.x), hcos(h.y)};
872}
873
879 return __hip_bfloat162{hexp(h.x), hexp(h.y)};
880}
881
887 return __hip_bfloat162{hexp10(h.x), hexp10(h.y)};
888}
889
895 return __hip_bfloat162{hexp2(h.x), hexp2(h.y)};
896}
897
903 return __hip_bfloat162{hfloor(h.x), hfloor(h.y)};
904}
905
911 return __hip_bfloat162{hlog(h.x), hlog(h.y)};
912}
913
919 return __hip_bfloat162{hlog10(h.x), hlog10(h.y)};
920}
921
927 return __hip_bfloat162{hlog2(h.x), hlog2(h.y)};
928}
929
935 return __hip_bfloat162{hrcp(h.x), hrcp(h.y)};
936}
937
943 return __hip_bfloat162{hrint(h.x), hrint(h.y)};
944}
945
951 return __hip_bfloat162{hrsqrt(h.x), hrsqrt(h.y)};
952}
953
959 return __hip_bfloat162{hsin(h.x), hsin(h.y)};
960}
961
967 return __hip_bfloat162{hsqrt(h.x), hsqrt(h.y)};
968}
969
975 return __hip_bfloat162{htrunc(h.x), htrunc(h.y)};
976}
977
978#endif
Contains declarations for types and functions in device library. Uses int64_t and uint64_t instead of...
__device__ __hip_bfloat16 __habs(const __hip_bfloat16 a)
Returns absolute of a bfloat16.
Definition amd_hip_bf16.h:369
__device__ __hip_bfloat16 __hsub(const __hip_bfloat16 a, const __hip_bfloat16 b)
Subtracts two bfloat16 values.
Definition amd_hip_bf16.h:325
__device__ __hip_bfloat16 __hfma(const __hip_bfloat16 a, const __hip_bfloat16 b, const __hip_bfloat16 c)
Performs FMA of given bfloat16 values.
Definition amd_hip_bf16.h:341
__device__ __hip_bfloat16 __hdiv(const __hip_bfloat16 a, const __hip_bfloat16 b)
Divides two bfloat16 values.
Definition amd_hip_bf16.h:333
__device__ __hip_bfloat16 __hneg(const __hip_bfloat16 a)
Negate a bfloat16 value.
Definition amd_hip_bf16.h:359
__device__ __hip_bfloat16 __hmul(const __hip_bfloat16 a, const __hip_bfloat16 b)
Multiplies two bfloat16 values.
Definition amd_hip_bf16.h:351
__device__ __hip_bfloat16 __hadd(const __hip_bfloat16 a, const __hip_bfloat16 b)
Adds two bfloat16 values.
Definition amd_hip_bf16.h:317
__device__ bool __hge(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than equal.
Definition amd_hip_bf16.h:470
__device__ bool __hlt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than operator.
Definition amd_hip_bf16.h:518
__device__ bool __hneu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered not equal.
Definition amd_hip_bf16.h:494
__device__ bool __hgt(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - greater than.
Definition amd_hip_bf16.h:454
__device__ bool __heq(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values.
Definition amd_hip_bf16.h:437
__device__ bool __hltu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than.
Definition amd_hip_bf16.h:526
__device__ bool __hleu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered less than equal.
Definition amd_hip_bf16.h:542
__device__ __hip_bfloat16 __hmin(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return min.
Definition amd_hip_bf16.h:510
__device__ bool __hne(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - not equal.
Definition amd_hip_bf16.h:486
__device__ bool __hequ(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered equal.
Definition amd_hip_bf16.h:445
__device__ __hip_bfloat16 __hmax(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - return max.
Definition amd_hip_bf16.h:502
__device__ bool __hgtu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than.
Definition amd_hip_bf16.h:462
__device__ int __hisinf(const __hip_bfloat16 a)
Checks if number is inf.
Definition amd_hip_bf16.h:550
__device__ bool __hle(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - less than.
Definition amd_hip_bf16.h:534
__device__ bool __hisnan(const __hip_bfloat16 a)
Checks if number is nan.
Definition amd_hip_bf16.h:556
__device__ bool __hgeu(const __hip_bfloat16 a, const __hip_bfloat16 b)
Compare two bfloat162 values - unordered greater than equal.
Definition amd_hip_bf16.h:478
__device__ bool __hbgeu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b - unordered.
Definition amd_hip_bf16.h:586
__device__ __hip_bfloat162 __hge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:667
__device__ bool __hbltu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b - unordered.
Definition amd_hip_bf16.h:634
__device__ bool __hble2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b.
Definition amd_hip_bf16.h:610
__device__ __hip_bfloat162 __hmin2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns min of two elements.
Definition amd_hip_bf16.h:723
__device__ __hip_bfloat162 __hlt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:704
__device__ bool __hbequ2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal - unordered.
Definition amd_hip_bf16.h:570
__device__ bool __hbeq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks if two numbers are equal.
Definition amd_hip_bf16.h:562
__device__ __hip_bfloat162 __hle2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:695
__device__ __hip_bfloat162 __hmax2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Returns max of two elements.
Definition amd_hip_bf16.h:713
__device__ __hip_bfloat162 __heq2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b, returns 1.0 if equal, otherwise 0.0.
Definition amd_hip_bf16.h:658
__device__ __hip_bfloat162 __hne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Checks for not equal to.
Definition amd_hip_bf16.h:733
__device__ bool __hblt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a < b.
Definition amd_hip_bf16.h:626
__device__ bool __hbge2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a >= b.
Definition amd_hip_bf16.h:578
__device__ bool __hbgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b.
Definition amd_hip_bf16.h:594
__device__ bool __hbneu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:650
__device__ bool __hbleu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a <= b - unordered.
Definition amd_hip_bf16.h:618
__device__ __hip_bfloat162 __hisnan2(const __hip_bfloat162 a)
Check for a is NaN, returns 1.0 if NaN, otherwise 0.0.
Definition amd_hip_bf16.h:685
__device__ bool __hbne2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a != b.
Definition amd_hip_bf16.h:642
__device__ __hip_bfloat162 __hgt2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b, returns 1.0 if greater than equal, otherwise 0.0.
Definition amd_hip_bf16.h:676
__device__ bool __hbgtu2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Check for a > b - unordered.
Definition amd_hip_bf16.h:602
__device__ __hip_bfloat162 __hfma2(const __hip_bfloat162 a, const __hip_bfloat162 b, const __hip_bfloat162 c)
Performs FMA of given bfloat162 values.
Definition amd_hip_bf16.h:404
__device__ __hip_bfloat162 __h2div(const __hip_bfloat162 a, const __hip_bfloat162 b)
Divides bfloat162 values.
Definition amd_hip_bf16.h:379
__device__ __hip_bfloat162 __habs2(const __hip_bfloat162 a)
Returns absolute of a bfloat162.
Definition amd_hip_bf16.h:388
__device__ __hip_bfloat162 __hadd2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Adds two bfloat162 values.
Definition amd_hip_bf16.h:396
__device__ __hip_bfloat162 __hsub2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Subtracts two bfloat162 values.
Definition amd_hip_bf16.h:429
__device__ __hip_bfloat162 __hneg2(const __hip_bfloat162 a)
Converts a bfloat162 into negative.
Definition amd_hip_bf16.h:421
__device__ __hip_bfloat162 __hmul2(const __hip_bfloat162 a, const __hip_bfloat162 b)
Multiplies two bfloat162 values.
Definition amd_hip_bf16.h:413
__HOST_DEVICE__ float __bfloat162float(__hip_bfloat16 a)
Converts bfloat16 to float.
Definition amd_hip_bf16.h:126
__HOST_DEVICE__ __hip_bfloat16 __float2bfloat16(float f)
Converts float to bfloat16.
Definition amd_hip_bf16.h:140
__device__ unsigned short int __bfloat16_as_ushort(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as an unsigned signed short integer.
Definition amd_hip_bf16.h:206
__HOST_DEVICE__ __hip_bfloat16 __double2bfloat16(const double a)
Convert double to __hip_bfloat16.
Definition amd_hip_bf16.h:212
__device__ __hip_bfloat16 __short_as_bfloat16(const short int a)
Reinterprets short int into a bfloat16.
Definition amd_hip_bf16.h:300
__HOST_DEVICE__ float __high2float(const __hip_bfloat162 a)
Converts high 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:250
__HOST_DEVICE__ __hip_bfloat162 __float22bfloat162_rn(const float2 a)
Convert float2 to __hip_bfloat162.
Definition amd_hip_bf16.h:220
__device__ __hip_bfloat162 __low2bfloat162(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:270
__device__ __hip_bfloat16 __ushort_as_bfloat16(const unsigned short int a)
Reinterprets unsigned short int into a bfloat16.
Definition amd_hip_bf16.h:308
__device__ __hip_bfloat162 __highs2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts high 16 bits from each and combines them.
Definition amd_hip_bf16.h:256
__device__ __hip_bfloat162 __bfloat162bfloat162(const __hip_bfloat16 a)
Moves bfloat16 value to bfloat162.
Definition amd_hip_bf16.h:192
__HOST_DEVICE__ float __low2float(const __hip_bfloat162 a)
Converts low 16 bits of __hip_bfloat162 to float and returns the result.
Definition amd_hip_bf16.h:278
__device__ __hip_bfloat16 __high2bfloat16(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:236
__device__ short int __bfloat16_as_short(const __hip_bfloat16 h)
Reinterprets bits in a __hip_bfloat16 as a signed short integer.
Definition amd_hip_bf16.h:200
__device__ __hip_bfloat162 __lowhigh2highlow(const __hip_bfloat162 a)
Swaps both halves.
Definition amd_hip_bf16.h:284
__device__ __hip_bfloat162 __halves2bfloat162(const __hip_bfloat16 a, const __hip_bfloat16 b)
Combine two __hip_bfloat16 to __hip_bfloat162.
Definition amd_hip_bf16.h:228
__device__ __hip_bfloat162 __lows2bfloat162(const __hip_bfloat162 a, const __hip_bfloat162 b)
Extracts low 16 bits from each and combines them.
Definition amd_hip_bf16.h:292
__device__ __hip_bfloat16 __low2bfloat16(const __hip_bfloat162 a)
Returns low 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:264
__device__ __hip_bfloat162 __high2bfloat162(const __hip_bfloat162 a)
Returns high 16 bits of __hip_bfloat162.
Definition amd_hip_bf16.h:242
__HOST_DEVICE__ float2 __bfloat1622float2(const __hip_bfloat162 a)
Converts and moves bfloat162 to float2.
Definition amd_hip_bf16.h:184
__device__ __hip_bfloat16 hexp(const __hip_bfloat16 h)
Calculate exponential of bfloat16.
Definition amd_hip_bf16.h:758
__device__ __hip_bfloat16 hrint(const __hip_bfloat16 h)
Round to nearest int.
Definition amd_hip_bf16.h:822
__device__ __hip_bfloat16 hrsqrt(const __hip_bfloat16 h)
Reciprocal square root.
Definition amd_hip_bf16.h:830
__device__ __hip_bfloat16 hcos(const __hip_bfloat16 h)
Calculate cosine of bfloat16.
Definition amd_hip_bf16.h:750
__device__ __hip_bfloat16 htrunc(const __hip_bfloat16 h)
Calculate truncate of bfloat16.
Definition amd_hip_bf16.h:854
__device__ __hip_bfloat16 hlog10(const __hip_bfloat16 h)
Calculate log 10 of bfloat16.
Definition amd_hip_bf16.h:798
__device__ __hip_bfloat16 hexp10(const __hip_bfloat16 h)
Calculate exponential 10 of bfloat16.
Definition amd_hip_bf16.h:766
__device__ __hip_bfloat16 hceil(const __hip_bfloat16 h)
Calculate ceil of bfloat16.
Definition amd_hip_bf16.h:742
__device__ __hip_bfloat16 hrcp(const __hip_bfloat16 h)
Calculate reciprocal.
Definition amd_hip_bf16.h:814
__device__ __hip_bfloat16 hsqrt(const __hip_bfloat16 h)
Calculate sqrt of bfloat16.
Definition amd_hip_bf16.h:846
__device__ __hip_bfloat16 hfloor(const __hip_bfloat16 h)
Calculate floor of bfloat16.
Definition amd_hip_bf16.h:782
__device__ __hip_bfloat16 hsin(const __hip_bfloat16 h)
Calculate sin of bfloat16.
Definition amd_hip_bf16.h:838
__device__ __hip_bfloat16 hlog(const __hip_bfloat16 h)
Calculate natural log of bfloat16.
Definition amd_hip_bf16.h:790
__device__ __hip_bfloat16 hlog2(const __hip_bfloat16 h)
Calculate log 2 of bfloat16.
Definition amd_hip_bf16.h:806
__device__ __hip_bfloat16 hexp2(const __hip_bfloat16 h)
Calculate exponential 2 of bfloat16.
Definition amd_hip_bf16.h:774
__device__ __hip_bfloat162 h2sin(const __hip_bfloat162 h)
Calculate sin of bfloat162.
Definition amd_hip_bf16.h:958
__device__ __hip_bfloat162 h2log(const __hip_bfloat162 h)
Calculate natural log of bfloat162.
Definition amd_hip_bf16.h:910
__device__ __hip_bfloat162 h2log2(const __hip_bfloat162 h)
Calculate log 2 of bfloat162.
Definition amd_hip_bf16.h:926
__device__ __hip_bfloat162 h2sqrt(const __hip_bfloat162 h)
Calculate sqrt of bfloat162.
Definition amd_hip_bf16.h:966
__device__ __hip_bfloat162 h2log10(const __hip_bfloat162 h)
Calculate log 10 of bfloat162.
Definition amd_hip_bf16.h:918
__device__ __hip_bfloat162 h2ceil(const __hip_bfloat162 h)
Calculate ceil of bfloat162.
Definition amd_hip_bf16.h:862
__device__ __hip_bfloat162 h2rint(const __hip_bfloat162 h)
Calculate vector round to nearest int.
Definition amd_hip_bf16.h:942
__device__ __hip_bfloat162 h2rcp(const __hip_bfloat162 h)
Calculate vector reciprocal.
Definition amd_hip_bf16.h:934
__device__ __hip_bfloat162 h2rsqrt(const __hip_bfloat162 h)
Calculate vector reciprocal square root.
Definition amd_hip_bf16.h:950
__device__ __hip_bfloat162 h2cos(const __hip_bfloat162 h)
Calculate cosine of bfloat162.
Definition amd_hip_bf16.h:870
__device__ __hip_bfloat162 h2floor(const __hip_bfloat162 h)
Calculate floor of bfloat162.
Definition amd_hip_bf16.h:902
__device__ __hip_bfloat162 h2exp10(const __hip_bfloat162 h)
Calculate exponential 10 of bfloat162.
Definition amd_hip_bf16.h:886
__device__ __hip_bfloat162 h2trunc(const __hip_bfloat162 h)
Calculate truncate of bfloat162.
Definition amd_hip_bf16.h:974
__device__ __hip_bfloat162 h2exp2(const __hip_bfloat162 h)
Calculate exponential 2 of bfloat162.
Definition amd_hip_bf16.h:894
__device__ __hip_bfloat162 h2exp(const __hip_bfloat162 h)
Calculate exponential of bfloat162.
Definition amd_hip_bf16.h:878
Struct to represent a 16 bit brain floating point number.
Definition amd_hip_bf16.h:112
Struct to represent two 16 bit brain floating point numbers.
Definition amd_hip_bf16.h:117
Definition amd_hip_vector_types.h:1986