Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions src/mat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1331,6 +1331,95 @@ float float16_to_float32(unsigned short value)
return tmp.f;
}

// Convert an IEEE float16 bit pattern (1 : 5 : 10, bias 15) to a float8 E4M3
// bit pattern (1 : 4 : 3, bias 7).
//
// The E4M3 variant used here has no infinity: exponent field 0xF still holds
// finite values (256 .. 448) and only S.1111.111 is NaN.
//
// - mantissa bits that do not fit are truncated (no rounding)
// - float16 infinity, NaN and anything too large for E4M3 become NaN,
//   keeping the input sign bit
// - values in [2^-9, 2^-6) are stored as E4M3 subnormals
// - anything smaller (including all float16 subnormals) becomes signed zero
unsigned char float16_to_float8(unsigned short value)
{
    // 1 : 5 : 10
    const unsigned short sign = (value & 0x8000) >> 15;
    const unsigned short exponent = (value & 0x7c00) >> 10;
    const unsigned short significand = value & 0x03FF;

    // 1 : 4 : 3
    const unsigned char signbit = static_cast<unsigned char>(sign << 7);
    const unsigned char nan = static_cast<unsigned char>(signbit | (0xF << 3) | 0x7);

    if (exponent == 0)
    {
        // zero or float16 denormal (< 2^-14), far below the smallest
        // E4M3 subnormal (2^-9), always underflow to zero
        return signbit;
    }

    if (exponent == 0x1F)
    {
        // infinity or NaN -> NaN (E4M3 has no infinity)
        return nan;
    }

    // rebias the exponent from 15 to 7
    const int newexp = static_cast<int>(exponent) + (-15 + 7);

    if (newexp > 15)
    {
        // overflow, return NaN (E4M3 has no infinity)
        return nan;
    }

    if (newexp >= 1)
    {
        // normal fp8
        // with newexp == 15 the top three mantissa bits 0..6 encode 256..448;
        // top bits == 7 (>= 480) lands exactly on the NaN pattern, which is
        // the intended overflow result
        return static_cast<unsigned char>(signbit | (newexp << 3) | (significand >> 7));
    }

    if (newexp >= -2)
    {
        // subnormal fp8, value = m * 2^-9 with m in 1..7
        // restore the implicit leading one and shift it into the 3-bit field
        const unsigned short full = 0x0400 | significand;
        return static_cast<unsigned char>(signbit | (full >> (8 - newexp)));
    }

    // underflow to zero
    return signbit;
}

// Convert a float8 E4M3 bit pattern (1 : 4 : 3, bias 7) to an IEEE float16
// bit pattern (1 : 5 : 10, bias 15).
//
// The E4M3 variant used here has no infinity: exponent field 0xF still holds
// finite values (256 .. 448) and only S.1111.111 is NaN.
//
// Every finite E4M3 value, subnormals included, is exactly representable in
// float16, so the conversion is lossless. NaN maps to a float16 quiet NaN
// with the input sign bit.
unsigned short float8_to_float16(unsigned char value)
{
    // 1 : 4 : 3
    const unsigned short sign = (value & 0x80) >> 7;
    const unsigned short exponent = (value & 0x78) >> 3;
    const unsigned short significand = value & 0x07;

    // 1 : 5 : 10
    const unsigned short signbit = static_cast<unsigned short>(sign << 15);

    if (exponent == 0xF && significand == 0x7)
    {
        // NaN (E4M3 has no infinity)
        return static_cast<unsigned short>(signbit | (0x1F << 10) | 0x200);
    }

    if (exponent == 0)
    {
        if (significand == 0)
        {
            // zero
            return signbit;
        }

        // subnormal, value = significand * 2^-9
        // this is a normal number in float16, so shift the leading one up to
        // bit 2 and count how far the exponent drops below 2^-7
        unsigned short m = significand;
        int shift = 0;
        while ((m & 0x4) == 0)
        {
            m <<= 1;
            shift++;
        }

        // m is now 1.xx in fixed point with two fraction bits
        const int subexp = (-7 + 15) - shift;
        return static_cast<unsigned short>(signbit | (subexp << 10) | ((m & 0x3) << 8));
    }

    // normalized, including exponent field 0xF with significand 0..6
    const int newexp = exponent + (-7 + 15);
    return static_cast<unsigned short>(signbit | (newexp << 10) | (significand << 7));
}

void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
{
Layer* padding = create_layer(LayerType::Padding);
Expand Down
18 changes: 18 additions & 0 deletions src/mat.h
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,24 @@ NCNN_EXPORT NCNN_FORCEINLINE float bfloat16_to_float32(unsigned short value)
tmp.u = value << 16;
return tmp.f;
}
// convert a float16 bit pattern (1 : 5 : 10) to a float8 e4m3 bit pattern (1 : 4 : 3)
// implemented in mat.cpp
NCNN_EXPORT unsigned char float16_to_float8(unsigned short value);
// convert a float8 e4m3 bit pattern (1 : 4 : 3) to a float16 bit pattern (1 : 5 : 10)
// implemented in mat.cpp
NCNN_EXPORT unsigned short float8_to_float16(unsigned char value);
// narrow a float16 bit pattern to a bfloat8 e5m2 bit pattern
NCNN_EXPORT NCNN_FORCEINLINE unsigned char float16_to_bfloat8(unsigned short value)
{
    // float16 is 1 : 5 : 10 and bfloat8 is 1 : 5 : 2
    // sign and exponent fields line up, so the upper byte of the float16
    // pattern is the result and the low 8 mantissa bits are simply dropped
    // (truncation, no rounding), the same trick bfloat16 uses with float32
    const int upper_byte = value >> 8;
    return upper_byte;
}
// widen a bfloat8 e5m2 bit pattern to a float16 bit pattern
NCNN_EXPORT NCNN_FORCEINLINE unsigned short bfloat8_to_float16(unsigned char value)
{
    // bfloat8 is 1 : 5 : 2 and float16 is 1 : 5 : 10
    // sign and exponent fields line up, so the byte becomes the upper half of
    // the float16 pattern and the 8 new low mantissa bits are zero,
    // the same trick bfloat16 uses with float32
    const int widened = value << 8;
    return widened;
}

// mat process
enum BorderType
Expand Down
Loading