This is simple C (or C++) code for a UTF-8 decoder.
// Returns how many bytes the UTF-8 sequence introduced by lead byte `ch`
// occupies (1..6, covering the original 1-6 byte encoding scheme).
// The result is chosen purely by range: below 0x80 -> 1, below 0xE0 -> 2,
// below 0xF0 -> 3, below 0xF8 -> 4, below 0xFC -> 5, anything else -> 6.
// No validation is done: `ch` is assumed to really be the first byte of a
// character, so a continuation byte (0x80..0xBF) is reported as 2.
int NumberOfUTF8Chars(unsigned char ch)
{
    // upper_bound[k] is the exclusive upper limit of the lead bytes that
    // introduce a sequence of k + 1 bytes.
    static const unsigned char upper_bound[] = { 0x80u, 0xE0u, 0xF0u, 0xF8u, 0xFCu };
    int length = 1;

    // Step to the next length while the byte lies at or above the current limit;
    // running out of limits leaves the maximum length of 6.
    while (length <= 5 && ch >= upper_bound[length - 1]) {
        length++;
    }
    return length;
}
// Decodes the single UTF-8 encoded character that starts at `ch` and returns
// its code value.
//
// ch: pointer to the first byte of the character. The sequence length is
//     taken from NumberOfUTF8Chars() applied to that byte.
//
// Returns the decoded code value. If any trailing byte is not a UTF-8
// continuation byte (bit pattern 10xxxxxx), decoding stops at that byte and
// U+FFFD (REPLACEMENT CHARACTER) is returned. Because a terminating NUL is
// not a continuation byte, a sequence truncated by the end of a C string is
// never read past its terminator.
//
// Not checked here: whether the first byte is a legal lead byte, overlong
// encodings, surrogates, or values above U+10FFFF.
unsigned int ValueOfUTF8Code(const char* ch)
{
    // Payload bits carried by the lead byte, indexed by (sequence length - 1).
    static const unsigned char lead_mask[] = { 0x7Fu, 0x1Fu, 0x0Fu, 0x07u, 0x03u, 0x01u };
    // Work on unsigned bytes so the result does not depend on whether plain
    // `char` is signed on this platform.
    const unsigned char *bytes = (const unsigned char *)ch;
    int Size = NumberOfUTF8Chars(bytes[0]);
    unsigned int Value;

    // Guards the table index below against an unexpected length.
    if (Size < 1 || Size > 6) {
        return 0xFFFDu;
    }

    Value = (unsigned int)(bytes[0] & lead_mask[Size - 1]);
    for (int i = 1; i < Size; i++) {
        // Validate before consuming: this is what keeps a truncated sequence
        // from being read beyond the string's terminating NUL.
        if ((bytes[i] & 0xC0u) != 0x80u) {
            return 0xFFFDu;
        }
        // Each continuation byte contributes its low six bits.
        Value = (Value << 6) | (bytes[i] & 0x3Fu);
    }
    return Value;
}
This code has never actually been tested. Use it at your own risk.
Advertisement
Thank you !