This repository has been archived by the owner on Sep 21, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add string encoding utility functions
- Loading branch information
1 parent
0a1699c
commit 490f269
Showing
5 changed files
with
128 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
/** | ||
* @file libtransistor/encoding.h | ||
* @brief String encoding functions | ||
* Based on https://github.com/nothings/stb | ||
*/ | ||
|
||
#pragma once | ||
|
||
#ifdef __cplusplus | ||
extern "C" { | ||
#endif | ||
|
||
#include<stdint.h> | ||
#include<string.h> | ||
|
||
typedef uint16_t trn_char16_t; | ||
|
||
/** | ||
* @brief Converts a UTF-8 string to UTF-16 | ||
* Writes at most n characters to out, returning NULL if there is an error. | ||
*/ | ||
trn_char16_t *trn_utf8_to_utf16(trn_char16_t *out, const char *in, size_t n); | ||
|
||
/** | ||
* @brief Converts a UTF-16 string to UTF-8 | ||
* Writes at most n characters to out, returning NULL if there is an error. | ||
*/ | ||
char *trn_utf16_to_utf8(char *out, const trn_char16_t *in, size_t n); | ||
|
||
#ifdef __cplusplus | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
#include<libtransistor/encoding.h> | ||
|
||
#include<stdint.h> | ||
#include<stdlib.h> | ||
#include<stddef.h> | ||
|
||
// based on stb.h | ||
|
||
trn_char16_t *trn_utf8_to_utf16(trn_char16_t *buffer, const char *ostr, size_t n) | ||
{ | ||
unsigned char *str = (unsigned char *) ostr; | ||
uint32_t c; | ||
size_t i=0; | ||
--n; | ||
while (*str) { | ||
if (i >= n) | ||
return NULL; | ||
if (!(*str & 0x80)) | ||
buffer[i++] = *str++; | ||
else if ((*str & 0xe0) == 0xc0) { | ||
if (*str < 0xc2) return NULL; | ||
c = (*str++ & 0x1f) << 6; | ||
if ((*str & 0xc0) != 0x80) return NULL; | ||
buffer[i++] = c + (*str++ & 0x3f); | ||
} else if ((*str & 0xf0) == 0xe0) { | ||
if (*str == 0xe0 && (str[1] < 0xa0 || str[1] > 0xbf)) return NULL; | ||
if (*str == 0xed && str[1] > 0x9f) return NULL; // str[1] < 0x80 is checked below | ||
c = (*str++ & 0x0f) << 12; | ||
if ((*str & 0xc0) != 0x80) return NULL; | ||
c += (*str++ & 0x3f) << 6; | ||
if ((*str & 0xc0) != 0x80) return NULL; | ||
buffer[i++] = c + (*str++ & 0x3f); | ||
} else if ((*str & 0xf8) == 0xf0) { | ||
if (*str > 0xf4) return NULL; | ||
if (*str == 0xf0 && (str[1] < 0x90 || str[1] > 0xbf)) return NULL; | ||
if (*str == 0xf4 && str[1] > 0x8f) return NULL; // str[1] < 0x80 is checked below | ||
c = (*str++ & 0x07) << 18; | ||
if ((*str & 0xc0) != 0x80) return NULL; | ||
c += (*str++ & 0x3f) << 12; | ||
if ((*str & 0xc0) != 0x80) return NULL; | ||
c += (*str++ & 0x3f) << 6; | ||
if ((*str & 0xc0) != 0x80) return NULL; | ||
c += (*str++ & 0x3f); | ||
// utf-8 encodings of values used in surrogate pairs are invalid | ||
if ((c & 0xFFFFF800) == 0xD800) return NULL; | ||
if (c >= 0x10000) { | ||
c -= 0x10000; | ||
if (i + 2 > n) return NULL; | ||
buffer[i++] = 0xD800 | (0x3ff & (c >> 10)); | ||
buffer[i++] = 0xDC00 | (0x3ff & (c )); | ||
} | ||
} else | ||
return NULL; | ||
} | ||
buffer[i] = 0; | ||
return buffer; | ||
} | ||
|
||
char *trn_utf16_to_utf8(char *buffer, const trn_char16_t *str, size_t n) | ||
{ | ||
size_t i=0; | ||
--n; | ||
while (*str) { | ||
if (*str < 0x80) { | ||
if (i+1 > n) return NULL; | ||
buffer[i++] = (char) *str++; | ||
} else if (*str < 0x800) { | ||
if (i+2 > n) return NULL; | ||
buffer[i++] = 0xc0 + (*str >> 6); | ||
buffer[i++] = 0x80 + (*str & 0x3f); | ||
str += 1; | ||
} else if (*str >= 0xd800 && *str < 0xdc00) { | ||
uint32_t c; | ||
if (i+4 > n) return NULL; | ||
c = ((str[0] - 0xd800) << 10) + ((str[1]) - 0xdc00) + 0x10000; | ||
buffer[i++] = 0xf0 + (c >> 18); | ||
buffer[i++] = 0x80 + ((c >> 12) & 0x3f); | ||
buffer[i++] = 0x80 + ((c >> 6) & 0x3f); | ||
buffer[i++] = 0x80 + ((c ) & 0x3f); | ||
str += 2; | ||
} else if (*str >= 0xdc00 && *str < 0xe000) { | ||
return NULL; | ||
} else { | ||
if (i+3 > n) return NULL; | ||
buffer[i++] = 0xe0 + (*str >> 12); | ||
buffer[i++] = 0x80 + ((*str >> 6) & 0x3f); | ||
buffer[i++] = 0x80 + ((*str ) & 0x3f); | ||
str += 1; | ||
} | ||
} | ||
buffer[i] = 0; | ||
return buffer; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters