16bit-Unicode(UCS2)とUTF8との相互変換。
必要に迫られて書いてました。コード表片手にゴリゴリと。
#include <string>
// UTF-16 -> UTF-8
//
// Converts a string of UTF-16 code units to UTF-8.
//
// utf16   : input text. Each element is treated as one 16-bit code unit;
//           only the low 16 bits of every wchar_t are used. Despite the
//           "be" in the name, no byte swapping is performed: the units are
//           read as ordinary wchar_t values.
// returns : the UTF-8 encoding of the input.
//
// A high surrogate (U+D800..U+DBFF) immediately followed by a low surrogate
// (U+DC00..U+DFFF) is combined into one supplementary code point and written
// as a single four-byte sequence. An unpaired surrogate is written as a
// three-byte sequence, the same as any other code unit.
std::string utf16be_to_utf8(const std::wstring& utf16) {
    std::string utf8;
    utf8.reserve(utf16.size());
    for ( std::wstring::size_type i = 0; i < utf16.size(); ++i ) {
        const unsigned short wch = static_cast<unsigned short>(utf16[i]);
        if ( wch <= 0x007f ) {
            // 1 byte: 0xxxxxxx
            utf8 += static_cast<char>(wch);
            continue;
        }
        if ( wch <= 0x07ff ) {
            // 2 bytes: 110xxxxx 10xxxxxx
            utf8 += static_cast<char>(0xc0 | (wch >> 6));
            utf8 += static_cast<char>(0x80 | (wch & 0x3f));
            continue;
        }
        if ( wch >= 0xd800 && wch <= 0xdbff && i + 1 < utf16.size() ) {
            const unsigned short low = static_cast<unsigned short>(utf16[i + 1]);
            if ( low >= 0xdc00 && low <= 0xdfff ) {
                // Valid surrogate pair: rebuild the code point
                // (U+10000..U+10FFFF) and emit
                // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                const unsigned long cp = 0x10000UL
                    + ((static_cast<unsigned long>(wch) - 0xd800UL) << 10)
                    + (static_cast<unsigned long>(low) - 0xdc00UL);
                utf8 += static_cast<char>(0xf0 | (cp >> 18));
                utf8 += static_cast<char>(0x80 | ((cp >> 12) & 0x3f));
                utf8 += static_cast<char>(0x80 | ((cp >> 6) & 0x3f));
                utf8 += static_cast<char>(0x80 | (cp & 0x3f));
                ++i;  // the low surrogate has been consumed as well
                continue;
            }
        }
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        utf8 += static_cast<char>(0xe0 | (wch >> 12));
        utf8 += static_cast<char>(0x80 | ((wch >> 6) & 0x3f));
        utf8 += static_cast<char>(0x80 | (wch & 0x3f));
    }
    return utf8;
}
// UTF-8 -> UTF-16
//
// Converts a UTF-8 byte string to a string of UTF-16 code units.
//
// utf8    : input bytes.
// returns : one wchar_t per UTF-16 code unit. Despite the "be" in the name,
//           no byte swapping is performed: each unit is stored as an
//           ordinary wchar_t value.
// throws  : std::out_of_range (from std::string::at) when a multi-byte
//           sequence is cut off by the end of the input.
//
// One-, two- and three-byte sequences yield a single code unit. A four-byte
// sequence (lead byte 0xF0..0xF7) yields a surrogate pair; if it decodes to
// a value above U+10FFFF, which UTF-16 cannot represent, U+FFFD is emitted.
//
// NOTE: the input is not validated. Continuation bytes are not checked for
// the 10xxxxxx form, over-long encodings are accepted, and any lead byte
// that matches none of the explicit cases is decoded as a three-byte lead.
std::wstring utf8_to_utf16be(const std::string& utf8) {
    std::wstring utf16;
    utf16.reserve(utf8.size());
    for ( std::string::size_type i = 0; i < utf8.size(); ++i ) {
        const unsigned char ch0 = static_cast<unsigned char>(utf8[i]);
        if ( (ch0 & 0x80) == 0x00 ) {
            // 1 byte: 0xxxxxxx
            utf16 += static_cast<wchar_t>(ch0);
        } else if ( (ch0 & 0xe0) == 0xc0 ) {
            // 2 bytes: 110xxxxx 10xxxxxx
            const unsigned char ch1 = static_cast<unsigned char>(utf8.at(++i));
            utf16 += static_cast<wchar_t>(((ch0 & 0x1f) << 6)
                                        |  (ch1 & 0x3f));
        } else if ( (ch0 & 0xf8) == 0xf0 ) {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            const unsigned char ch1 = static_cast<unsigned char>(utf8.at(++i));
            const unsigned char ch2 = static_cast<unsigned char>(utf8.at(++i));
            const unsigned char ch3 = static_cast<unsigned char>(utf8.at(++i));
            const unsigned long cp = (static_cast<unsigned long>(ch0 & 0x07) << 18)
                                   | (static_cast<unsigned long>(ch1 & 0x3f) << 12)
                                   | (static_cast<unsigned long>(ch2 & 0x3f) << 6)
                                   |  static_cast<unsigned long>(ch3 & 0x3f);
            if ( cp < 0x10000UL ) {
                // Over-long form of a BMP character: fits in one code unit.
                utf16 += static_cast<wchar_t>(cp);
            } else if ( cp <= 0x10ffffUL ) {
                // Supplementary plane: split into high and low surrogates.
                const unsigned long offset = cp - 0x10000UL;
                utf16 += static_cast<wchar_t>(0xd800UL + (offset >> 10));
                utf16 += static_cast<wchar_t>(0xdc00UL + (offset & 0x3ffUL));
            } else {
                // Not representable in UTF-16.
                utf16 += static_cast<wchar_t>(0xfffd);
            }
        } else {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            const unsigned char ch1 = static_cast<unsigned char>(utf8.at(++i));
            const unsigned char ch2 = static_cast<unsigned char>(utf8.at(++i));
            utf16 += static_cast<wchar_t>(((ch0 & 0x0f) << 12)
                                        | ((ch1 & 0x3f) << 6)
                                        |  (ch2 & 0x3f));
        }
    }
    return utf16;
}
間違えちゃいないと思うけど...