43 "ASCII",
"UTF-8",
"UTF-16LE",
"UTF-16BE",
"ISO-8859-15",
"WINDOWS-1250",
"WINDOWS-1251",
44 "WINDOWS-1252",
"CP932",
"CP936",
"CP949",
"CP950" 48 1, 1, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4
52 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1
56 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1
102 throw Exception(
"Invalid encoding %d", encoding);
109 throw Exception(
"Invalid encoding %d", encoding);
120 size_t inBytes = nIn;
121 size_t outBytes = nOut;
128 iconv(ctx, 0, 0, 0, 0);
131 if (iconv(ctx, const_cast<ICONV_CONST char **>(reinterpret_cast<char **>(&data)), &inBytes,
132 reinterpret_cast<char **>(&outBuf), &outBytes) == ((
size_t) -1)) {
134 warning(
"iconv() failed: %s", strerror(errno));
138 size = nOut - outBytes;
144 if (ctx == ((iconv_t) -1))
152 while (termSize-- > 0)
153 dataOut[size++] =
'\0';
155 return UString(reinterpret_cast<const char *>(dataOut.
get()));
159 if (ctx == ((iconv_t) -1))
163 size_t nIn = std::strlen(str.
c_str());
164 size_t nOut = nIn * growth + termSize;
171 while (termSize-- > 0)
172 dataOut[size++] =
'\0';
180 #define ConvMan Common::ConversionManager::instance() 212 if (stream.
read(data, 1) != 1)
218 if (stream.
read(data, 2) != 2)
221 return READ_LE_UINT16(data);
224 if (stream.
read(data, 2) != 2)
227 return READ_BE_UINT16(data);
254 WRITE_LE_UINT16(data, c);
255 output.push_back(data[0]);
256 output.push_back(data[1]);
260 WRITE_BE_UINT16(data, c);
261 output.push_back(data[0]);
262 output.push_back(data[1]);
277 output.push_back(
'\0');
278 return UString(reinterpret_cast<const char *>(&output[0]));
281 return ConvMan.convert(encoding, &output[0], output.size());
288 std::vector<byte> output;
291 while (((c =
readFakeChar(stream, encoding)) !=
'\0') && !stream.
eos())
301 std::vector<byte> output;
302 output.resize(length);
304 length = stream.
read(&output[0], length);
305 output.resize(length);
311 std::vector<byte> output;
314 while (((c =
readFakeChar(stream, encoding)) !=
'\0') && !stream.
eos()) {
331 std::vector<byte> output;
334 std::memcpy(&output[0], data, size);
361 std::strlen(str.
c_str()) + (terminateString ? 1 : 0));
363 return ConvMan.convert(encoding, str, terminateString);
384 throw Exception(
"getBytesPerCodepoint(): Encoding with variable number of bytes per codepoint");
390 throw Exception(
"getBytesPerCodepoint(): Invalid encoding (%d)", (
int)encoding);
402 return (cp <= 0x7F) || (cp >= 0xA0);
405 return (cp != 0x81) && (cp != 0x83) && (cp != 0x88) &&
406 (cp != 0x90) && (cp != 0x98);
412 return (cp != 0x81) && (cp != 0x8D) && (cp != 0x8F) &&
413 (cp != 0x90) && (cp != 0x9D);
A manager handling string encoding conversions.
Class and macro for implementing singletons.
UString convert(Encoding encoding, byte *data, size_t n)
static const size_t kTerminatorLength[kEncodingMAX]
iconv_t _contextFrom[kEncodingMAX]
bool hasSupportEncoding(Encoding encoding)
Do we have support for this encoding?
A class holding a UTF-8 string.
UString getEncodingName(Encoding encoding)
Return the human readable name of an encoding.
PointerType release()
Returns the plain pointer value and releases ScopedPtr.
virtual bool eos() const =0
Returns true if a read failed because the end of the stream has been reached.
Implementing the reading stream interfaces for plain memory blocks.
size_t writeString(WriteStream &stream, const Common::UString &str, Encoding encoding, bool terminate)
Write a string into a stream with a given encoding.
UTF-16 LE (little endian).
MemoryReadStream * convert(Encoding encoding, const UString &str, bool terminate=true)
A simple scoped smart pointer template.
UString convert(iconv_t &ctx, byte *data, size_t n, size_t growth, size_t termSize)
static void writeFakeChar(std::vector< byte > &output, uint32 c, Encoding encoding)
size_t getBytesPerCodepoint(Encoding encoding)
Return the number of bytes per codepoint in this encoding.
byte * doConvert(iconv_t &ctx, byte *data, size_t nIn, size_t nOut, size_t &size)
Basic exceptions to throw.
MemoryReadStream * convert(iconv_t &ctx, const UString &str, size_t growth, size_t termSize)
UString readStringLine(SeekableReadStream &stream, Encoding encoding)
Read a line with the given encoding out of a stream.
const char * c_str() const
Return the (utf8 encoded) string data.
Generic template base class for implementing the singleton design pattern.
Windows codepage 950 (Traditional Chinese, similar to Big5).
#define DECLARE_SINGLETON(T)
Note that you need to use this macro from the global namespace.
static const char *const kEncodingName[kEncodingMAX]
Basic writing stream interfaces.
bool hasSupportTranscode(Encoding from, Encoding to)
virtual size_t read(void *dataPtr, size_t dataSize)=0
Read data from the stream.
Simple memory based 'stream', which implements the ReadStream interface for a plain memory block...
static UString createString(std::vector< byte > &output, Encoding encoding)
Utility functions for working with differing string encodings.
static const size_t kEncodingGrowthTo[kEncodingMAX]
A scoped plain pointer, allowing pointer-y access and normal deletion.
void writeByte(byte value)
void warning(const char *s,...)
Generic interface for a writable data stream.
Plain, unextended ASCII (7bit clean).
PointerType get() const
Returns the plain pointer value.
Windows codepage 932 (Japanese, extended Shift-JIS).
size_t writeStream(ReadStream &stream, size_t n)
Copy n bytes of the given stream into the stream.
Windows codepage 1250 (Eastern European, Latin alphabet).
UString readString(SeekableReadStream &stream, Encoding encoding)
Read a string with the given encoding out of a stream.
Windows codepage 1251 (Eastern European, Cyrillic alphabet).
UString readStringFixed(SeekableReadStream &stream, Encoding encoding, size_t length)
Read length bytes as a string with the given encoding out of a stream.
Windows codepage 1252 (Western European, Latin alphabet).
static const size_t kEncodingGrowthFrom[kEncodingMAX]
iconv_t _contextTo[kEncodingMAX]
bool isValidCodepoint(Encoding encoding, uint32 cp)
Return whether the given codepoint is valid in this encoding.
MemoryReadStream * convertString(const UString &str, Encoding encoding, bool terminateString)
Convert a string into the given encoding.
Windows codepage 949 (Korean, similar to EUC-KR).
Windows codepage 936 (Simplified Chinese, extended GB2312 with GBK codepoints).
Interface for a seekable & readable data stream.
static uint32 readFakeChar(SeekableReadStream &stream, Encoding encoding)
void writeStringFixed(WriteStream &stream, const Common::UString &str, Encoding encoding, size_t length)
Write a string into a stream with a given encoding and fixed length in bytes.