Implementing UTF8 to ISO-8859-1 conversion to improve chances of finding files in zip archives
parent
0bf4aea9d5
commit
e2676ec176
|
@ -379,6 +379,43 @@ void BaseImporter::ConvertToUTF8(std::vector<char>& data)
|
|||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
// Convert to UTF8 data to ISO-8859-1
|
||||
void BaseImporter::ConvertUTF8toISO8859_1(std::string& data)
|
||||
{
|
||||
unsigned int size = data.size();
|
||||
unsigned int i = 0, j = 0;
|
||||
|
||||
while(i < size) {
|
||||
if((unsigned char) data[i] < 0x80) {
|
||||
data[j] = data[i];
|
||||
} else if(i < size - 1) {
|
||||
if((unsigned char) data[i] == 0xC2) {
|
||||
data[j] = data[++i];
|
||||
} else if((unsigned char) data[i] == 0xC3) {
|
||||
data[j] = ((unsigned char) data[++i] + 0x40);
|
||||
} else {
|
||||
std::stringstream stream;
|
||||
|
||||
stream << "UTF8 code " << std::hex << data[i] << data[i + 1] << " can not be converted into ISA-8859-1.";
|
||||
|
||||
DefaultLogger::get()->error(stream.str());
|
||||
|
||||
data[j++] = data[i++];
|
||||
data[j] = data[i];
|
||||
}
|
||||
} else {
|
||||
DefaultLogger::get()->error("UTF8 code but only one character remaining");
|
||||
|
||||
data[j] = data[i];
|
||||
}
|
||||
|
||||
i++; j++;
|
||||
}
|
||||
|
||||
data.resize(j);
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
void BaseImporter::TextFileToBuffer(IOStream* stream,
|
||||
std::vector<char>& data)
|
||||
|
|
|
@ -331,6 +331,15 @@ public: // static utilities
|
|||
static void ConvertToUTF8(
|
||||
std::vector<char>& data);
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
/** An utility for all text file loaders. It converts a file from our
|
||||
* UTF8 character set back to ISO-8859-1. Errors are reported, but ignored.
|
||||
*
|
||||
* @param data File buffer to be converted from UTF8 to ISO-8859-1. The buffer
|
||||
* is resized as appropriate. */
|
||||
static void ConvertUTF8toISO8859_1(
|
||||
std::string& data);
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
/** Utility for text file loaders which copies the contents of the
|
||||
* file into a memory buffer and converts it to our UTF8
|
||||
|
|
Loading…
Reference in New Issue