Implementing UTF8 to ISO-8859-1 conversion to improve chances of finding files in zip archives
parent
0bf4aea9d5
commit
e2676ec176
|
@ -379,6 +379,43 @@ void BaseImporter::ConvertToUTF8(std::vector<char>& data)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------------
|
||||||
|
// Convert to UTF8 data to ISO-8859-1
|
||||||
|
void BaseImporter::ConvertUTF8toISO8859_1(std::string& data)
|
||||||
|
{
|
||||||
|
unsigned int size = data.size();
|
||||||
|
unsigned int i = 0, j = 0;
|
||||||
|
|
||||||
|
while(i < size) {
|
||||||
|
if((unsigned char) data[i] < 0x80) {
|
||||||
|
data[j] = data[i];
|
||||||
|
} else if(i < size - 1) {
|
||||||
|
if((unsigned char) data[i] == 0xC2) {
|
||||||
|
data[j] = data[++i];
|
||||||
|
} else if((unsigned char) data[i] == 0xC3) {
|
||||||
|
data[j] = ((unsigned char) data[++i] + 0x40);
|
||||||
|
} else {
|
||||||
|
std::stringstream stream;
|
||||||
|
|
||||||
|
stream << "UTF8 code " << std::hex << data[i] << data[i + 1] << " can not be converted into ISA-8859-1.";
|
||||||
|
|
||||||
|
DefaultLogger::get()->error(stream.str());
|
||||||
|
|
||||||
|
data[j++] = data[i++];
|
||||||
|
data[j] = data[i];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
DefaultLogger::get()->error("UTF8 code but only one character remaining");
|
||||||
|
|
||||||
|
data[j] = data[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
i++; j++;
|
||||||
|
}
|
||||||
|
|
||||||
|
data.resize(j);
|
||||||
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------------------------------------------
|
// ------------------------------------------------------------------------------------------------
|
||||||
void BaseImporter::TextFileToBuffer(IOStream* stream,
|
void BaseImporter::TextFileToBuffer(IOStream* stream,
|
||||||
std::vector<char>& data)
|
std::vector<char>& data)
|
||||||
|
|
|
@ -331,6 +331,15 @@ public: // static utilities
|
||||||
static void ConvertToUTF8(
|
static void ConvertToUTF8(
|
||||||
std::vector<char>& data);
|
std::vector<char>& data);
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------
|
||||||
|
/** A utility for all text file loaders. It converts a file from our
|
||||||
|
* UTF8 character set back to ISO-8859-1. Errors are reported, but ignored.
|
||||||
|
*
|
||||||
|
* @param data File buffer to be converted from UTF8 to ISO-8859-1. The buffer
|
||||||
|
* is resized as appropriate. */
|
||||||
|
static void ConvertUTF8toISO8859_1(
|
||||||
|
std::string& data);
|
||||||
|
|
||||||
// -------------------------------------------------------------------
|
// -------------------------------------------------------------------
|
||||||
/** Utility for text file loaders which copies the contents of the
|
/** Utility for text file loaders which copies the contents of the
|
||||||
* file into a memory buffer and converts it to our UTF8
|
* file into a memory buffer and converts it to our UTF8
|
||||||
|
|
Loading…
Reference in New Issue