From e2676ec1767b4ba4dcdc93aa9b2e07b0a2c4185b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9o=20Terziman?=
Date: Tue, 26 Nov 2013 16:34:16 +0100
Subject: [PATCH] Implementing UTF8 to ISO-8859-1 conversion to improve chances of finding files in zip archives

---
 code/BaseImporter.cpp | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 code/BaseImporter.h   |  9 +++++++++
 2 files changed, 60 insertions(+)

diff --git a/code/BaseImporter.cpp b/code/BaseImporter.cpp
index af36eccd5..dab4eb003 100644
--- a/code/BaseImporter.cpp
+++ b/code/BaseImporter.cpp
@@ -379,6 +379,57 @@ void BaseImporter::ConvertToUTF8(std::vector<char>& data)
 	}
 }
 
+// ------------------------------------------------------------------------------------------------
+// Convert UTF8 data to ISO-8859-1 in place.
+//
+// Only the two-byte sequences led by 0xC2 or 0xC3 (code points U+0080..U+00FF) have an
+// ISO-8859-1 equivalent. Any other non-ASCII byte is reported through DefaultLogger and
+// copied through unchanged, so the string never grows.
+void BaseImporter::ConvertUTF8toISO8859_1(std::string& data)
+{
+	// size_t instead of unsigned int, so the length returned by size() is not truncated.
+	const size_t size = data.size();
+	size_t i = 0, j = 0; // i: read position, j: write position (j never overtakes i)
+
+	while(i < size) {
+		const unsigned char lead = static_cast<unsigned char>(data[i]);
+
+		if(lead < 0x80) {
+			// ASCII is encoded identically in UTF8 and ISO-8859-1.
+			data[j] = data[i];
+		} else if(i + 1 < size) {
+			const unsigned char next = static_cast<unsigned char>(data[i + 1]);
+
+			// Only accept the pair if the second byte is a continuation byte (10xxxxxx).
+			if((lead == 0xC2 || lead == 0xC3) && (next & 0xC0) == 0x80) {
+				// 0xC2 keeps the second byte as is, 0xC3 shifts it up by 0x40.
+				data[j] = static_cast<char>(lead == 0xC2 ? next : next + 0x40);
+				++i;
+			} else {
+				std::stringstream stream;
+
+				// Stream the bytes as integers: a char would be written raw, ignoring std::hex.
+				stream << "UTF8 code 0x" << std::hex << static_cast<unsigned int>(lead)
+					<< " 0x" << static_cast<unsigned int>(next)
+					<< " can not be converted into ISO-8859-1.";
+
+				DefaultLogger::get()->error(stream.str());
+
+				// Keep the offending byte; the next byte is examined on the next pass.
+				data[j] = data[i];
+			}
+		} else {
+			DefaultLogger::get()->error("UTF8 code but only one character remaining");
+
+			data[j] = data[i];
+		}
+
+		i++; j++;
+	}
+
+	data.resize(j);
+}
+
 // ------------------------------------------------------------------------------------------------
 void BaseImporter::TextFileToBuffer(IOStream* stream,
 	std::vector<char>& data)
diff --git a/code/BaseImporter.h b/code/BaseImporter.h
index 073a476f3..6d3594eaa 100644
--- a/code/BaseImporter.h
+++ b/code/BaseImporter.h
@@ -331,6 +331,15 @@ public: // static utilities
 	static void ConvertToUTF8(
 		std::vector<char>& data);
 
+	// -------------------------------------------------------------------
+	/** A utility for all text file loaders. It converts a file from our
+	 *   UTF8 character set back to ISO-8859-1. Errors are reported, but ignored.
+	 *
+	 *  @param data File buffer to be converted from UTF8 to ISO-8859-1. The buffer
+	 *  is resized as appropriate. */
+	static void ConvertUTF8toISO8859_1(
+		std::string& data);
+
 	// -------------------------------------------------------------------
 	/** Utility for text file loaders which copies the contents of the
 	 *  file into a memory buffer and converts it to our UTF8