mirror of
https://github.com/SpartanJ/eepp.git
synced 2026-05-30 18:16:31 +03:00
Text encodings WIP.
This commit is contained in:
@@ -17,6 +17,7 @@ class EE_API TextFormat {
|
||||
UTF16LE = String::hash( "UTF-16 LE" ),
|
||||
UTF16BE = String::hash( "UTF-16 BE" ),
|
||||
Latin1 = String::hash( "ISO-8859-1" ),
|
||||
Shift_JIS = String::hash( "Shift_JIS" ),
|
||||
};
|
||||
|
||||
/** Line terminator conventions: LF ("\n"), CRLF ("\r\n") and CR ("\r"). */
enum class LineEnding {
	LF,
	CRLF,
	CR
};
|
||||
|
||||
@@ -15,6 +15,11 @@ class EE_API PlatformHelper {
|
||||
*/
|
||||
virtual bool openURL( const std::string& url ) = 0;
|
||||
|
||||
/** Converts a byte buffer from one character encoding to another.
 * @param tocode Name of the destination encoding (e.g. "UTF-32LE").
 * @param fromcode Name of the source encoding (e.g. "SHIFT-JIS").
 * @param inbuf Pointer to the bytes to convert.
 * @param inbytesleft Number of bytes available at inbuf.
 * @return A newly allocated buffer holding the converted text, which must be released with
 * iconvFree(). Callers should check the result for null before using it.
 */
virtual char* iconv( const char* tocode, const char* fromcode, const char* inbuf,
|
||||
size_t inbytesleft ) = 0;
|
||||
|
||||
/** Releases a buffer previously returned by iconv(). */
virtual void iconvFree( char* buf ) = 0;
|
||||
|
||||
#if EE_PLATFORM == EE_PLATFORM_ANDROID
|
||||
/** @return The Activity object for the application */
|
||||
virtual void* getActivity() = 0;
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#include <cstdio>
|
||||
#include <eepp/core/debug.hpp>
|
||||
#include <eepp/core/debug.hpp>
|
||||
#include <eepp/network/uri.hpp>
|
||||
#include <eepp/system/filesystem.hpp>
|
||||
#include <eepp/system/iostreamfile.hpp>
|
||||
@@ -12,6 +11,7 @@
|
||||
#include <eepp/ui/doc/syntaxdefinitionmanager.hpp>
|
||||
#include <eepp/ui/doc/syntaxhighlighter.hpp>
|
||||
#include <eepp/ui/doc/textdocument.hpp>
|
||||
#include <eepp/window/engine.hpp>
|
||||
#include <string>
|
||||
|
||||
using namespace std::literals;
|
||||
@@ -104,6 +104,17 @@ void TextDocument::resetCursor() {
|
||||
notifySelectionChanged();
|
||||
}
|
||||
|
||||
/** Converts a Shift JIS encoded byte sequence into a String.
 * The bytes are handed to the platform helper's iconv() with "SHIFT-JIS" as source and
 * "UTF-32LE" as destination; the returned buffer is reinterpreted as String::StringBaseType
 * data and released with iconvFree() afterwards.
 * @param shiftJISString The raw Shift JIS bytes.
 * @return The converted text, or an empty String when iconv() returns a null buffer.
 */
String shiftJISToUTF32( const std::string_view& shiftJISString ) {
	auto* converted = Window::Engine::instance()->getPlatformHelper()->iconv(
		"UTF-32LE", "SHIFT-JIS", shiftJISString.data(), shiftJISString.size() );

	// Nothing was produced: hand back an empty string
	if ( !converted )
		return String();

	// NOTE(review): relies on the buffer being terminated as StringBaseType data — confirm
	String result( reinterpret_cast<String::StringBaseType*>( converted ) );
	Window::Engine::instance()->getPlatformHelper()->iconvFree( converted );
	return result;
}
|
||||
|
||||
static constexpr int codepointSize( TextFormat::Encoding enc ) {
|
||||
switch ( enc ) {
|
||||
case TextFormat::Encoding::UTF16LE:
|
||||
@@ -167,7 +178,9 @@ static String ptrGetLine( char* data, const size_t& size, size_t& position,
|
||||
position++;
|
||||
}
|
||||
|
||||
if ( enc == TextFormat::Encoding::Latin1 )
|
||||
if ( enc == TextFormat::Encoding::Shift_JIS )
|
||||
return shiftJISToUTF32( std::string_view{ data, position } );
|
||||
else if ( enc == TextFormat::Encoding::Latin1 )
|
||||
return String::fromLatin1( data, position );
|
||||
|
||||
return String( data, position );
|
||||
@@ -687,6 +700,7 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) {
|
||||
MD5::update( md5Ctx, bom, sizeof( bom ) );
|
||||
break;
|
||||
}
|
||||
case TextFormat::Encoding::Shift_JIS:
|
||||
case TextFormat::Encoding::Latin1:
|
||||
break;
|
||||
}
|
||||
@@ -764,6 +778,15 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) {
|
||||
MD5::update( md5Ctx, latin1.data(), latin1.size() );
|
||||
break;
|
||||
}
|
||||
case TextFormat::Encoding::Shift_JIS: {
|
||||
auto* ret = Window::Engine::instance()->getPlatformHelper()->iconv(
|
||||
"SHIFT-JIS", "UTF-8", text.c_str(), text.size() );
|
||||
auto len = strlen( ret );
|
||||
stream.write( ret, len );
|
||||
MD5::update( md5Ctx, ret, len );
|
||||
Window::Engine::instance()->getPlatformHelper()->iconvFree( ret );
|
||||
break;
|
||||
}
|
||||
case TextFormat::Encoding::UTF8: {
|
||||
stream.write( text.c_str(), text.size() );
|
||||
MD5::update( md5Ctx, text.data(), text.size() );
|
||||
|
||||
@@ -91,6 +91,47 @@ template <bool BigEndian> struct UTF16 {
|
||||
using UTF16_LE = UTF16<false>;
|
||||
using UTF16_BE = UTF16<true>;
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
// Shift JIS
|
||||
//-------------------------------------------------------------------
|
||||
struct ShiftJIS {
|
||||
static inline Uint16 getUnit( const char* src ) {
|
||||
return Uint8( src[0] ) | ( Uint16( Uint8( src[1] ) ) << 8 );
|
||||
}
|
||||
|
||||
static inline TextDecodeResult decodePoint( std::string_view view ) {
|
||||
// Shift JIS ranges for single-byte and double-byte characters
|
||||
static constexpr std::pair<unsigned char, unsigned char> firstByteRange1( 0x81, 0x9F );
|
||||
static constexpr std::pair<unsigned char, unsigned char> firstByteRange2( 0xE0, 0xEF );
|
||||
static constexpr std::pair<unsigned char, unsigned char> secondByteRange1( 0x40, 0x7E );
|
||||
static constexpr std::pair<unsigned char, unsigned char> secondByteRange2( 0x80, 0xFC );
|
||||
|
||||
if ( view.size() == 0 )
|
||||
return {};
|
||||
|
||||
Uint8 first = view[0];
|
||||
if ( first < 0x7F )
|
||||
return { first, TextDecodeResult::Status::Valid, 1 };
|
||||
|
||||
if ( view.size() < 2 &&
|
||||
( ( first >= secondByteRange1.first && first <= secondByteRange1.second ) ||
|
||||
( first >= secondByteRange2.first && first <= secondByteRange2.second ) ) ) {
|
||||
return { first, TextDecodeResult::Status::Valid, 1 };
|
||||
}
|
||||
|
||||
Uint8 second = view[1];
|
||||
|
||||
if ( ( ( first >= firstByteRange1.first && first <= firstByteRange1.second ) ||
|
||||
( first >= firstByteRange2.first && first <= firstByteRange2.second ) ) &&
|
||||
( ( second >= secondByteRange1.first && second <= secondByteRange1.second ) ||
|
||||
( second >= secondByteRange2.first && second <= secondByteRange2.second ) ) ) {
|
||||
return { getUnit( view.data() ), TextDecodeResult::Status::Valid, 2 };
|
||||
}
|
||||
|
||||
return { first, TextDecodeResult::Status::Invalid, 1 };
|
||||
}
|
||||
};
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
// UTF8
|
||||
//-------------------------------------------------------------------
|
||||
@@ -193,6 +234,9 @@ template <> struct TextEncoding::Wrapper<UTF16_LE> {
|
||||
template <> struct TextEncoding::Wrapper<UTF16_BE> {
|
||||
static TextEncoding Instance;
|
||||
};
|
||||
/** Specialization that declares the TextEncoding instance used for Shift JIS. */
template <>
struct TextEncoding::Wrapper<ShiftJIS> {
	static TextEncoding Instance;
};
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
// TextEncoding (indirect through function vectors)
|
||||
@@ -217,6 +261,11 @@ TextEncoding TextEncoding::Wrapper<UTF16_BE>::Instance = {
|
||||
2,
|
||||
};
|
||||
|
||||
// Shift JIS entry: decoding goes through ShiftJIS::decodePoint.
// NOTE(review): the second member is presumably the code unit size in bytes — confirm against
// the TextEncoding declaration.
TextEncoding TextEncoding::Wrapper<ShiftJIS>::Instance = { &ShiftJIS::decodePoint, 1 };
|
||||
|
||||
const TextEncoding* encodingFromEnum( TextFormat::Encoding enc ) {
|
||||
switch ( enc ) {
|
||||
default:
|
||||
@@ -243,7 +292,10 @@ struct TextFileStats {
|
||||
Uint32 numPlainAscii = 0; // includes whitespace, excludes control characters < 32
|
||||
Uint32 numWhitespace = 0;
|
||||
Uint32 numExtended = 0;
|
||||
Uint32 num16bytes = 0;
|
||||
float ooNumPoints = 0.f;
|
||||
float score = 0.f;
|
||||
bool count16b{ false };
|
||||
|
||||
Uint32 numInvalidPoints() const { return numPoints - numValidPoints; }
|
||||
|
||||
@@ -256,11 +308,15 @@ struct TextFileStats {
|
||||
}
|
||||
}
|
||||
|
||||
float getScore() const {
|
||||
return ( 2.5f * numWhitespace + numPlainAscii - 100.f * numInvalidPoints() -
|
||||
50.f * numControl + 5.f * numExtended ) *
|
||||
ooNumPoints;
|
||||
void calcScore() {
|
||||
if ( !score ) {
|
||||
score = ( 2.5f * numWhitespace + numPlainAscii - 100.f * numInvalidPoints() -
|
||||
50.f * numControl + 5.f * numExtended + 2.5f * num16bytes ) *
|
||||
ooNumPoints;
|
||||
}
|
||||
}
|
||||
|
||||
/** @return The value stored in `score`; it is only updated by calcScore(). */
float getScore() const {
	return score;
}
|
||||
};
|
||||
|
||||
static Uint32 scanTextFile( TextFileStats& stats, IOStream& ins, const TextEncoding* encoding,
|
||||
@@ -314,6 +370,8 @@ static Uint32 scanTextFile( TextFileStats& stats, IOStream& ins, const TextEncod
|
||||
}
|
||||
} else if ( decoded.point >= 65536 ) {
|
||||
stats.numExtended++;
|
||||
} else if ( stats.count16b && decoded.point >= 0x8140 ) {
|
||||
stats.num16bytes++;
|
||||
}
|
||||
}
|
||||
prevWasCR = ( decoded.point == '\r' );
|
||||
@@ -321,6 +379,7 @@ static Uint32 scanTextFile( TextFileStats& stats, IOStream& ins, const TextEncod
|
||||
if ( stats.numPoints > 0 ) {
|
||||
stats.ooNumPoints = 1.f / stats.numPoints;
|
||||
}
|
||||
stats.calcScore();
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
@@ -371,6 +430,16 @@ TextFormat guessFileEncoding( IOStream& ins ) {
|
||||
encoding = TextFormat::Encoding::UTF16BE;
|
||||
}
|
||||
|
||||
TextFileStats statsShiftJIS;
|
||||
statsShiftJIS.count16b = true;
|
||||
scanTextFile( statsShiftJIS, ins, TextEncoding::get<ShiftJIS>(), NumBytesForAutodetect );
|
||||
ins.seek( 0 );
|
||||
|
||||
if ( statsShiftJIS.getScore() > stats->getScore() ) {
|
||||
stats = &statsShiftJIS;
|
||||
encoding = TextFormat::Encoding::Shift_JIS;
|
||||
}
|
||||
|
||||
// Choose between the UTF16 and 8-bit encoding:
|
||||
if ( stats8.getScore() >= stats->getScore() ) {
|
||||
stats = &stats8;
|
||||
@@ -448,6 +517,8 @@ TextFormat::Encoding TextFormat::encodingFromString( const std::string_view& str
|
||||
return TextFormat::Encoding::UTF16BE;
|
||||
case static_cast<String::HashType>( TextFormat::Encoding::Latin1 ):
|
||||
return TextFormat::Encoding::Latin1;
|
||||
case static_cast<String::HashType>( TextFormat::Encoding::Shift_JIS ):
|
||||
return TextFormat::Encoding::Shift_JIS;
|
||||
case static_cast<String::HashType>( TextFormat::Encoding::UTF8 ):
|
||||
default:
|
||||
return TextFormat::Encoding::UTF8;
|
||||
@@ -462,6 +533,8 @@ std::string TextFormat::encodingToString( TextFormat::Encoding enc ) {
|
||||
return "UTF-16 BE";
|
||||
case TextFormat::Encoding::Latin1:
|
||||
return "ISO-8859-1";
|
||||
case TextFormat::Encoding::Shift_JIS:
|
||||
return "Shift_JIS";
|
||||
case TextFormat::Encoding::UTF8:
|
||||
default:
|
||||
break;
|
||||
@@ -475,6 +548,7 @@ std::vector<std::pair<TextFormat::Encoding, std::string>> TextFormat::encodings(
|
||||
encs.emplace_back( Encoding::UTF16BE, encodingToString( Encoding::UTF16BE ) );
|
||||
encs.emplace_back( Encoding::UTF16LE, encodingToString( Encoding::UTF16LE ) );
|
||||
encs.emplace_back( Encoding::Latin1, encodingToString( Encoding::Latin1 ) );
|
||||
encs.emplace_back( Encoding::Shift_JIS, encodingToString( Encoding::Shift_JIS ) );
|
||||
return encs;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
#include <eepp/system/log.hpp>
|
||||
#include <eepp/window/backend/SDL2/base.hpp>
|
||||
#include <eepp/window/backend/SDL2/platformhelpersdl2.hpp>
|
||||
#include <eepp/system/log.hpp>
|
||||
|
||||
using namespace EE::System;
|
||||
|
||||
#if EE_PLATFORM == EE_PLATFORM_EMSCRIPTEN
|
||||
#include <emscripten.h>
|
||||
// JavaScript helper: opens the given URL through window.open with the 'blank' target.
// Defined once; a second EM_JS definition of the same name would be a redefinition.
EM_JS( void, emscripten_open_url, ( const char* msg ),
	   { window.open( UTF8ToString( msg ), 'blank' ); } );
|
||||
#endif
|
||||
|
||||
#if EE_PLATFORM == EE_PLATFORM_ANDROID
|
||||
@@ -21,18 +20,27 @@ PlatformHelperSDL2::PlatformHelperSDL2() {}
|
||||
|
||||
/** Opens a URL with the platform's handler.
 * Emscripten builds call the emscripten_open_url JavaScript helper once and report success.
 * Other builds use SDL_OpenURL when SDL is at least 2.0.14 and log the SDL error on failure;
 * older SDL versions always report failure.
 * @param url The URL to open.
 * @return True if the request was issued successfully.
 */
bool PlatformHelperSDL2::openURL( const std::string& url ) {
#if EE_PLATFORM == EE_PLATFORM_EMSCRIPTEN
	emscripten_open_url( url.c_str() );
	return true;
#else
#if SDL_VERSION_ATLEAST( 2, 0, 14 )
	const int res = SDL_OpenURL( url.c_str() );
	if ( res != 0 )
		Log::error( "PlatformHelperSDL2::openURL: Failed with error - %s", SDL_GetError() );
	return res == 0;
#else
	return false;
#endif
#endif
}
|
||||
|
||||
char* PlatformHelperSDL2::iconv( const char* tocode, const char* fromcode, const char* inbuf,
|
||||
size_t inbytesleft ) {
|
||||
return SDL_iconv_string( tocode, fromcode, inbuf, inbytesleft );
|
||||
}
|
||||
|
||||
/** Releases a conversion buffer by passing it to SDL_free. */
void PlatformHelperSDL2::iconvFree( char* buf ) {
	SDL_free( buf );
}
|
||||
|
||||
#if EE_PLATFORM == EE_PLATFORM_ANDROID
|
||||
|
||||
@@ -12,6 +12,10 @@ class EE_API PlatformHelperSDL2 : public PlatformHelper {
|
||||
|
||||
bool openURL( const std::string& url );
|
||||
|
||||
char* iconv( const char* tocode, const char* fromcode, const char* inbuf, size_t inbytesleft );
|
||||
|
||||
void iconvFree( char* buf );
|
||||
|
||||
#if EE_PLATFORM == EE_PLATFORM_ANDROID
|
||||
void* getActivity();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user