diff --git a/include/eepp/core/string.hpp b/include/eepp/core/string.hpp index cf0a4fb80..bd27aca94 100644 --- a/include/eepp/core/string.hpp +++ b/include/eepp/core/string.hpp @@ -496,6 +496,11 @@ class EE_API String { **/ String( const String::View& str ); + static String fromUtf16( const char* utf16String, const size_t& utf16StringSize, + bool isBigEndian = false ); + + static String fromLatin1( const char* string, const size_t& stringSize ); + /** @brief Create a new String from a UTF-8 encoded string ** @param begin Forward iterator to the begining of the UTF-8 sequence ** @param end Forward iterator to the end of the UTF-8 sequence diff --git a/include/eepp/core/utf.hpp b/include/eepp/core/utf.hpp index bfb872109..4e335d59f 100644 --- a/include/eepp/core/utf.hpp +++ b/include/eepp/core/utf.hpp @@ -137,7 +137,7 @@ template <> class Utf<8> { template static Out fromWide( In begin, In end, Out output ); //////////////////////////////////////////////////////////// - /// \brief Convert a latin-1 (ISO-5589-1) characters range to UTF-8 + /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-8 /// /// \param begin Iterator pointing to the beginning of the input sequence /// \param end Iterator pointing to the end of the input sequence @@ -184,7 +184,7 @@ template <> class Utf<8> { #endif //////////////////////////////////////////////////////////// - /// \brief Convert an UTF-8 characters range to latin-1 (ISO-5589-1) characters + /// \brief Convert an UTF-8 characters range to latin-1 (ISO-8859-1) characters /// /// \param begin Iterator pointing to the beginning of the input sequence /// \param end Iterator pointing to the end of the input sequence @@ -336,7 +336,7 @@ template <> class Utf<16> { template static Out fromWide( In begin, In end, Out output ); //////////////////////////////////////////////////////////// - /// \brief Convert a latin-1 (ISO-5589-1) characters range to UTF-16 + /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-16 /// /// \param begin Iterator pointing to the beginning of the input sequence /// \param end Iterator pointing to the end of the input sequence @@ -383,7 +383,7 @@ template <> class Utf<16> { #endif //////////////////////////////////////////////////////////// - /// \brief Convert an UTF-16 characters range to latin-1 (ISO-5589-1) characters + /// \brief Convert an UTF-16 characters range to latin-1 (ISO-8859-1) characters /// /// \param begin Iterator pointing to the beginning of the input sequence /// \param end Iterator pointing to the end of the input sequence @@ -536,7 +536,7 @@ template <> class Utf<32> { template static Out fromWide( In begin, In end, Out output ); //////////////////////////////////////////////////////////// - /// \brief Convert a latin-1 (ISO-5589-1) characters range to UTF-32 + /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-32 /// /// \param begin Iterator pointing to the beginning of the input sequence /// \param end Iterator pointing to the end of the input sequence @@ -583,7 +583,7 @@ template <> class Utf<32> { #endif //////////////////////////////////////////////////////////// - /// \brief Convert an UTF-16 characters range to latin-1 (ISO-5589-1) characters + /// \brief Convert an UTF-16 characters range to latin-1 (ISO-8859-1) characters /// /// \param begin Iterator pointing to the beginning of the input sequence /// \param end Iterator pointing to the end of the input sequence diff --git a/include/eepp/ui/doc/textdocument.hpp b/include/eepp/ui/doc/textdocument.hpp index f92409f75..c0e643e39 100644 --- a/include/eepp/ui/doc/textdocument.hpp +++ b/include/eepp/ui/doc/textdocument.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -38,8 +39,6 @@ class EE_API TextDocument { enum class IndentType { IndentSpaces, IndentTabs }; - enum class LineEnding { LF, CRLF, CR }; - enum class FindReplaceType { Normal, LuaPattern }; enum class LoadStatus { Loaded, Interrupted, Failed }; @@ -74,25 +73,6 @@ class EE_API TextDocument { typedef std::function DocumentCommand; typedef std::function DocumentRefCommand; - static std::string lineEndingToString( const LineEnding& le ) { - switch ( le ) { - case LineEnding::CRLF: - return "CRLF"; - case LineEnding::CR: - return "CR"; - case LineEnding::LF: - default: - return "LF"; - } - } - - static LineEnding stringToLineEnding( const std::string& str ) { - if ( "CR" == str ) - return LineEnding::CR; - if ( "CRLF" == str ) - return LineEnding::CRLF; - return LineEnding::LF; - } TextDocument( bool verbose = true ); @@ -435,9 +415,9 @@ class EE_API TextDocument { void setAutoDetectIndentType( bool autodetect ); - const LineEnding& getLineEnding() const; + const TextFormat::LineEnding& getLineEnding() const; - void setLineEnding( const LineEnding& lineEnding ); + void setLineEnding( const TextFormat::LineEnding& lineEnding ); bool getForceNewLineAtEndOfFile() const; @@ -453,7 +433,7 @@ class EE_API TextDocument { void setBOM( bool active ); - bool getBOM() const; + bool isBOM() const; TextRange sanitizeRange( const TextRange& range ) const; @@ -599,6 +579,10 @@ class EE_API TextDocument { void resetUndoRedo(); + TextFormat::Encoding getEncoding() const; + + void setEncoding( TextFormat::Encoding encoding ); + protected: friend class TextUndoStack; @@ -614,7 +598,8 @@ class EE_API TextDocument { TextRanges mSelection; UnorderedSet mClients; Mutex mClientsMutex; - LineEnding mLineEnding{ LineEnding::LF }; + TextFormat::Encoding mEncoding{ TextFormat::Encoding::UTF8 }; + TextFormat::LineEnding mLineEnding{ TextFormat::LineEnding::LF }; std::atomic mLoading{ false }; std::atomic mRunningTransaction{ false }; std::atomic mLoadingAsync{ false }; diff --git a/include/eepp/ui/doc/textformat.hpp b/include/eepp/ui/doc/textformat.hpp new file mode 100644 index 000000000..9f3eb3016 --- /dev/null +++ b/include/eepp/ui/doc/textformat.hpp @@ -0,0 +1,41 @@ +#ifndef EE_UI_DOC_TEXTFORMAT_HPP +#define EE_UI_DOC_TEXTFORMAT_HPP + +#include +#include +#include + +using namespace EE; +using namespace EE::System; + +namespace EE { namespace UI { namespace Doc { + +class EE_API TextFormat { + public: + static constexpr Uint32 NumBytesForAutodetect = 4000; + + enum class Encoding : String::HashType { + UTF8 = String::hash( "UTF-8" ), + UTF16LE = String::hash( "UTF-16 LE" ), + UTF16BE = String::hash( "UTF-16 BE" ), + Latin1 = String::hash( "ISO-8859-1" ), + }; + + enum class LineEnding { LF, CRLF, CR }; + + Encoding encoding = Encoding::UTF8; + LineEnding newLine = LineEnding::LF; + bool bom = false; + + static std::string lineEndingToString( const TextFormat::LineEnding& le ); + + static TextFormat::LineEnding stringToLineEnding( const std::string& str ); + + static std::string encodingToString( TextFormat::Encoding enc ); + + static TextFormat autodetect( IOStream& ins ); +}; + +}}} // namespace EE::UI::Doc + +#endif // EE_UI_DOC_TEXTFORMAT_HPP diff --git a/projects/linux/ee.files b/projects/linux/ee.files index 64ba0621d..3418285af 100644 --- a/projects/linux/ee.files +++ b/projects/linux/ee.files @@ -341,6 +341,7 @@ ../../include/eepp/ui/doc/syntaxtokenizer.hpp ../../include/eepp/ui/doc/textdocument.hpp ../../include/eepp/ui/doc/textdocumentline.hpp +../../include/eepp/ui/doc/textformat.hpp ../../include/eepp/ui/doc/textposition.hpp ../../include/eepp/ui/doc/textrange.hpp ../../include/eepp/ui/doc/textundostack.hpp @@ -951,6 +952,7 @@ ../../src/eepp/ui/doc/syntaxhighlighter.cpp ../../src/eepp/ui/doc/syntaxtokenizer.cpp ../../src/eepp/ui/doc/textdocument.cpp +../../src/eepp/ui/doc/textformat.cpp ../../src/eepp/ui/doc/textundostack.cpp ../../src/eepp/ui/keyboardshortcut.cpp ../../src/eepp/ui/models/filesystemmodel.cpp diff --git a/src/eepp/core/string.cpp b/src/eepp/core/string.cpp index 6c7cfbd5a..d06978e6e 100644 --- a/src/eepp/core/string.cpp +++ b/src/eepp/core/string.cpp @@ -1063,6 +1063,42 @@ String::String( const String& str ) : mString( str.mString ) {} String::String( const String::View& utf32String ) : mString( utf32String ) {} +String String::fromUtf16( const char* utf16String, const size_t& utf16StringSize, + bool isBigEndian ) { + String string; + if ( nullptr == utf16String || utf16StringSize == 0 ) + return string; + + string.mString.reserve( utf16StringSize / 2 + 1 ); + + int skip = 0; + // Skip BOM + if ( utf16StringSize >= 2 ) { + if ( ( (char)0xFF == utf16String[0] && (char)0xFE == utf16String[1] ) || + ( (char)0xFE == utf16String[0] && (char)0xFF == utf16String[1] ) ) { + skip = 2; + } + } + + for ( size_t i = skip; i < utf16StringSize; i += 2 ) { + Uint16 utf16 = isBigEndian ? ( utf16String[i] << 8 ) | ( utf16String[i + 1] & 0xFF ) + : ( utf16String[i + 1] << 8 ) | ( utf16String[i] & 0xFF ); + Uint32 codepoint = 0; + Utf16::toUtf32( &utf16, &utf16 + 1, &codepoint ); + string.mString.push_back( codepoint ); + } + + return string; +} + +String String::fromLatin1( const char* str, const size_t& stringSize ) { + String::StringType utf32; + utf32.reserve( stringSize ); + for ( size_t i = 0; i < stringSize; i++ ) + utf32.push_back( static_cast( str[i] ) ); + return String( utf32 ); +} + String String::fromUtf8( const std::string& utf8String ) { String::StringType utf32; diff --git a/src/eepp/ui/doc/textdocument.cpp b/src/eepp/ui/doc/textdocument.cpp index afa0a3de5..f6d8d1fb8 100644 --- a/src/eepp/ui/doc/textdocument.cpp +++ b/src/eepp/ui/doc/textdocument.cpp @@ -1,4 +1,5 @@ -#include +#include +#include #include #include #include @@ -14,6 +15,8 @@ #include #include +using namespace std::literals; + using namespace EE::Network; namespace EE { namespace UI { namespace Doc { @@ -102,8 +105,61 @@ void TextDocument::resetCursor() { notifySelectionChanged(); } -static String ptrGetLine( char* data, const size_t& size, size_t& position ) { +static constexpr int codepointSize( TextFormat::Encoding enc ) { + switch ( enc ) { + case TextFormat::Encoding::UTF16LE: + case TextFormat::Encoding::UTF16BE: + return 2; + case TextFormat::Encoding::UTF8: + default: + break; + } + return 1; +} + +static inline void searchSubstr( char* data, const size_t& size, size_t& position, + const std::string_view& substr, const std::string_view& substrfb, + int codepointSize ) { position = 0; + const char* found = + std::search( data, data + size, substr.data(), substr.data() + substr.size() ); + if ( found != data + size ) { + position = ( found - data ); + position += codepointSize; + } else { + found = + std::search( data, data + size, substrfb.data(), substrfb.data() + substrfb.size() ); + if ( found != data + size ) { + position = ( found - data ); + position += codepointSize; + } else { + position = size; + } + } +} + +static String ptrGetLine( char* data, const size_t& size, size_t& position, + TextFormat::Encoding enc ) { + static constexpr auto LE_END_LF = "\n\0"sv; + static constexpr auto LE_END_CR = "\r\0"sv; + static constexpr auto BE_END_LF = "\0\n"sv; + static constexpr auto BE_END_CR = "\0\r"sv; + position = 0; + switch ( enc ) { + case TextFormat::Encoding::UTF16LE: { + searchSubstr( data, size, position, LE_END_LF, LE_END_CR, codepointSize( enc ) ); + return String::fromUtf16( data, position, false ); + } + case TextFormat::Encoding::UTF16BE: { + searchSubstr( data, size, position, BE_END_LF, BE_END_CR, codepointSize( enc ) ); + return String::fromUtf16( data, position, true ); + } + case TextFormat::Encoding::UTF8: + case TextFormat::Encoding::Latin1: + default: + break; + } + while ( position < size && data[position] != '\n' && data[position] != '\r' ) position++; if ( position < size ) { @@ -111,6 +167,10 @@ static String ptrGetLine( char* data, const size_t& size, size_t& position ) { position++; position++; } + + if ( enc == TextFormat::Encoding::Latin1 ) + return String::fromLatin1( data, position ); + return String( data, position ); } @@ -154,11 +214,32 @@ TextDocument::LoadStatus TextDocument::loadFromStream( IOStream& file, std::stri bufferPtr += 3; consume -= 3; mIsBOM = true; + mEncoding = TextFormat::Encoding::UTF8; + } + // Check UTF-16 LE BOM header + else if ( (char)0xFF == data.get()[0] && (char)0xFE == data.get()[1] ) { + bufferPtr += 2; + consume -= 2; + mIsBOM = true; + mEncoding = TextFormat::Encoding::UTF16LE; + } + // Check UTF-16 BE BOM header + else if ( (char)0xFE == data.get()[0] && (char)0xFF == data.get()[1] ) { + bufferPtr += 2; + consume -= 2; + mIsBOM = true; + mEncoding = TextFormat::Encoding::UTF16BE; + } + // Try to guess + else { + mIsBOM = false; + IOStreamMemory iomem( bufferPtr, read ); + mEncoding = TextFormat::autodetect( iomem ).encoding; } } while ( consume && mLoading ) { - lineBuffer += ptrGetLine( bufferPtr, consume, position ); + lineBuffer += ptrGetLine( bufferPtr, consume, position, mEncoding ); bufferPtr += position; consume -= position; size_t lineBufferSize = lineBuffer.size(); @@ -168,20 +249,20 @@ TextDocument::LoadStatus TextDocument::loadFromStream( IOStream& file, std::stri if ( mLines.empty() ) { if ( lineBufferSize > 1 && lineBuffer[lineBufferSize - 2] == '\r' && lastChar == '\n' ) { - mLineEnding = LineEnding::CRLF; + mLineEnding = TextFormat::LineEnding::CRLF; } else if ( lastChar == '\r' ) { - mLineEnding = LineEnding::CR; + mLineEnding = TextFormat::LineEnding::CR; } mMightBeBinary = lineBuffer.find_first_of( (String::StringBaseType)'\0' ) != String::InvalidPos; } - if ( mLineEnding == LineEnding::CRLF && lineBufferSize > 1 && + if ( mLineEnding == TextFormat::LineEnding::CRLF && lineBufferSize > 1 && lastChar == '\n' ) { lineBuffer[lineBuffer.size() - 2] = '\n'; lineBuffer.resize( lineBufferSize - 1 ); - } else if ( mLineEnding == LineEnding::CR && lineBufferSize > 0 ) { + } else if ( mLineEnding == TextFormat::LineEnding::CR && lineBufferSize > 0 ) { lineBuffer[lineBuffer.size() - 1] = '\n'; } @@ -310,11 +391,11 @@ void TextDocument::setAutoDetectIndentType( bool autodetect ) { mAutoDetectIndentType = autodetect; } -const TextDocument::LineEnding& TextDocument::getLineEnding() const { +const TextFormat::LineEnding& TextDocument::getLineEnding() const { return mLineEnding; } -void TextDocument::setLineEnding( const LineEnding& lineEnding ) { +void TextDocument::setLineEnding( const TextFormat::LineEnding& lineEnding ) { mLineEnding = lineEnding; } @@ -346,7 +427,7 @@ void TextDocument::setBOM( bool active ) { mIsBOM = active; } -bool TextDocument::getBOM() const { +bool TextDocument::isBOM() const { return mIsBOM; } @@ -578,14 +659,36 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) { const std::string whitespaces( " \t\f\v\n\r" ); MD5::Context md5Ctx; MD5::init( md5Ctx ); + if ( mIsBOM ) { - unsigned char bom[] = { 0xEF, 0xBB, 0xBF }; - stream.write( (char*)bom, sizeof( bom ) ); - MD5::update( md5Ctx, bom, sizeof( bom ) ); + switch ( mEncoding ) { + case TextFormat::Encoding::UTF16LE: { + unsigned char bom[] = { 0xFF, 0xFE }; + stream.write( (char*)bom, sizeof( bom ) ); + MD5::update( md5Ctx, bom, sizeof( bom ) ); + break; + } + case TextFormat::Encoding::UTF16BE: { + unsigned char bom[] = { 0xFE, 0xFF }; + stream.write( (char*)bom, sizeof( bom ) ); + MD5::update( md5Ctx, bom, sizeof( bom ) ); + break; + } + case TextFormat::Encoding::UTF8: { + unsigned char bom[] = { 0xEF, 0xBB, 0xBF }; + stream.write( (char*)bom, sizeof( bom ) ); + MD5::update( md5Ctx, bom, sizeof( bom ) ); + break; + } + case TextFormat::Encoding::Latin1: + break; + } } + size_t lastLine = mLines.size() - 1; for ( size_t i = 0; i <= lastLine; i++ ) { std::string text( mLines[i].toUtf8() ); + if ( !keepUndoRedoStatus && mTrimTrailingWhitespaces && text.size() > 1 && whitespaces.find( text[text.size() - 2] ) != std::string::npos ) { size_t pos = text.find_last_not_of( whitespaces ); @@ -598,6 +701,7 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) { } text = mLines[i].toUtf8(); } + if ( i == lastLine ) { if ( !text.empty() && text[text.size() - 1] == '\n' ) { // Last \n is added by the document but it's not part of the document. @@ -611,20 +715,53 @@ bool TextDocument::save( IOStream& stream, bool keepUndoRedoStatus ) { insert( 0, endOfDoc(), "\n" ); } } - if ( mLineEnding == LineEnding::CRLF ) { - if ( text[text.size() - 1] == '\n' ) { - text[text.size() - 1] = '\r'; - text += "\n"; + + switch ( mLineEnding ) { + case TextFormat::LineEnding::CRLF: { + if ( text[text.size() - 1] == '\n' ) { + text[text.size() - 1] = '\r'; + text += "\n"; + } + break; + } + case TextFormat::LineEnding::CR: { + text[text.size() - 1] = '\r'; + break; + } + case TextFormat::LineEnding::LF: { + break; + } + } + + switch ( mEncoding ) { + case TextFormat::Encoding::UTF16LE: + case TextFormat::Encoding::UTF16BE: { + std::wstring_convert, char16_t> convert; + std::u16string utf16String = convert.from_bytes( text ); + if ( mEncoding == TextFormat::Encoding::UTF16BE ) { + for ( char16_t& c : utf16String ) + c = ( ( c >> 8 ) & 0xFF ) | ( ( c << 8 ) & 0xFF00 ); + } + stream.write( (const char*)utf16String.data(), utf16String.size() * 2 ); + MD5::update( md5Ctx, (const char*)utf16String.data(), utf16String.size() * 2 ); + break; + } + case TextFormat::Encoding::Latin1: { + std::string latin1; + String utf32( text ); // TODO: Do direct conversion + latin1.reserve( utf32.size() ); + for ( size_t i = 0; i < utf32.size(); i++ ) + if ( utf32[i] < 0xFF ) + latin1.push_back( utf32[i] ); + stream.write( latin1.c_str(), latin1.size() ); + MD5::update( md5Ctx, latin1.data(), latin1.size() ); + break; + } + case TextFormat::Encoding::UTF8: { + stream.write( text.c_str(), text.size() ); + MD5::update( md5Ctx, text.data(), text.size() ); + break; } - stream.write( text.c_str(), text.size() ); - MD5::update( md5Ctx, text.data(), text.size() ); - } else if ( mLineEnding == LineEnding::CR ) { - text[text.size() - 1] = '\r'; - stream.write( text.c_str(), text.size() ); - MD5::update( md5Ctx, text.data(), text.size() ); - } else { - stream.write( text.c_str(), text.size() ); - MD5::update( md5Ctx, text.data(), text.size() ); } } @@ -2622,6 +2759,14 @@ void TextDocument::resetUndoRedo() { notifySelectionChanged(); } +TextFormat::Encoding TextDocument::getEncoding() const { + return mEncoding; +} + +void TextDocument::setEncoding( TextFormat::Encoding encoding ) { + mEncoding = encoding; +} + static inline void changeDepth( SyntaxHighlighter* highlighter, int& depth, const TextPosition& pos, int dir ) { if ( highlighter ) { diff --git a/src/eepp/ui/doc/textformat.cpp b/src/eepp/ui/doc/textformat.cpp new file mode 100644 index 000000000..21984b72a --- /dev/null +++ b/src/eepp/ui/doc/textformat.cpp @@ -0,0 +1,538 @@ +#include +#include +#include + +namespace EE { namespace UI { namespace Doc { + +// Adapted from plywood https://preshing.com/20200727/automatically-detecting-text-encodings-in-cpp/ +// MIT Licensed +struct TextDecodeResult { + enum class Status : Uint8 { + Truncated, // std::string_view wasn't long enough to read a valid point. A (invalid) point + // may be available anyway, such as when flushing the last few bytes of a UTF-8 + // file. + Invalid, // Invalid byte sequence was encountered. Such sequences are typically decoded one + // code unit at a time. + Valid, + }; + + // (point >= 0) if and only if (numBytes > 0), which means that a code point is available to + // read (even if status is Invalid or Truncated). + Int32 point = -1; + Status status = Status::Truncated; + Uint8 numBytes = 0; +}; + +//------------------------------------------------------------------- +// Enc_Bytes +//------------------------------------------------------------------- +struct Enc_Bytes { + static inline TextDecodeResult decodePoint( std::string_view view ) { + return view.empty() + ? TextDecodeResult{} + : TextDecodeResult{ (Uint8)view[0], TextDecodeResult::Status::Valid, 1 }; + } + + static inline Uint32 backNumBytes( std::string_view view ) { return view.empty() ? 0 : 1; } + + static inline Uint32 numBytes( Uint32 ) { return 1; } +}; + +//------------------------------------------------------------------- +// UTF8 +//------------------------------------------------------------------- +struct UTF8 { + static TextDecodeResult decodePointSlowPath( std::string_view view ); + + static inline TextDecodeResult decodePoint( std::string_view view ) { + if ( view.size() > 0 ) { + Uint8 first = view[0]; + if ( first < 0x80 ) { + return { first, TextDecodeResult::Status::Valid, 1 }; + } + } + return decodePointSlowPath( view ); + } + + static Uint32 backNumBytesSlowPath( std::string_view view ); + + static inline Uint32 backNumBytes( std::string_view view ) { + if ( view.size() > 0 ) { + Uint8 last = view[view.size() - 1]; + if ( last < 0x80 ) { + return 1; + } + } + return backNumBytesSlowPath( view ); + } + + static inline Uint32 numBytes( Uint32 point ) { + if ( point < 0x80 ) + return 1; + else if ( point < 0x800 ) + return 2; + else if ( point < 0x10000 ) + return 3; + else + return 4; + } +}; + +//------------------------------------------------------------------- +// UTF16 +//------------------------------------------------------------------- +template struct UTF16 { + static inline Uint16 getUnit( const char* src ) { + if constexpr ( BigEndian ) { + return ( Uint16( Uint8( src[0] ) ) << 8 ) | Uint8( src[1] ); + } else { + return Uint8( src[0] ) | ( Uint16( Uint8( src[1] ) ) << 8 ); + } + } + + static inline void putUnit( char* src, Uint16 u ) { + if constexpr ( BigEndian ) { + src[0] = Uint8( u >> 8 ); + src[1] = Uint8( u ); + } else { + src[0] = Uint8( u ); + src[1] = Uint8( u >> 8 ); + } + } + + static inline TextDecodeResult decodePoint( std::string_view view ) { + if ( view.size() < 2 ) { + return {}; + } + Uint16 first = getUnit( view.data() ); + auto status = TextDecodeResult::Status::Invalid; + if ( first >= 0xd800 && first < 0xdc00 ) { + if ( view.size() < 4 ) { + status = TextDecodeResult::Status::Truncated; + } else { + Uint16 second = getUnit( view.data() + 2 ); + if ( second >= 0xdc00 && second < 0xe000 ) { + Uint32 value = 0x10000 + ( ( first - 0xd800 ) << 10 ) + ( second - 0xdc00 ); + return { (Int32)value, TextDecodeResult::Status::Valid, 4 }; + } + } + } else if ( !( first >= 0xdc00 && first < 0xe000 ) ) { + status = TextDecodeResult::Status::Valid; + } + return { first, status, 2 }; + } + + static inline Uint32 backNumBytes( std::string_view view ) { + if ( view.size() < 2 ) { + return 0; + } + const char* tail = view.data() + view.size(); + if ( view.size() >= 4 ) { + Uint16 first = getUnit( tail - 4 ); + Uint16 second = getUnit( tail - 2 ); + if ( first >= 0xd800 && first < 0xdc00 && second >= 0xdc00 && second < 0xe000 ) + return 4; + } + return 2; + } + + static inline Uint32 numBytes( Uint32 point ) { + if ( point < 0x10000 ) + return 2; + else + return 4; + } +}; + +using UTF16_LE = UTF16; +using UTF16_BE = UTF16; + +//------------------------------------------------------------------- +// UTF8 +//------------------------------------------------------------------- +TextDecodeResult UTF8::decodePointSlowPath( std::string_view view ) { + if ( view.size() == 0 ) { + return {}; + } + + TextDecodeResult result; // Default status is Truncated + Uint32 value = 0; + const Uint8* bytes = (const Uint8*)view.data(); + Uint8 first = *bytes++; + switch ( ( first >> 3 ) & 0xf ) { + case 0b1000: + case 0b1001: + case 0b1010: + case 0b1011: { + if ( view.size() >= 2 ) { + if ( ( bytes[0] & 0xc0 ) == 0x80 ) { + result.numBytes = 2; + value = first & 0x1f; + goto consume1Byte; + } + result.status = TextDecodeResult::Status::Invalid; + } + break; + } + + case 0b1100: + case 0b1101: { + if ( view.size() >= 3 ) { + if ( ( bytes[0] & 0xc0 ) == 0x80 && ( bytes[1] & 0xc0 ) == 0x80 ) { + result.numBytes = 3; + value = first & 0xf; + goto consume2Bytes; + } + result.status = TextDecodeResult::Status::Invalid; + } + break; + } + + case 0b1110: { + if ( view.size() >= 4 ) { + if ( ( bytes[0] & 0xc0 ) == 0x80 && ( bytes[1] & 0xc0 ) == 0x80 && + ( bytes[2] & 0xc0 ) == 0x80 ) { + result.numBytes = 4; + value = first & 0x7; + goto consume3Bytes; + } + result.status = TextDecodeResult::Status::Invalid; + } + break; + } + + default: + break; + } + + // Bad encoding; consume just one byte + // Invalid/truncated status has already been set + result.point = first; + result.numBytes = 1; + return result; + +consume3Bytes: + value = ( value << 6 ) | ( *bytes & 0x3f ); + bytes++; +consume2Bytes: + value = ( value << 6 ) | ( *bytes & 0x3f ); + bytes++; +consume1Byte: + value = ( value << 6 ) | ( *bytes & 0x3f ); + result.point = value; + result.status = TextDecodeResult::Status::Valid; + return result; +} + +Uint32 UTF8::backNumBytesSlowPath( std::string_view view ) { + if ( view.size() == 0 ) { + return 0; + } + const Uint8* tail = (const Uint8*)view.data() + view.size(); + if ( ( tail[-1] & 0xc0 ) == 0x80 && view.size() >= 2 ) { + if ( ( tail[-2] & 0xc0 ) == 0x80 && view.size() >= 3 ) { + if ( ( tail[-3] & 0xc0 ) == 0x80 && view.size() >= 4 ) { + if ( ( tail[-4] & 0xf8 ) == 0xf0 ) + return 4; + } else { + if ( ( tail[-3] & 0xf0 ) == 0xe0 ) + return 3; + } + } else { + if ( ( tail[-2] & 0xe0 ) == 0xc0 ) + return 2; + } + } + return 1; +} + +//------------------------------------------------------------------- +// TextEncoding helper objects +//------------------------------------------------------------------- +struct TextEncoding { + TextDecodeResult ( *decodePoint )( std::string_view view ) = nullptr; + Uint32 unitSize = 0; + + template struct Wrapper; + template inline static const TextEncoding* get() { + return &TextEncoding::Wrapper::Instance; + } +}; + +template <> struct TextEncoding::Wrapper { + static TextEncoding Instance; +}; +template <> struct TextEncoding::Wrapper { + static TextEncoding Instance; +}; +template <> struct TextEncoding::Wrapper { + static TextEncoding Instance; +}; +template <> struct TextEncoding::Wrapper { + static TextEncoding Instance; +}; + +//------------------------------------------------------------------- +// TextEncoding (indirect through function vectors) +//------------------------------------------------------------------- +TextEncoding TextEncoding::Wrapper::Instance = { + &Enc_Bytes::decodePoint, + 1, +}; + +TextEncoding TextEncoding::Wrapper::Instance = { + &UTF8::decodePoint, + 1, +}; + +TextEncoding TextEncoding::Wrapper::Instance = { + &UTF16_LE::decodePoint, + 2, +}; + +TextEncoding TextEncoding::Wrapper::Instance = { + &UTF16_BE::decodePoint, + 2, +}; + +const TextEncoding* encodingFromEnum( TextFormat::Encoding enc ) { + switch ( enc ) { + default: + eeASSERT( 0 ); + case TextFormat::Encoding::Latin1: + return TextEncoding::get(); + case TextFormat::Encoding::UTF8: + return TextEncoding::get(); + case TextFormat::Encoding::UTF16BE: + return TextEncoding::get>(); + case TextFormat::Encoding::UTF16LE: + return TextEncoding::get>(); + } +}; + +struct TextFileStats { + Uint32 numPoints = 0; + Uint32 numValidPoints = 0; + Uint32 totalPointValue = 0; // This value won't be accurate if byte encoding is detected + Uint32 numLines = 0; + Uint32 numCRLF = 0; + Uint32 numControl = 0; // non-whitespace points < 32, including nulls + Uint32 numNull = 0; + Uint32 numPlainAscii = 0; // includes whitespace, excludes control characters < 32 + Uint32 numWhitespace = 0; + Uint32 numExtended = 0; + float ooNumPoints = 0.f; + + Uint32 numInvalidPoints() const { return numPoints - numValidPoints; } + + TextFormat::LineEnding getNewLineType() const { + eeASSERT( numCRLF <= numLines ); + if ( numCRLF == 0 || numCRLF * 2 < numLines ) { + return TextFormat::LineEnding::LF; + } else { + return TextFormat::LineEnding::CRLF; + } + } + + float getScore() const { + return ( 2.5f * numWhitespace + numPlainAscii - 100.f * numInvalidPoints() - + 50.f * numControl + 5.f * numExtended ) * + ooNumPoints; + } +}; + +static Uint32 scanTextFile( TextFileStats& stats, IOStream& ins, const TextEncoding* encoding, + Uint32 maxBytes ) { + if ( encoding == nullptr ) + return 0; + bool prevWasCR = false; + Uint32 numBytes = 0; + ins.seek( 0 ); + char buf[4]; + while ( numBytes < maxBytes ) { + size_t read = ins.read( buf, 4 ); + if ( 0 == read ) + break; + + TextDecodeResult decoded = encoding->decodePoint( std::string_view{ buf, read } ); + if ( decoded.status == TextDecodeResult::Status::Truncated ) + break; // EOF/error + eeASSERT( decoded.point >= 0 && decoded.numBytes > 0 ); + + numBytes += decoded.numBytes; + ins.seek( numBytes ); + + stats.numPoints++; + if ( decoded.status == TextDecodeResult::Status::Valid ) { + stats.numValidPoints++; + stats.totalPointValue += decoded.point; + if ( decoded.point < 32 ) { + if ( decoded.point == '\n' ) { + stats.numPlainAscii++; + stats.numLines++; + stats.numWhitespace++; + if ( prevWasCR ) { + stats.numCRLF++; + } + } else if ( decoded.point == '\t' ) { + stats.numPlainAscii++; + stats.numWhitespace++; + } else if ( decoded.point == '\r' ) { + stats.numPlainAscii++; + } else { + stats.numControl++; + if ( decoded.point == 0 ) { + stats.numNull++; + } + } + } else if ( decoded.point < 127 ) { + stats.numPlainAscii++; + if ( decoded.point == ' ' ) { + stats.numWhitespace++; + } + } else if ( decoded.point >= 65536 ) { + stats.numExtended++; + } + } + prevWasCR = ( decoded.point == '\r' ); + } + if ( stats.numPoints > 0 ) { + stats.ooNumPoints = 1.f / stats.numPoints; + } + return numBytes; +} + +TextFormat guessFileEncoding( IOStream& ins ) { + auto start = ins.tell(); + ScopedOp op( [] {}, [&start, &ins] { ins.seek( start ); } ); + TextFileStats stats8; + + // Try UTF8 first: + Uint32 numBytesRead = + scanTextFile( stats8, ins, TextEncoding::get(), TextFormat::NumBytesForAutodetect ); + if ( numBytesRead == 0 ) + return { TextFormat::Encoding::UTF8, stats8.getNewLineType(), false }; + + ins.seek( 0 ); + if ( stats8.numInvalidPoints() == 0 && stats8.numControl == 0 ) { + // No UTF-8 encoding errors, and no weird control characters/nulls. Pick UTF-8. + return { TextFormat::Encoding::UTF8, stats8.getNewLineType(), false }; + } + + // If more than 20% of the high bytes in UTF-8 are encoding errors, reinterpret UTF-8 as just + // bytes. + TextFormat::Encoding encoding8 = TextFormat::Encoding::UTF8; + { + Uint32 numHighBytes = numBytesRead - stats8.numPlainAscii - stats8.numControl; + if ( stats8.numInvalidPoints() >= numHighBytes * 0.2f ) { + // Too many UTF-8 errors. Consider it bytes. + encoding8 = TextFormat::Encoding::Latin1; + stats8.numPoints = numBytesRead; + stats8.numValidPoints = numBytesRead; + } + } + + // Examine both UTF16 endianness: + TextFileStats stats16_le; + scanTextFile( stats16_le, ins, TextEncoding::get(), + TextFormat::NumBytesForAutodetect ); + ins.seek( 0 ); + + TextFileStats stats16_be; + scanTextFile( stats16_be, ins, TextEncoding::get(), + TextFormat::NumBytesForAutodetect ); + ins.seek( 0 ); + + // Choose the better UTF16 candidate: + TextFileStats* stats = &stats16_le; + TextFormat::Encoding encoding = TextFormat::Encoding::UTF16LE; + if ( stats16_be.getScore() > stats16_le.getScore() ) { + stats = &stats16_be; + encoding = TextFormat::Encoding::UTF16BE; + } + + // Choose between the UTF16 and 8-bit encoding: + if ( stats8.getScore() >= stats->getScore() ) { + stats = &stats8; + encoding = encoding8; + } + + // Return best guess + return { encoding, stats->getNewLineType(), false }; +} + +TextFormat TextFormat::autodetect( IOStream& ins ) { + const auto readByte = [&ins]() -> Uint8 { + Uint8 byte; + ins.read( (char*)&byte, 1 ); + return byte; + }; + TextFormat tff; + auto start = ins.tell(); + auto size = ins.getSize(); + if ( size >= 2 ) { + Uint8 h[3] = { 0 }; + h[0] = readByte(); + h[1] = readByte(); + if ( h[0] == 0xef && h[1] == 0xbb && size >= 3 ) { + h[2] = readByte(); + if ( h[2] == 0xbf ) { + tff.encoding = TextFormat::Encoding::UTF8; + tff.bom = true; + } + } else if ( h[0] == 0xfe && h[1] == 0xff ) { + tff.encoding = TextFormat::Encoding::UTF16BE; + tff.bom = true; + } else if ( h[0] == 0xff && h[1] == 0xfe ) { + tff.encoding = TextFormat::Encoding::UTF16BE; + tff.bom = true; + } + ins.seek( start ); + } + if ( !tff.bom ) { + return guessFileEncoding( ins ); + } else { + // Detect LF or CRLF + TextFileStats stats; + scanTextFile( stats, ins, encodingFromEnum( tff.encoding ), NumBytesForAutodetect ); + tff.newLine = stats.getNewLineType(); + return tff; + } +} + +std::string TextFormat::lineEndingToString( const LineEnding& le ) { + switch ( le ) { + case TextFormat::LineEnding::CRLF: + return "CRLF"; + case TextFormat::LineEnding::CR: + return "CR"; + case TextFormat::LineEnding::LF: + default: + return "LF"; + } +} + +TextFormat::LineEnding TextFormat::stringToLineEnding( const std::string& str ) { + if ( "CR" == str ) + return TextFormat::LineEnding::CR; + if ( "CRLF" == str ) + return TextFormat::LineEnding::CRLF; + return TextFormat::LineEnding::LF; +} + +std::string TextFormat::encodingToString( TextFormat::Encoding enc ) { + switch ( enc ) { + case TextFormat::Encoding::UTF16LE: + return "UTF-16 LE"; + case TextFormat::Encoding::UTF16BE: + return "UTF-16 BE"; + case TextFormat::Encoding::Latin1: + return "ISO-8859-1"; + case TextFormat::Encoding::UTF8: + default: + break; + } + return "UTF-8"; +} + +}}} // namespace EE::UI::Doc diff --git a/src/tools/ecode/appconfig.cpp b/src/tools/ecode/appconfig.cpp index b61a67173..d212ba832 100644 --- a/src/tools/ecode/appconfig.cpp +++ b/src/tools/ecode/appconfig.cpp @@ -111,12 +111,12 @@ void AppConfig::load( const std::string& confPath, std::string& keybindingsPath, doc.indentWidth = ini.getValueI( "document", "indent_width", 4 ); doc.indentSpaces = ini.getValueB( "document", "indent_spaces", false ); doc.lineEndings = - TextDocument::stringToLineEnding( ini.getValue( "document", "line_endings", "LF" ) ); + TextFormat::stringToLineEnding( ini.getValue( "document", "line_endings", "LF" ) ); // Migrate old data if ( ini.keyValueExists( "document", "windows_line_endings" ) && !ini.keyValueExists( "document", "line_endings" ) && ini.getValueB( "document", "windows_line_endings" ) == true ) { - doc.lineEndings = TextDocument::LineEnding::CRLF; + doc.lineEndings = TextFormat::LineEnding::CRLF; } doc.tabWidth = eemax( 2, ini.getValueI( "document", "tab_width", 4 ) ); @@ -243,7 +243,7 @@ void AppConfig::save( const std::vector& recentFiles, ini.setValueB( "document", "write_bom", doc.writeUnicodeBOM ); ini.setValueI( "document", "indent_width", doc.indentWidth ); ini.setValueB( "document", "indent_spaces", doc.indentSpaces ); - ini.setValue( "document", "line_endings", TextDocument::lineEndingToString( doc.lineEndings ) ); + ini.setValue( "document", "line_endings", TextFormat::lineEndingToString( doc.lineEndings ) ); ini.setValueI( "document", "tab_width", doc.tabWidth ); ini.setValueI( "document", "line_breaking_column", doc.lineBreakingColumn ); ini.setValue( "editor", "auto_close_brackets", editor.autoCloseBrackets ); @@ -384,7 +384,7 @@ void AppConfig::saveProject( std::string projectFolder, UICodeEditorSplitter* ed cfg.setValueI( "document", "indent_width", docConfig.doc.indentWidth ); cfg.setValueB( "document", "indent_spaces", docConfig.doc.indentSpaces ); cfg.setValue( "document", "line_endings", - TextDocument::lineEndingToString( docConfig.doc.lineEndings ) ); + TextFormat::lineEndingToString( docConfig.doc.lineEndings ) ); cfg.setValueI( "document", "tab_width", docConfig.doc.tabWidth ); cfg.setValueI( "document", "line_breaking_column", docConfig.doc.lineBreakingColumn ); cfg.setValue( "build", "build_name", buildConfig.buildName ); @@ -524,7 +524,7 @@ void AppConfig::loadProject( std::string projectFolder, UICodeEditorSplitter* ed docConfig.doc.indentWidth = cfg.getValueI( "document", "indent_width", 4 ); docConfig.doc.indentSpaces = cfg.getValueB( "document", "indent_spaces", false ); docConfig.doc.lineEndings = - TextDocument::stringToLineEnding( cfg.getValue( "document", "line_endings", "LF" ) ); + TextFormat::stringToLineEnding( cfg.getValue( "document", "line_endings", "LF" ) ); docConfig.doc.tabWidth = eemax( 2, cfg.getValueI( "document", "tab_width", 4 ) ); docConfig.doc.lineBreakingColumn = diff --git a/src/tools/ecode/appconfig.hpp b/src/tools/ecode/appconfig.hpp index 252b959db..f1ccfa2bd 100644 --- a/src/tools/ecode/appconfig.hpp +++ b/src/tools/ecode/appconfig.hpp @@ -84,7 +84,7 @@ struct DocumentConfig { bool autoDetectIndentType{ true }; bool writeUnicodeBOM{ false }; bool indentSpaces{ false }; - TextDocument::LineEnding lineEndings{ TextDocument::LineEnding::LF }; + TextFormat::LineEnding lineEndings{ TextFormat::LineEnding::LF }; int indentWidth{ 4 }; int tabWidth{ 4 }; int lineBreakingColumn{ 100 }; diff --git a/src/tools/ecode/ecode.cpp b/src/tools/ecode/ecode.cpp index 49da0177a..1c85eedfc 100644 --- a/src/tools/ecode/ecode.cpp +++ b/src/tools/ecode/ecode.cpp @@ -25,6 +25,8 @@ extern "C" { } #endif +using namespace std::literals; + namespace fs = std::filesystem; using json = nlohmann::json; @@ -1613,11 +1615,12 @@ void App::updateDocInfo( TextDocument& doc ) { mDocInfo->setVisible( true ); updateDocInfoLocation(); String infoStr( String::format( - "%s: %lld / %zu %s: %lld %s %s", i18n( "line_abbr", "line" ).toUtf8(), + "%s: %lld / %zu %s: %lld %s %s%s %s", i18n( "line_abbr", "line" ).toUtf8(), doc.getSelection().start().line() + 1, doc.linesCount(), i18n( "col_abbr", "col" ).toUtf8(), mSplitter->getCurEditor()->getCurrentColumnCount(), doc.getSyntaxDefinition().getLanguageName(), - TextDocument::lineEndingToString( doc.getLineEnding() ) ) ); + TextFormat::encodingToString( doc.getEncoding() ), doc.isBOM() ? " (with BOM)"sv : ""sv, + TextFormat::lineEndingToString( doc.getLineEnding() ) ) ); mDocInfo->debounce( [this, infoStr] { mDocInfo->setText( infoStr ); }, Time::Zero, String::hash( "ecode::doc_info::update" ) ); } diff --git a/src/tools/ecode/settingsmenu.cpp b/src/tools/ecode/settingsmenu.cpp index 2a62b6637..93438a323 100644 --- a/src/tools/ecode/settingsmenu.cpp +++ b/src/tools/ecode/settingsmenu.cpp @@ -297,21 +297,21 @@ UIMenu* SettingsMenu::createDocumentMenu() { UIPopUpMenu* lineEndingsMenu = UIPopUpMenu::New(); lineEndingsMenu ->addRadioButton( "Windows/DOS (CR/LF)", - mApp->getConfig().doc.lineEndings == TextDocument::LineEnding::CRLF ) + mApp->getConfig().doc.lineEndings == TextFormat::LineEnding::CRLF ) ->setId( "CRLF" ); lineEndingsMenu ->addRadioButton( "Unix (LF)", - mApp->getConfig().doc.lineEndings == TextDocument::LineEnding::LF ) + mApp->getConfig().doc.lineEndings == TextFormat::LineEnding::LF ) ->setId( "LF" ); lineEndingsMenu ->addRadioButton( "Macintosh (CR)", - mApp->getConfig().doc.lineEndings == TextDocument::LineEnding::CR ) + mApp->getConfig().doc.lineEndings == TextFormat::LineEnding::CR ) ->setId( "CR" ); mDocMenu->addSubMenu( i18n( "line_endings", "Line Endings" ), nullptr, lineEndingsMenu ) ->setId( "line_endings_cur" ); lineEndingsMenu->on( Event::OnItemClicked, [this]( const Event* event ) { auto le = - TextDocument::stringToLineEnding( event->getNode()->asType()->getId() ); + TextFormat::stringToLineEnding( event->getNode()->asType()->getId() ); if ( mSplitter->curEditorExistsAndFocused() ) { mSplitter->getCurEditor()->getDocument().setLineEnding( le ); mApp->updateDocInfo( mSplitter->getCurEditor()->getDocument() ); @@ -417,21 +417,21 @@ UIMenu* SettingsMenu::createDocumentMenu() { UIPopUpMenu* lineEndingsGlobalMenu = UIPopUpMenu::New(); lineEndingsGlobalMenu ->addRadioButton( "Windows/DOS (CR/LF)", - mApp->getConfig().doc.lineEndings == TextDocument::LineEnding::CRLF ) + mApp->getConfig().doc.lineEndings == TextFormat::LineEnding::CRLF ) ->setId( "CRLF" ); lineEndingsGlobalMenu ->addRadioButton( "Unix (LF)", - mApp->getConfig().doc.lineEndings == TextDocument::LineEnding::LF ) + mApp->getConfig().doc.lineEndings == TextFormat::LineEnding::LF ) ->setId( "LF" ); lineEndingsGlobalMenu ->addRadioButton( "Macintosh (CR)", - mApp->getConfig().doc.lineEndings == TextDocument::LineEnding::CR ) + mApp->getConfig().doc.lineEndings == TextFormat::LineEnding::CR ) ->setId( "CR" ); globalMenu->addSubMenu( i18n( "line_endings", "Line Endings" ), nullptr, lineEndingsGlobalMenu ) ->setId( "line_endings" ); lineEndingsGlobalMenu->on( Event::OnItemClicked, [this]( const Event* event ) { mApp->getConfig().doc.lineEndings = - TextDocument::stringToLineEnding( event->getNode()->asType()->getId() ); + TextFormat::stringToLineEnding( event->getNode()->asType()->getId() ); } ); UIPopUpMenu* bracketsMenu = UIPopUpMenu::New(); @@ -619,15 +619,15 @@ UIMenu* SettingsMenu::createDocumentMenu() { UIPopUpMenu* lineEndingsProjectMenu = UIPopUpMenu::New(); lineEndingsProjectMenu ->addRadioButton( "Windows (CR/LF)", mApp->getProjectDocConfig().doc.lineEndings == - TextDocument::LineEnding::CRLF ) + TextFormat::LineEnding::CRLF ) ->setId( "CRLF" ); lineEndingsProjectMenu ->addRadioButton( "Unix (LF)", mApp->getProjectDocConfig().doc.lineEndings == - TextDocument::LineEnding::LF ) + TextFormat::LineEnding::LF ) ->setId( "LF" ); lineEndingsProjectMenu ->addRadioButton( "Macintosh (CR)", mApp->getProjectDocConfig().doc.lineEndings == - TextDocument::LineEnding::CR ) + TextFormat::LineEnding::CR ) ->setId( "CR" ); mProjectDocMenu ->addSubMenu( i18n( "line_endings", "Line Endings" ), nullptr, lineEndingsProjectMenu ) @@ -635,7 +635,7 @@ UIMenu* SettingsMenu::createDocumentMenu() { ->setEnabled( !mApp->getProjectDocConfig().useGlobalSettings ); lineEndingsProjectMenu->on( Event::OnItemClicked, [this]( const Event* event ) { mApp->getProjectDocConfig().doc.lineEndings = - TextDocument::stringToLineEnding( event->getNode()->asType()->getId() ); + TextFormat::stringToLineEnding( event->getNode()->asType()->getId() ); } ); mProjectDocMenu @@ -1514,7 +1514,7 @@ void SettingsMenu::updateProjectSettingsMenu() { mProjectDocMenu->find( "line_endings" ) ->asType() ->getSubMenu() - ->find( TextDocument::lineEndingToString( mApp->getProjectDocConfig().doc.lineEndings ) ) + ->find( TextFormat::lineEndingToString( mApp->getProjectDocConfig().doc.lineEndings ) ) ->asType() ->setActive( true ); @@ -1590,12 +1590,12 @@ void SettingsMenu::updateDocumentMenu() { ->asType() ->setActive( doc.getForceNewLineAtEndOfFile() ); - mDocMenu->find( "write_bom_cur" )->asType()->setActive( doc.getBOM() ); + mDocMenu->find( "write_bom_cur" )->asType()->setActive( doc.isBOM() ); mDocMenu->find( "line_endings_cur" ) ->asType() ->getSubMenu() - ->find( TextDocument::lineEndingToString( doc.getLineEnding() ) ) + ->find( TextFormat::lineEndingToString( doc.getLineEnding() ) ) ->asType() ->setActive( true );