From c61abf37bfa53c8b2037dcebca51b316907896ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Lucas=20Golini?= Date: Sat, 24 Dec 2022 11:54:21 -0300 Subject: [PATCH] Added utf8cpp as a auxiliary helper library. Added UITooltip::getTooltipPosition. Fixes in System::Process. UICodeEditor: Performance improvements in minimap rendering. ecode: AutoCompleteModule fixes. Added coloring to tooltips from the LSP Client. LSP Client Server parsing improvements. --- include/eepp/core/string.hpp | 5 +- .../eepp/ui/doc/syntaxdefinitionmanager.hpp | 2 + include/eepp/ui/uitooltip.hpp | 9 + src/eepp/core/string.cpp | 7 +- src/eepp/system/process.cpp | 8 +- src/eepp/ui/doc/syntaxdefinitionmanager.cpp | 11 +- src/eepp/ui/doc/syntaxtokenizer.cpp | 2 +- src/eepp/ui/uicodeeditor.cpp | 17 +- src/eepp/ui/uitooltip.cpp | 56 ++- src/eepp/ui/uiwidget.cpp | 24 +- src/thirdparty/subprocess/subprocess.h | 6 + src/thirdparty/utf8cpp/utf8.h | 34 ++ src/thirdparty/utf8cpp/utf8/checked.h | 335 +++++++++++++++++ src/thirdparty/utf8cpp/utf8/core.h | 338 ++++++++++++++++++ src/thirdparty/utf8cpp/utf8/cpp11.h | 103 ++++++ src/thirdparty/utf8cpp/utf8/cpp17.h | 103 ++++++ src/thirdparty/utf8cpp/utf8/unchecked.h | 274 ++++++++++++++ .../autocomplete/autocompleteplugin.cpp | 50 ++- .../autocomplete/autocompleteplugin.hpp | 3 + .../ecode/plugins/linter/linterplugin.cpp | 4 +- .../ecode/plugins/lsp/lspclientplugin.cpp | 15 +- .../ecode/plugins/lsp/lspclientserver.cpp | 30 +- src/tools/ecode/projectsearch.cpp | 4 +- 23 files changed, 1379 insertions(+), 61 deletions(-) create mode 100644 src/thirdparty/utf8cpp/utf8.h create mode 100644 src/thirdparty/utf8cpp/utf8/checked.h create mode 100644 src/thirdparty/utf8cpp/utf8/core.h create mode 100644 src/thirdparty/utf8cpp/utf8/cpp11.h create mode 100644 src/thirdparty/utf8cpp/utf8/cpp17.h create mode 100644 src/thirdparty/utf8cpp/utf8/unchecked.h diff --git a/include/eepp/core/string.hpp b/include/eepp/core/string.hpp index 29661fa8e..dd68e35d4 100644 --- 
a/include/eepp/core/string.hpp +++ b/include/eepp/core/string.hpp @@ -325,7 +325,10 @@ class EE_API String { static String fromUtf8( const std::string& utf8String ); /** @return The number of codepoints of the utf8 string. */ - static size_t utf8StringLength( const std::string& utf8String ); + static size_t utf8Length( const std::string& utf8String ); + + /** @return The next character in a utf8 null terminated string */ + static Uint32 utf8Next( char*& utf8String ); /** @brief Default constructor ** This constructor creates an empty string. diff --git a/include/eepp/ui/doc/syntaxdefinitionmanager.hpp b/include/eepp/ui/doc/syntaxdefinitionmanager.hpp index 1c50d1840..b960a6e98 100644 --- a/include/eepp/ui/doc/syntaxdefinitionmanager.hpp +++ b/include/eepp/ui/doc/syntaxdefinitionmanager.hpp @@ -25,6 +25,8 @@ class EE_API SyntaxDefinitionManager { const SyntaxDefinition& getByLanguageName( const std::string& name ) const; + const SyntaxDefinition& getByLSPName( const std::string& name ) const; + const SyntaxDefinition& getByLanguageId( const String::HashType& id ) const; SyntaxDefinition& getByLanguageNameRef( const std::string& name ); diff --git a/include/eepp/ui/uitooltip.hpp b/include/eepp/ui/uitooltip.hpp index 432fedea5..e9f58be2d 100644 --- a/include/eepp/ui/uitooltip.hpp +++ b/include/eepp/ui/uitooltip.hpp @@ -16,6 +16,8 @@ class EE_API UITooltip : public UIWidget { public: static UITooltip* New(); + static Vector2f getTooltipPosition( UITooltip* toolip, Vector2f requestedPosition ); + UITooltip(); virtual ~UITooltip(); @@ -107,6 +109,12 @@ class EE_API UITooltip : public UIWidget { void setTextTransform( const TextTransform::Value& textTransform ); + Vector2f getTooltipPosition( Vector2f requestedPosition ); + + bool getUsingCustomStyling() const; + + void setUsingCustomStyling( bool usingCustomStyling ); + protected: Text* mTextCache; UIFontStyleConfig mStyleConfig; @@ -116,6 +124,7 @@ class EE_API UITooltip : public UIWidget { String mStringBuffer; 
TextTransform::Value mTextTransform{ TextTransform::None }; bool mDontAutoHideOnMouseMove{ false }; + bool mUsingCustomStyling{ false }; virtual void onAlphaChange(); diff --git a/src/eepp/core/string.cpp b/src/eepp/core/string.cpp index 7451140ec..ceff3da6d 100644 --- a/src/eepp/core/string.cpp +++ b/src/eepp/core/string.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace EE { @@ -933,10 +934,14 @@ static inline size_t utf8_length( const char* s, const char* e ) { return i; } -size_t String::utf8StringLength( const std::string& utf8String ) { +size_t String::utf8Length( const std::string& utf8String ) { return utf8_length( utf8String.c_str(), utf8String.c_str() + utf8String.length() ); } +Uint32 String::utf8Next( char*& utf8String ) { + return utf8::unchecked::next( utf8String ); +} + String::operator std::string() const { return toUtf8(); } diff --git a/src/eepp/system/process.cpp b/src/eepp/system/process.cpp index 1b13742a0..d736ebc63 100644 --- a/src/eepp/system/process.cpp +++ b/src/eepp/system/process.cpp @@ -34,12 +34,14 @@ Process::Process( const std::string& command, const Uint32& options, Process::~Process() { mShuttingDown = true; - destroy(); + if ( mProcess ) + destroy(); if ( mStdOutThread.joinable() ) mStdOutThread.join(); if ( mStdErrThread.joinable() ) mStdErrThread.join(); - kill(); + if ( mProcess && isAlive() ) + kill(); eeFree( mProcess ); } @@ -153,7 +155,7 @@ bool Process::join( int* const returnCodeOut ) { bool Process::kill() { eeASSERT( mProcess != nullptr ); - return 0 == subprocess_terminate( PROCESS_PTR ); + return ( PROCESS_PTR->child != 0 ) ? 
0 == subprocess_terminate( PROCESS_PTR ) : false; } bool Process::destroy() { diff --git a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp index 4ad5da2bf..f2eff84af 100644 --- a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp +++ b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp @@ -123,7 +123,7 @@ SyntaxDefinitionManager::SyntaxDefinitionManager() { } void SyntaxDefinitionManager::addPlainText() { - add( { "Plain Text", { "%.txt$" }, {} } ); + add( { "Plain Text", { "%.txt$" }, {}, {}, "", {}, "plaintext" } ); } void SyntaxDefinitionManager::addXML() { @@ -656,6 +656,7 @@ void SyntaxDefinitionManager::addPython() { "#", { "^#!.*[ /]python", "^#!.*[ /]python3" } } ); } + void SyntaxDefinitionManager::addBash() { add( { "Bash", { "%.sh$", "%.bash$", "%.bashrc$", "%.bash_profile$" }, @@ -3792,6 +3793,14 @@ SyntaxDefinitionManager::getByLanguageName( const std::string& name ) const { return mDefinitions[0]; } +const SyntaxDefinition& SyntaxDefinitionManager::getByLSPName( const std::string& name ) const { + for ( auto& style : mDefinitions ) { + if ( style.getLSPName() == name ) + return style; + } + return mDefinitions[0]; +} + const SyntaxDefinition& SyntaxDefinitionManager::getByLanguageId( const String::HashType& id ) const { for ( auto& style : mDefinitions ) { diff --git a/src/eepp/ui/doc/syntaxtokenizer.cpp b/src/eepp/ui/doc/syntaxtokenizer.cpp index 39df39c2d..3151c6345 100644 --- a/src/eepp/ui/doc/syntaxtokenizer.cpp +++ b/src/eepp/ui/doc/syntaxtokenizer.cpp @@ -317,7 +317,7 @@ Text& SyntaxTokenizer::tokenizeText( const SyntaxDefinition& syntax, size_t start = startIndex; for ( auto& token : tokens ) { if ( start < endIndex ) { - size_t strSize = String::utf8StringLength( token.text ); + size_t strSize = String::utf8Length( token.text ); text.setFillColor( colorScheme.getSyntaxStyle( token.type ).color, start, std::min( start + strSize, endIndex ) ); start += strSize; diff --git a/src/eepp/ui/uicodeeditor.cpp 
b/src/eepp/ui/uicodeeditor.cpp index c3aed63bd..b123c4909 100644 --- a/src/eepp/ui/uicodeeditor.cpp +++ b/src/eepp/ui/uicodeeditor.cpp @@ -3119,16 +3119,21 @@ void UICodeEditor::drawMinimap( const Vector2f& start, { rect.getWidth(), charHeight }, charSpacing, gutterWidth ); - auto& tokens = mHighlighter.getLine( index ); - for ( auto& token : tokens ) { - String text( token.text ); + const auto& tokens = mHighlighter.getLine( index ); + for ( const auto& token : tokens ) { if ( batchSyntaxType != token.type ) { flushBatch( batchSyntaxType ); batchSyntaxType = token.type; } - for ( size_t i = 0; i < text.size(); ++i ) { - String::StringBaseType ch = text[i]; + if ( token.text.empty() ) + continue; + + char* str = (char*)token.text.c_str(); /* utf8Next() takes a mutable cursor; the bytes are only read */ + char* end = str + token.text.size(); /* one past the last byte of the token: the terminating NUL is not a glyph */ + + do { + Uint32 ch = String::utf8Next( str ); /* decodes one code point and advances str past it */ if ( ch == ' ' || ch == '\n' ) { flushBatch( token.type ); batchStart += charSpacing; @@ -3141,7 +3146,7 @@ void UICodeEditor::drawMinimap( const Vector2f& start, } else { batchWidth += charSpacing; } - } + } while ( str < end ); /* token is non-empty here, so the first iteration is always valid */ } flushBatch( "normal" ); diff --git a/src/eepp/ui/uitooltip.cpp b/src/eepp/ui/uitooltip.cpp index 300f5dfad..f4f26c070 100644 --- a/src/eepp/ui/uitooltip.cpp +++ b/src/eepp/ui/uitooltip.cpp @@ -13,6 +13,32 @@ UITooltip* UITooltip::New() { return eeNew( UITooltip, () ); } +Vector2f UITooltip::getTooltipPosition( UITooltip* toolip, Vector2f requestedPosition ) { + UISceneNode* uiSceneNode = toolip->getUISceneNode(); + + if ( NULL == uiSceneNode ) + return Vector2f::Zero; + + UIThemeManager* themeManager = toolip->getUISceneNode()->getUIThemeManager(); + if ( NULL == themeManager ) + return Vector2f::Zero; + + Vector2f pos = requestedPosition; + pos -= uiSceneNode->getScreenPos(); // TODO: Fix UISceneNode inside UISceneNode position + pos.x += themeManager->getCursorSize().x / 2; + pos.y += themeManager->getCursorSize().y; + + if ( pos.x + toolip->getPixelsSize().getWidth() > uiSceneNode->getPixelsSize().getWidth() ) 
{ + pos.x = requestedPosition.x - toolip->getPixelsSize().getWidth() - 1; + } + + if ( pos.y + toolip->getPixelsSize().getHeight() > uiSceneNode->getPixelsSize().getHeight() ) { + pos.y = requestedPosition.y - toolip->getPixelsSize().getHeight() - 1; + } + + return pos; +} + UITooltip::UITooltip() : UIWidget( "tooltip" ), mAlignOffset( 0.f, 0.f ), mTooltipTime( Time::Zero ), mTooltipOf() { setFlags( UI_NODE_DEFAULT_FLAGS_CENTERED | UI_AUTO_PADDING | UI_AUTO_SIZE ); @@ -412,18 +438,34 @@ void UITooltip::setTextTransform( const TextTransform::Value& textTransform ) { } } +Vector2f UITooltip::getTooltipPosition( Vector2f requestedPosition ) { + return UITooltip::getTooltipPosition( this, requestedPosition ); +} + +bool UITooltip::getUsingCustomStyling() const { + return mUsingCustomStyling; +} + +void UITooltip::setUsingCustomStyling( bool usingCustomStyling ) { + mUsingCustomStyling = usingCustomStyling; +} + bool UITooltip::applyProperty( const StyleSheetProperty& attribute ) { if ( !checkPropertyDefinition( attribute ) ) return false; switch ( attribute.getPropertyDefinition()->getPropertyId() ) { case PropertyId::TextTransform: - setTextTransform( TextTransform::fromString( attribute.asString() ) ); + if ( !mUsingCustomStyling ) + setTextTransform( TextTransform::fromString( attribute.asString() ) ); + break; /* do not fall through into Color: a text-transform value is not a color */ case PropertyId::Color: - setFontColor( attribute.asColor() ); + if ( !mUsingCustomStyling ) + setFontColor( attribute.asColor() ); break; case PropertyId::ShadowColor: - setFontShadowColor( attribute.asColor() ); + if ( !mUsingCustomStyling ) + setFontShadowColor( attribute.asColor() ); break; case PropertyId::FontFamily: { Font* font = FontManager::instance()->getByName( attribute.asString() ); @@ -437,13 +479,16 @@ bool UITooltip::applyProperty( const StyleSheetProperty& attribute ) { setFontSize( attribute.asDpDimensionI() ); break; case PropertyId::FontStyle: - setFontStyle( attribute.asFontStyle() ); + if ( !mUsingCustomStyling ) + setFontStyle( 
attribute.asFontStyle() ); break; case PropertyId::TextStrokeWidth: - setOutlineThickness( PixelDensity::dpToPx( attribute.asDpDimension() ) ); + if ( !mUsingCustomStyling ) + setOutlineThickness( PixelDensity::dpToPx( attribute.asDpDimension() ) ); break; case PropertyId::TextStrokeColor: - setOutlineColor( attribute.asColor() ); + if ( !mUsingCustomStyling ) + setOutlineColor( attribute.asColor() ); break; case PropertyId::TextAlign: { std::string align = String::toLower( attribute.value() ); diff --git a/src/eepp/ui/uiwidget.cpp b/src/eepp/ui/uiwidget.cpp index d95b614e4..4d210e288 100644 --- a/src/eepp/ui/uiwidget.cpp +++ b/src/eepp/ui/uiwidget.cpp @@ -269,29 +269,9 @@ Uint32 UIWidget::onKeyDown( const KeyEvent& event ) { } Vector2f UIWidget::getTooltipPosition() { - EventDispatcher* eventDispatcher = getEventDispatcher(); - UIThemeManager* themeManager = getUISceneNode()->getUIThemeManager(); - - if ( NULL == eventDispatcher || NULL == themeManager ) + if ( NULL == getEventDispatcher() ) return Vector2f::Zero; - - UISceneNode* uiSceneNode = getUISceneNode(); - Vector2f pos = eventDispatcher->getMousePosf(); - pos -= uiSceneNode->getScreenPos(); // TODO: Fix UISceneNode inside UISceneNode position - pos.x += themeManager->getCursorSize().x; - pos.y += themeManager->getCursorSize().y; - - if ( pos.x + mTooltip->getPixelsSize().getWidth() > - eventDispatcher->getSceneNode()->getPixelsSize().getWidth() ) { - pos.x = eventDispatcher->getMousePos().x - mTooltip->getPixelsSize().getWidth(); - } - - if ( pos.y + mTooltip->getPixelsSize().getHeight() > - eventDispatcher->getSceneNode()->getPixelsSize().getHeight() ) { - pos.y = eventDispatcher->getMousePos().y - mTooltip->getPixelsSize().getHeight(); - } - - return pos; + return mTooltip->getTooltipPosition( getEventDispatcher()->getMousePosf() ); } void UIWidget::createStyle() { diff --git a/src/thirdparty/subprocess/subprocess.h b/src/thirdparty/subprocess/subprocess.h index bfb9bd186..054645821 100644 --- 
a/src/thirdparty/subprocess/subprocess.h +++ b/src/thirdparty/subprocess/subprocess.h @@ -1022,6 +1022,7 @@ int subprocess_join(struct subprocess_s *const process, } process->alive = 0; + process->child = 0; return 0; #else @@ -1105,10 +1106,14 @@ int subprocess_terminate(struct subprocess_s *const process) { windows_call_result = TerminateProcess(process->hProcess, killed_process_exit_code); success_terminate = (windows_call_result == 0) ? 1 : 0; + process->child = 0; + process->alive = 0; return success_terminate; #else int result; result = kill(process->child, 9); + process->child = 0; + process->alive = 0; return result; #endif } @@ -1242,6 +1247,7 @@ int subprocess_alive(struct subprocess_s *const process) { if (!is_alive) { process->alive = 0; + process->child = 0; } return is_alive; diff --git a/src/thirdparty/utf8cpp/utf8.h b/src/thirdparty/utf8cpp/utf8.h new file mode 100644 index 000000000..82b13f59f --- /dev/null +++ b/src/thirdparty/utf8cpp/utf8.h @@ -0,0 +1,34 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "utf8/checked.h" +#include "utf8/unchecked.h" + +#endif // header guard diff --git a/src/thirdparty/utf8cpp/utf8/checked.h b/src/thirdparty/utf8cpp/utf8/checked.h new file mode 100644 index 000000000..9b0a98db8 --- /dev/null +++ b/src/thirdparty/utf8cpp/utf8/checked.h @@ -0,0 +1,335 @@ +// Copyright 2006-2016 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" +#include + +namespace utf8 +{ + // Base for the exceptions that may be thrown from the library + class exception : public ::std::exception { + }; + + // Exceptions that may be thrown from the library functions. + class invalid_code_point : public exception { + uint32_t cp; + public: + invalid_code_point(uint32_t codepoint) : cp(codepoint) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; } + uint32_t code_point() const {return cp;} + }; + + class invalid_utf8 : public exception { + uint8_t u8; + public: + invalid_utf8 (uint8_t u) : u8(u) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; } + uint8_t utf8_octet() const {return u8;} + }; + + class invalid_utf16 : public exception { + uint16_t u16; + public: + invalid_utf16 (uint16_t u) : u16(u) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; } + uint16_t utf16_word() const {return u16;} + }; + + class not_enough_room : public exception { + public: + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; } + }; + + /// The library API - functions intended to be called by the users + + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = 
static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) { + case internal::UTF8_OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + out = utf8::append (replacement, out); + start = end; + break; + case internal::INVALID_LEAD: + out = utf8::append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::append (replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::replace_invalid(start, end, out, replacement_marker); + } + + template + uint32_t next(octet_iterator& it, octet_iterator end) + { + uint32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); + switch (err_code) { + case internal::UTF8_OK : + break; + case internal::NOT_ENOUGH_ROOM : + throw not_enough_room(); + case 
internal::INVALID_LEAD : + case internal::INCOMPLETE_SEQUENCE : + case internal::OVERLONG_SEQUENCE : + throw invalid_utf8(static_cast(*it)); + case internal::INVALID_CODE_POINT : + throw invalid_code_point(cp); + } + return cp; + } + + template + uint32_t peek_next(octet_iterator it, octet_iterator end) + { + return utf8::next(it, end); + } + + template + uint32_t prior(octet_iterator& it, octet_iterator start) + { + // can't do much if it == start + if (it == start) + throw not_enough_room(); + + octet_iterator end = it; + // Go back until we hit either a lead octet or start + while (utf8::internal::is_trail(*(--it))) + if (it == start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + return utf8::peek_next(it, end); + } + + template + void advance (octet_iterator& it, distance_type n, octet_iterator end) + { + const distance_type zero(0); + if (n < zero) { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::prior(it, end); + } else { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::next(it, end); + } + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::next(first, last); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + if (start != end) { + uint32_t trail_surrogate = utf8::internal::mask16(*start++); + if (utf8::internal::is_trail_surrogate(trail_surrogate)) + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else + throw invalid_utf16(static_cast(trail_surrogate)); + } + else + throw invalid_utf16(static_cast(cp)); + + } + // Lone trail surrogate + else if 
(utf8::internal::is_trail_surrogate(cp)) + throw invalid_utf16(static_cast(cp)); + + result = utf8::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) { + uint32_t cp = utf8::next(start, end); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::next(start, end); + + return result; + } + + // The iterator class + template + class iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + public: + typedef uint32_t value_type; + typedef uint32_t* pointer; + typedef uint32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + iterator () {} + explicit iterator (const octet_iterator& octet_it, + const octet_iterator& rangestart, + const octet_iterator& rangeend) : + it(octet_it), range_start(rangestart), range_end(rangeend) + { + if (it < range_start || it > range_end) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::next(temp, range_end); + } + bool operator == (const iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with 
different ranges"); + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + utf8::next(it, range_end); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + utf8::next(it, range_end); + return temp; + } + iterator& operator -- () + { + utf8::prior(it, range_start); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + utf8::prior(it, range_start); + return temp; + } + }; // class iterator + +} // namespace utf8 + +#if UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later +#include "cpp17.h" +#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later +#include "cpp11.h" +#endif // C++ 11 or later + +#endif //header guard + diff --git a/src/thirdparty/utf8cpp/utf8/core.h b/src/thirdparty/utf8cpp/utf8/core.h new file mode 100644 index 000000000..de6199f2a --- /dev/null +++ b/src/thirdparty/utf8cpp/utf8/core.h @@ -0,0 +1,338 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include + +// Determine the C++ standard version. +// If the user defines UTF_CPP_CPLUSPLUS, use that. +// Otherwise, trust the unreliable predefined macro __cplusplus + +#if !defined UTF_CPP_CPLUSPLUS + #define UTF_CPP_CPLUSPLUS __cplusplus +#endif + +#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later + #define UTF_CPP_OVERRIDE override + #define UTF_CPP_NOEXCEPT noexcept +#else // C++ 98/03 + #define UTF_CPP_OVERRIDE + #define UTF_CPP_NOEXCEPT throw() +#endif // C++ 11 or later + + +namespace utf8 +{ + // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers + // You may need to change them to match your system. + // These typedefs have the same names as ones from cstdint, or boost/cstdint + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +// Helper code - not intended to be directly called by the library users. 
May be changed at any time +namespace internal +{ + // Unicode constants + // Leading (high) surrogates: 0xd800 - 0xdbff + // Trailing (low) surrogates: 0xdc00 - 0xdfff + const uint16_t LEAD_SURROGATE_MIN = 0xd800u; + const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; + const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; + const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; + const uint16_t LEAD_OFFSET = 0xd7c0u; // LEAD_SURROGATE_MIN - (0x10000 >> 10) + const uint32_t SURROGATE_OFFSET = 0xfca02400u; // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN + + // Maximum valid value for a Unicode code point + const uint32_t CODE_POINT_MAX = 0x0010ffffu; + + template + inline uint8_t mask8(octet_type oc) + { + return static_cast(0xff & oc); + } + template + inline uint16_t mask16(u16_type oc) + { + return static_cast(0xffff & oc); + } + template + inline bool is_trail(octet_type oc) + { + return ((utf8::internal::mask8(oc) >> 6) == 0x2); + } + + template + inline bool is_lead_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); + } + + template + inline bool is_trail_surrogate(u16 cp) + { + return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_code_point_valid(u32 cp) + { + return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); + } + + template + inline typename std::iterator_traits::difference_type + sequence_length(octet_iterator lead_it) + { + uint8_t lead = utf8::internal::mask8(*lead_it); + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + return 0; + } + + template + inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) + { + if (cp < 0x80) { + if (length != 1) + return true; + } + else if (cp < 0x800) { + if (length != 2) + 
return true; + } + else if (cp < 0x10000) { + if (length != 3) + return true; + } + + return false; + } + + enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; + + /// Helper for get_sequence_x + template + utf_error increase_safely(octet_iterator& it, octet_iterator end) + { + if (++it == end) + return NOT_ENOUGH_ROOM; + + if (!utf8::internal::is_trail(*it)) + return INCOMPLETE_SEQUENCE; + + return UTF8_OK; + } + + #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} + + /// get_sequence_x functions decode utf-8 sequences of the length x + template + utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + return UTF8_OK; + } + + template + utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); + + return UTF8_OK; + } + + template + utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + template + utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 18) & 0x1fffff) + 
((utf8::internal::mask8(*it) << 12) & 0x3ffff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR + + template + utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + // Save the original value of it so we can go back in case of failure + // Of course, it does not make much sense with i.e. stream iterators + octet_iterator original_it = it; + + uint32_t cp = 0; + // Determine the sequence length based on the lead octet + typedef typename std::iterator_traits::difference_type octet_difference_type; + const octet_difference_type length = utf8::internal::sequence_length(it); + + // Get trail octets and calculate the code point + utf_error err = UTF8_OK; + switch (length) { + case 0: + return INVALID_LEAD; + case 1: + err = utf8::internal::get_sequence_1(it, end, cp); + break; + case 2: + err = utf8::internal::get_sequence_2(it, end, cp); + break; + case 3: + err = utf8::internal::get_sequence_3(it, end, cp); + break; + case 4: + err = utf8::internal::get_sequence_4(it, end, cp); + break; + } + + if (err == UTF8_OK) { + // Decoding succeeded. Now, security checks... + if (utf8::internal::is_code_point_valid(cp)) { + if (!utf8::internal::is_overlong_sequence(cp, length)){ + // Passed! Return here. 
+ code_point = cp; + ++it; + return UTF8_OK; + } + else + err = OVERLONG_SEQUENCE; + } + else + err = INVALID_CODE_POINT; + } + + // Failure branch - restore the original value of the iterator + it = original_it; + return err; + } + + template + inline utf_error validate_next(octet_iterator& it, octet_iterator end) { + uint32_t ignored; + return utf8::internal::validate_next(it, end, ignored); + } + +} // namespace internal + + /// The library API - functions intended to be called by the users + + // Byte order mark + const uint8_t bom[] = {0xef, 0xbb, 0xbf}; + + template + octet_iterator find_invalid(octet_iterator start, octet_iterator end) + { + octet_iterator result = start; + while (result != end) { + utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); + if (err_code != internal::UTF8_OK) + return result; + } + return result; + } + + template + inline bool is_valid(octet_iterator start, octet_iterator end) + { + return (utf8::find_invalid(start, end) == end); + } + + template + inline bool starts_with_bom (octet_iterator it, octet_iterator end) + { + return ( + ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && + ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && + ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) + ); + } +} // namespace utf8 + +#endif // header guard + + diff --git a/src/thirdparty/utf8cpp/utf8/cpp11.h b/src/thirdparty/utf8cpp/utf8/cpp11.h new file mode 100644 index 000000000..2366f1291 --- /dev/null +++ b/src/thirdparty/utf8cpp/utf8/cpp11.h @@ -0,0 +1,103 @@ +// Copyright 2018 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all 
subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1 +#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1 + +#include "checked.h" +#include + +namespace utf8 +{ + + inline void append(char32_t cp, std::string& s) + { + append(uint32_t(cp), std::back_inserter(s)); + } + + inline std::string utf16to8(const std::u16string& s) + { + std::string result; + utf16to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u16string utf8to16(const std::string& s) + { + std::u16string result; + utf8to16(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::string utf32to8(const std::u32string& s) + { + std::string result; + utf32to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u32string utf8to32(const std::string& s) + { + std::u32string result; + utf8to32(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::size_t find_invalid(const std::string& s) + { + std::string::const_iterator invalid = find_invalid(s.begin(), s.end()); 
+ return (invalid == s.end()) ? std::string::npos : static_cast(invalid - s.begin()); + } + + inline bool is_valid(const std::string& s) + { + return is_valid(s.begin(), s.end()); + } + + inline std::string replace_invalid(const std::string& s, char32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(const std::string& s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline bool starts_with_bom(const std::string& s) + { + return starts_with_bom(s.begin(), s.end()); + } + +} // namespace utf8 + +#endif // header guard + diff --git a/src/thirdparty/utf8cpp/utf8/cpp17.h b/src/thirdparty/utf8cpp/utf8/cpp17.h new file mode 100644 index 000000000..32a77ce30 --- /dev/null +++ b/src/thirdparty/utf8cpp/utf8/cpp17.h @@ -0,0 +1,103 @@ +// Copyright 2018 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9 +#define UTF8_FOR_CPP_7e906c01_03a3_4daf_b420_ea7ea952b3c9 + +#include "checked.h" +#include + +namespace utf8 +{ + + inline void append(char32_t cp, std::string& s) + { + append(uint32_t(cp), std::back_inserter(s)); + } + + inline std::string utf16to8(std::u16string_view s) + { + std::string result; + utf16to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u16string utf8to16(std::string_view s) + { + std::u16string result; + utf8to16(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::string utf32to8(std::u32string_view s) + { + std::string result; + utf32to8(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::u32string utf8to32(std::string_view s) + { + std::u32string result; + utf8to32(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline std::size_t find_invalid(std::string_view s) + { + std::string_view::const_iterator invalid = find_invalid(s.begin(), s.end()); + return (invalid == s.end()) ? 
std::string_view::npos : static_cast(invalid - s.begin()); + } + + inline bool is_valid(std::string_view s) + { + return is_valid(s.begin(), s.end()); + } + + inline std::string replace_invalid(std::string_view s, char32_t replacement) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement); + return result; + } + + inline std::string replace_invalid(std::string_view s) + { + std::string result; + replace_invalid(s.begin(), s.end(), std::back_inserter(result)); + return result; + } + + inline bool starts_with_bom(std::string_view s) + { + return starts_with_bom(s.begin(), s.end()); + } + +} // namespace utf8 + +#endif // header guard + diff --git a/src/thirdparty/utf8cpp/utf8/unchecked.h b/src/thirdparty/utf8cpp/utf8/unchecked.h new file mode 100644 index 000000000..0e1b51cc7 --- /dev/null +++ b/src/thirdparty/utf8cpp/utf8/unchecked.h @@ -0,0 +1,274 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. 
IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" + +namespace utf8 +{ + namespace unchecked + { + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) { + case internal::UTF8_OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + out = utf8::unchecked::append (replacement, out); + start = end; + break; + case internal::INVALID_LEAD: + out = utf8::unchecked::append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::unchecked::append 
(replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::unchecked::replace_invalid(start, end, out, replacement_marker); + } + + template + uint32_t next(octet_iterator& it) + { + uint32_t cp = utf8::internal::mask8(*it); + typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); + switch (length) { + case 1: + break; + case 2: + it++; + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + break; + case 3: + ++it; + cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + ++it; + cp += (*it) & 0x3f; + break; + case 4: + ++it; + cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + ++it; + cp += (utf8::internal::mask8(*it) << 6) & 0xfff; + ++it; + cp += (*it) & 0x3f; + break; + } + ++it; + return cp; + } + + template + uint32_t peek_next(octet_iterator it) + { + return utf8::unchecked::next(it); + } + + template + uint32_t prior(octet_iterator& it) + { + while (utf8::internal::is_trail(*(--it))) ; + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + + template + void advance (octet_iterator& it, distance_type n) + { + const distance_type zero(0); + if (n < zero) { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::unchecked::prior(it); + } else { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::unchecked::next(it); + } + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::unchecked::next(first); + return dist; + } + + template + 
octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + uint32_t trail_surrogate = utf8::internal::mask16(*start++); + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + } + result = utf8::unchecked::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) { + uint32_t cp = utf8::unchecked::next(start); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::unchecked::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::unchecked::next(start); + + return result; + } + + // The iterator class + template + class iterator { + octet_iterator it; + public: + typedef uint32_t value_type; + typedef uint32_t* pointer; + typedef uint32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; + iterator () {} + explicit iterator (const octet_iterator& octet_it): it(octet_it) {} + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + bool operator == (const iterator& rhs) const + { + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + 
return !(operator == (rhs)); + } + iterator& operator ++ () + { + ::std::advance(it, utf8::internal::sequence_length(it)); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + ::std::advance(it, utf8::internal::sequence_length(it)); + return temp; + } + iterator& operator -- () + { + utf8::unchecked::prior(it); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + utf8::unchecked::prior(it); + return temp; + } + }; // class iterator + + } // namespace utf8::unchecked +} // namespace utf8 + + +#endif // header guard + diff --git a/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp b/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp index ac70bc7b9..bf60d74d3 100644 --- a/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp +++ b/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp @@ -384,19 +384,38 @@ bool AutoCompletePlugin::onTextInput( UICodeEditor* editor, const TextInputEvent } void AutoCompletePlugin::updateDocCache( TextDocument* doc ) { - Lock l( mDocMutex ); + { + Lock lu( mDocsUpdatingMutex ); + mDocsUpdating[doc] = true; + } Clock clock; - auto docCache = mDocCache.find( doc ); - if ( docCache == mDocCache.end() || mClosing ) - return; - auto& cache = docCache->second; - cache.changeId = doc->getCurrentChangeId(); - cache.symbols = getDocumentSymbols( doc ); + std::unordered_map::iterator docCache; + { + Lock l( mDocMutex ); + docCache = mDocCache.find( doc ); + if ( docCache == mDocCache.end() || mClosing ) + return; + } + + auto changeId = doc->getCurrentChangeId(); + auto symbols = getDocumentSymbols( doc ); + + { + Lock l( mDocMutex ); + docCache = mDocCache.find( doc ); + if ( docCache == mDocCache.end() || mClosing ) + return; + auto& cache = docCache->second; + cache.changeId = changeId; + cache.symbols = std::move( symbols ); + } + std::string langName( doc->getSyntaxDefinition().getLanguageName() ); { Lock l( mLangSymbolsMutex ); auto& lang = 
mLangCache[langName]; lang.clear(); + Lock l2( mDocMutex ); for ( const auto& d : mDocCache ) { if ( d.first->getSyntaxDefinition().getLanguageName() == langName ) lang.insert( lang.end(), d.second.symbols.begin(), d.second.symbols.end() ); @@ -404,6 +423,10 @@ void AutoCompletePlugin::updateDocCache( TextDocument* doc ) { } Log::debug( "Dictionary for %s updated in: %.2fms", doc->getFilename().c_str(), clock.getElapsedTime().asMilliseconds() ); + { + Lock lu( mDocsUpdatingMutex ); + mDocsUpdating[doc] = false; + } } void AutoCompletePlugin::updateLangCache( const std::string& langName ) { @@ -558,6 +581,13 @@ void AutoCompletePlugin::update( UICodeEditor* ) { Lock l( mDocMutex ); for ( auto& doc : mDocs ) { if ( !doc->isLoading() && mDocCache[doc].changeId != doc->getCurrentChangeId() ) { + { + Lock lu( mDocsUpdatingMutex ); + auto du = mDocsUpdating.find( doc ); + // Dont update the document cache if it's still updating the document + if ( du != mDocsUpdating.end() && du->second == true ) + continue; + } #if AUTO_COMPLETE_THREADED mPool->run( [&, doc] { updateDocCache( doc ); }, [] {} ); #else @@ -910,7 +940,11 @@ AutoCompletePlugin::SymbolsList AutoCompletePlugin::getDocumentSymbols( TextDocu // Ignore the symbol if is actually the current symbol being written if ( matchStr.size() < 3 || ( end.line() == i && current == matchStr ) ) continue; - symbols.push_back( std::move( matchStr ) ); + if ( std::none_of( symbols.begin(), symbols.end(), + [matchStr]( const Suggestion& suggestion ) { + return suggestion.text == matchStr; + } ) ) + symbols.push_back( std::move( matchStr ) ); } if ( mClosing ) break; diff --git a/src/tools/ecode/plugins/autocomplete/autocompleteplugin.hpp b/src/tools/ecode/plugins/autocomplete/autocompleteplugin.hpp index f0c0afdc3..a5fdd2bb8 100644 --- a/src/tools/ecode/plugins/autocomplete/autocompleteplugin.hpp +++ b/src/tools/ecode/plugins/autocomplete/autocompleteplugin.hpp @@ -145,6 +145,9 @@ class AutoCompletePlugin : public 
UICodeEditorPlugin { Mutex mHandlesMutex; std::map> mHandles; + std::map> mDocsUpdating; + Mutex mDocsUpdatingMutex; + Float mRowHeight{ 0 }; Rectf mBoxRect; diff --git a/src/tools/ecode/plugins/linter/linterplugin.cpp b/src/tools/ecode/plugins/linter/linterplugin.cpp index 37668e4bf..b5918a7ad 100644 --- a/src/tools/ecode/plugins/linter/linterplugin.cpp +++ b/src/tools/ecode/plugins/linter/linterplugin.cpp @@ -741,8 +741,8 @@ bool LinterPlugin::onMouseMove( UICodeEditor* editor, const Vector2i& pos, const editor->setTooltipText( match.text ); editor->getTooltip()->setHorizontalAlign( UI_HALIGN_LEFT ); editor->getTooltip()->setDontAutoHideOnMouseMove( true ); - editor->getTooltip()->setPixelsPosition( pos.asFloat() + - PixelDensity::dpToPx( 1.f ) ); + editor->getTooltip()->setPixelsPosition( + editor->getTooltip()->getTooltipPosition( pos.asFloat() ) ); if ( !editor->getTooltip()->isVisible() ) editor->getTooltip()->show(); } ); diff --git a/src/tools/ecode/plugins/lsp/lspclientplugin.cpp b/src/tools/ecode/plugins/lsp/lspclientplugin.cpp index 42c5576b2..d1bc5d703 100644 --- a/src/tools/ecode/plugins/lsp/lspclientplugin.cpp +++ b/src/tools/ecode/plugins/lsp/lspclientplugin.cpp @@ -511,8 +511,21 @@ bool LSPClientPlugin::onMouseMove( UICodeEditor* editor, const Vector2i& positio editor->setTooltipText( resp.contents[0].value ); editor->getTooltip()->setHorizontalAlign( UI_HALIGN_LEFT ); editor->getTooltip()->setPixelsPosition( - position.asFloat() + PixelDensity::dpToPx( 1.f ) ); + editor->getTooltip()->getTooltipPosition( + position.asFloat() ) ); editor->getTooltip()->setDontAutoHideOnMouseMove( true ); + editor->getTooltip()->setUsingCustomStyling( true ); + + const auto& syntaxDef = + resp.contents[0].kind == LSPMarkupKind::MarkDown + ? 
SyntaxDefinitionManager::instance()->getByLSPName( + "markdown" ) + : editor->getSyntaxDefinition(); + + SyntaxTokenizer::tokenizeText( + syntaxDef, editor->getColorScheme(), + *editor->getTooltip()->getTextCache() ); + if ( editor->hasFocus() && !editor->getTooltip()->isVisible() ) editor->getTooltip()->show(); } diff --git a/src/tools/ecode/plugins/lsp/lspclientserver.cpp b/src/tools/ecode/plugins/lsp/lspclientserver.cpp index c35bde07a..3214167a9 100644 --- a/src/tools/ecode/plugins/lsp/lspclientserver.cpp +++ b/src/tools/ecode/plugins/lsp/lspclientserver.cpp @@ -74,6 +74,13 @@ static json newResponse( const std::string& method, const PluginIDType& id ) { return j; } +static std::string jsonString( const json& container, const std::string& member, + const std::string& def ) { + return container.is_object() && container.contains( member ) && container[member].is_string() + ? container.at( member ).get() + : def; +} + static json textDocumentURI( const URI& document ) { return json{ { MEMBER_URI, document.toString() } }; } @@ -667,23 +674,25 @@ static LSPCompletionList parseDocumentCompletion( const json& result ) { LSPCompletionList ret; if ( result.empty() ) return {}; +#ifndef EE_DEBUG try { +#endif ret.isIncomplete = result.contains( "isIncomplete" ) ? result["isIncomplete"].get() : false; const json& items = ( result.is_object() && result.contains( "items" ) ) ? result["items"] : result; for ( const auto& item : items ) { - auto label = item.value( MEMBER_LABEL, "" ); - auto detail = item.value( MEMBER_DETAIL, "" ); + auto label = jsonString( item, MEMBER_LABEL, "" ); + auto detail = jsonString( item, MEMBER_DETAIL, "" ); LSPMarkupContent doc = item.contains( MEMBER_DOCUMENTATION ) ? 
parseMarkupContent( item.at( MEMBER_DOCUMENTATION ) ) : LSPMarkupContent{}; - auto filterText = item.value( "filterText", label ); - auto insertText = item.value( "insertText", label ); - auto sortText = item.value( "sortText", label ); + auto filterText = jsonString( item, "filterText", label ); + auto insertText = jsonString( item, "insertText", label ); + auto sortText = jsonString( item, "sortText", label ); LSPTextEdit textEdit; - if ( item.contains( "textEdit" ) ) + if ( item.contains( "textEdit" ) && !item["textEdit"].is_null() ) textEdit = parseTextEdit( item["textEdit"] ); auto kind = static_cast( item.value( MEMBER_KIND, 1 ) ); const std::vector additionalTextEdits = @@ -694,9 +703,11 @@ static LSPCompletionList parseDocumentCompletion( const json& result ) { ret.items.push_back( { label, kind, detail, doc, sortText, insertText, filterText, textEdit, additionalTextEdits } ); } +#ifndef EE_DEBUG } catch ( const json::exception& err ) { Log::warning( "Error parsing parseDocumentCompletion: %s", err.what() ); } +#endif return ret; } @@ -735,7 +746,9 @@ static LSPSignatureInformation parseSignatureInformation( const json& json ) { static LSPSignatureHelp parseSignatureHelp( const json& sig ) { LSPSignatureHelp ret; +#ifndef EE_DEBUG try { +#endif const auto& sigInfos = sig.at( "signatures" ); for ( const auto& info : sigInfos ) ret.signatures.push_back( parseSignatureInformation( info ) ); @@ -748,9 +761,11 @@ static LSPSignatureHelp parseSignatureHelp( const json& sig ) { eemin( ret.activeParameter, (int)ret.signatures.at( ret.activeSignature ).parameters.size() ); } +#ifndef EE_DEBUG } catch ( const json::exception& err ) { Log::warning( "Error parsing parseSignatureHelp: %s", err.what() ); } +#endif return ret; } @@ -801,7 +816,7 @@ void LSPClientServer::initialize() { { "semanticTokens", semanticTokens }, { "synchronization", json{ { "didSave", true } } }, { "selectionRange", json{ { "dynamicRegistration", false } } }, - { "hover", json{ { 
"contentFormat", { "plaintext" } } } } }, + { "hover", json{ { "contentFormat", { "plaintext", "markdown" } } } } }, }, { "window", json{ { "workDoneProgress", true } } }, { "general", json{ { "positionEncodings", json::array( { "utf-32" } ) } } } }; @@ -1158,6 +1173,7 @@ void LSPClientServer::publishDiagnostics( const json& msg ) { } Log::debug( "LSPClientServer::publishDiagnostics: %s - returned %zu items", res.uri.toString().c_str(), res.diagnostics.size() ); + Log::info( "LSPClientServer::publishDiagnostics: %s", msg.dump().c_str() ); } void LSPClientServer::workDoneProgress( const LSPWorkDoneProgressParams& workDoneParams ) { diff --git a/src/tools/ecode/projectsearch.cpp b/src/tools/ecode/projectsearch.cpp index 44df205f8..f9bf04f28 100644 --- a/src/tools/ecode/projectsearch.cpp +++ b/src/tools/ecode/projectsearch.cpp @@ -29,7 +29,7 @@ static String textLine( const std::string& fileText, const size_t& fromPos, size nlStartPtr++; while ( ++endPtr && *endPtr != '\0' && *endPtr != '\n' ) { } - relCol = String::utf8StringLength( + relCol = String::utf8Length( fileText.substr( nlStartPtr - stringStartPtr, startPtr - nlStartPtr ) ); // if the line to substract is massive we only get the fist kilobyte of that line, since the // line is only shared for visual aid. @@ -68,7 +68,7 @@ searchInFileHorspool( const std::string& file, const std::string& text, const bo res.push_back( { str, { { (Int64)totNl, (Int64)relCol }, - { (Int64)totNl, (Int64)( relCol + String::utf8StringLength( text ) ) } }, + { (Int64)totNl, (Int64)( relCol + String::utf8Length( text ) ) } }, searchRes, static_cast( searchRes + text.size() ) } ); lSearchRes = searchRes;