From d95f2d8944aa4c3fb41bd8b3759b73f39064bf2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Lucas=20Golini?= Date: Sat, 23 May 2020 06:17:56 -0300 Subject: [PATCH] Syntax Highlighting WIP. --- include/eepp/ui/doc/syntaxdefinition.hpp | 44 ++ .../eepp/ui/doc/syntaxdefinitionmanager.hpp | 28 ++ include/eepp/ui/doc/syntaxhighlighter.hpp | 37 ++ include/eepp/ui/doc/syntaxstyle.hpp | 29 ++ include/eepp/ui/doc/syntaxtokenizer.hpp | 27 ++ include/eepp/ui/doc/textdocument.hpp | 4 + include/eepp/ui/uicodeeditor.hpp | 4 + premake4.lua | 11 +- premake5.lua | 11 +- projects/android-project/app/jni/eepp.mk | 5 +- projects/linux/ee.files | 10 + projects/linux/ee.includes | 2 + src/eepp/ui/doc/syntaxdefinition.cpp | 37 ++ src/eepp/ui/doc/syntaxdefinitionmanager.cpp | 52 +++ src/eepp/ui/doc/syntaxhighlighter.cpp | 49 +++ src/eepp/ui/doc/syntaxstyle.cpp | 39 ++ src/eepp/ui/doc/syntaxtokenizer.cpp | 116 +++++ src/eepp/ui/doc/textdocument.cpp | 16 +- src/eepp/ui/uicodeeditor.cpp | 20 +- src/eepp/window/engine.cpp | 3 + .../ui_hello_world/ui_hello_world.cpp | 18 +- src/thirdparty/rx-cpp/COPYING | 19 + src/thirdparty/rx-cpp/lua-str.c | 411 ++++++++++++++++++ src/thirdparty/rx-cpp/lua-str.h | 22 + src/thirdparty/rx-cpp/rx.cpp | 356 +++++++++++++++ src/thirdparty/rx-cpp/rx.h | 235 ++++++++++ 26 files changed, 1588 insertions(+), 17 deletions(-) create mode 100644 include/eepp/ui/doc/syntaxdefinition.hpp create mode 100644 include/eepp/ui/doc/syntaxdefinitionmanager.hpp create mode 100644 include/eepp/ui/doc/syntaxhighlighter.hpp create mode 100644 include/eepp/ui/doc/syntaxstyle.hpp create mode 100644 include/eepp/ui/doc/syntaxtokenizer.hpp create mode 100644 src/eepp/ui/doc/syntaxdefinition.cpp create mode 100644 src/eepp/ui/doc/syntaxdefinitionmanager.cpp create mode 100644 src/eepp/ui/doc/syntaxhighlighter.cpp create mode 100644 src/eepp/ui/doc/syntaxstyle.cpp create mode 100644 src/eepp/ui/doc/syntaxtokenizer.cpp create mode 100644 src/thirdparty/rx-cpp/COPYING create mode 
100644 src/thirdparty/rx-cpp/lua-str.c create mode 100644 src/thirdparty/rx-cpp/lua-str.h create mode 100644 src/thirdparty/rx-cpp/rx.cpp create mode 100644 src/thirdparty/rx-cpp/rx.h diff --git a/include/eepp/ui/doc/syntaxdefinition.hpp b/include/eepp/ui/doc/syntaxdefinition.hpp new file mode 100644 index 000000000..eae2cbebd --- /dev/null +++ b/include/eepp/ui/doc/syntaxdefinition.hpp @@ -0,0 +1,44 @@ +#ifndef EE_UI_DOC_DEFINITION_HPP +#define EE_UI_DOC_DEFINITION_HPP + +#include +#include +#include + +namespace EE { namespace UI { namespace Doc { + +struct EE_API SyntaxPattern { + std::vector patterns; + std::string type; +}; + +class EE_API SyntaxDefinition { + public: + SyntaxDefinition(); + + SyntaxDefinition( const std::vector& files, + const std::vector& patterns, + const std::unordered_map& symbols = + std::unordered_map(), + const std::string& comment = "" ); + + const std::vector& getFiles() const; + + const std::vector& getPatterns() const; + + const std::string& getComment() const; + + const std::unordered_map& getSymbols() const; + + std::string getSymbol( const std::string& symbol ) const; + + protected: + std::vector mFiles; + std::vector mPatterns; + std::unordered_map mSymbols; + std::string mComment; +}; + +}}} // namespace EE::UI::Doc + +#endif // EE_UI_DOC_SYNTAXSTYLE_HPP diff --git a/include/eepp/ui/doc/syntaxdefinitionmanager.hpp b/include/eepp/ui/doc/syntaxdefinitionmanager.hpp new file mode 100644 index 000000000..ba12d5a51 --- /dev/null +++ b/include/eepp/ui/doc/syntaxdefinitionmanager.hpp @@ -0,0 +1,28 @@ +#ifndef EE_UI_DOC_SYNTAXSTYLEMANAGER_HPP +#define EE_UI_DOC_SYNTAXSTYLEMANAGER_HPP + +#include +#include +#include +#include + +namespace EE { namespace UI { namespace Doc { + +class EE_API SyntaxDefinitionManager { + SINGLETON_DECLARE_HEADERS( SyntaxDefinitionManager ) + public: + void add( SyntaxDefinition&& syntaxStyle ); + + const SyntaxDefinition& getPlainStyle() const; + + const SyntaxDefinition& getStyleByExtension( const 
std::string& fileName ) const; + + protected: + SyntaxDefinitionManager(); + + std::vector mStyles; +}; + +}}} // namespace EE::UI::Doc + +#endif // EE_UI_DOC_SYNTAXSTYLEMANAGER_HPP diff --git a/include/eepp/ui/doc/syntaxhighlighter.hpp b/include/eepp/ui/doc/syntaxhighlighter.hpp new file mode 100644 index 000000000..0a127a4e1 --- /dev/null +++ b/include/eepp/ui/doc/syntaxhighlighter.hpp @@ -0,0 +1,37 @@ +#ifndef EE_UI_DOC_SYNTAXHIGHLIGHTER_HPP +#define EE_UI_DOC_SYNTAXHIGHLIGHTER_HPP + +#include +#include +#include + +namespace EE { namespace UI { namespace Doc { + +struct TokenizedLine { + int initState; + String text; + std::vector tokens; + int state; +}; + +class EE_API SyntaxHighlighter { + public: + SyntaxHighlighter( TextDocument* doc ); + + void reset(); + + void invalidate( const size_t& line ); + + const std::vector& getLine( const size_t& index ); + + protected: + TextDocument* mDoc; + size_t mFirstInvalidLine{0}; + size_t mMaxWantedLine{0}; + std::map mLines; + TokenizedLine tokenizeLine( const size_t& line, const int& state ); +}; + +}}} // namespace EE::UI::Doc + +#endif // EE_UI_DOC_SYNTAXHIGHLIGHTER_HPP diff --git a/include/eepp/ui/doc/syntaxstyle.hpp b/include/eepp/ui/doc/syntaxstyle.hpp new file mode 100644 index 000000000..4e94fd0a6 --- /dev/null +++ b/include/eepp/ui/doc/syntaxstyle.hpp @@ -0,0 +1,29 @@ +#ifndef EE_UI_DOC_SYNTAXSTYLE_HPP +#define EE_UI_DOC_SYNTAXSTYLE_HPP + +#include +#include + +using namespace EE::System; + +namespace EE { namespace UI { namespace Doc { + +class EE_API SyntaxStyle { + public: + static SyntaxStyle getDefault(); + + SyntaxStyle(); + + const Color& getColor( const std::string& type ) const; + + void setColors( const std::unordered_map& colors ); + + void setColor( const std::string& type, const Color& color ); + + protected: + std::unordered_map mColors; +}; + +}}} // namespace EE::UI::Doc + +#endif // EE_UI_DOC_SYNTAXSTYLE_HPP diff --git a/include/eepp/ui/doc/syntaxtokenizer.hpp 
b/include/eepp/ui/doc/syntaxtokenizer.hpp new file mode 100644 index 000000000..39deff32c --- /dev/null +++ b/include/eepp/ui/doc/syntaxtokenizer.hpp @@ -0,0 +1,27 @@ +#ifndef EE_UI_DOC_SYNTAXTOKENIZER_HPP +#define EE_UI_DOC_SYNTAXTOKENIZER_HPP + +#include +#include +#include +#include + +namespace EE { namespace UI { namespace Doc { + +struct EE_API SyntaxToken { + std::string type; + std::string text; +}; + +#define SYNTAX_TOKENIZER_STATE_NONE ( -1 ) + +class EE_API SyntaxTokenizer { + public: + std::pair, int> static tokenize( const SyntaxDefinition& syntax, + const std::string& text, + const int& state ); +}; + +}}} // namespace EE::UI::Doc + +#endif // EE_UI_DOC_SYNTAXTOKENIZER_HPP diff --git a/include/eepp/ui/doc/textdocument.hpp b/include/eepp/ui/doc/textdocument.hpp index d202fda2e..c162ffb76 100644 --- a/include/eepp/ui/doc/textdocument.hpp +++ b/include/eepp/ui/doc/textdocument.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -191,6 +192,8 @@ class EE_API TextDocument { void redo(); + const SyntaxDefinition& getSyntaxDefinition() const; + protected: friend class UndoStack; UndoStack mUndoStack; @@ -202,6 +205,7 @@ class EE_API TextDocument { Uint32 mTabWidth{4}; IndentType mIndentType{IndentTabs}; Clock mTimer; + SyntaxDefinition mSyntaxDefinition; void notifyTextChanged(); diff --git a/include/eepp/ui/uicodeeditor.hpp b/include/eepp/ui/uicodeeditor.hpp index 945c3c1a4..bbf4c84ca 100644 --- a/include/eepp/ui/uicodeeditor.hpp +++ b/include/eepp/ui/uicodeeditor.hpp @@ -1,6 +1,8 @@ #ifndef EE_UI_UICODEEDIT_HPP #define EE_UI_UICODEEDIT_HPP +#include +#include #include #include #include @@ -107,6 +109,8 @@ class EE_API UICodeEditor : public UIWidget, public TextDocument::Client { Color mLineNumberBackgroundColor; Color mCurrentLineBackgroundColor; Color mCaretColor; + SyntaxStyle mStyle; + SyntaxHighlighter mHighlighter; void invalidateEditor(); diff --git a/premake4.lua b/premake4.lua index 3acb99ed2..6a0d9930d 100644 --- 
a/premake4.lua +++ b/premake4.lua @@ -536,7 +536,8 @@ function add_static_links() "zlib-static", "imageresampler-static", "pugixml-static", - "vorbis-static" + "vorbis-static", + "rx-cpp-static" } if _OPTIONS["with-mojoal"] then @@ -870,6 +871,14 @@ solution "eepp" includedirs { "src/thirdparty/freetype2/include" } build_base_configuration( "freetype" ) + project "rx-cpp-static" + kind "StaticLib" + language "C++" + set_targetdir("libs/" .. os.get_real() .. "/thirdparty/") + files { "src/thirdparty/rx-cpp/*.cpp", "src/thirdparty/rx-cpp/*.c" } + includedirs { "src/thirdparty/rx-cpp" } + build_base_cpp_configuration( "rx-cpp" ) + project "chipmunk-static" kind "StaticLib" diff --git a/premake5.lua b/premake5.lua index c14ffb262..152e72d33 100644 --- a/premake5.lua +++ b/premake5.lua @@ -309,7 +309,8 @@ function add_static_links() "zlib-static", "imageresampler-static", "pugixml-static", - "vorbis-static" + "vorbis-static", + "rx-cpp-static" } if _OPTIONS["with-mojoal"] then @@ -631,6 +632,14 @@ workspace "eepp" incdirs { "src/thirdparty/freetype2/include" } build_base_configuration( "freetype" ) + project "rx-cpp-static" + kind "StaticLib" + language "C++" + targetdir("libs/" .. os.target() .. "/thirdparty/") + files { "src/thirdparty/rx-cpp/*.cpp", "src/thirdparty/rx-cpp/*.c" } + incdirs { "src/thirdparty/rx-cpp" } + build_base_cpp_configuration( "rx-cpp" ) + project "chipmunk-static" kind "StaticLib" targetdir("libs/" .. os.target() .. 
"/thirdparty/") diff --git a/projects/android-project/app/jni/eepp.mk b/projects/android-project/app/jni/eepp.mk index 793e6d5e0..d8050c191 100644 --- a/projects/android-project/app/jni/eepp.mk +++ b/projects/android-project/app/jni/eepp.mk @@ -19,7 +19,8 @@ EEPP_C_INCLUDES := \ $(EEPP_THIRD_PARTY_PATH)/libvorbis/include \ $(EEPP_THIRD_PARTY_PATH)/libogg/include \ $(EEPP_THIRD_PARTY_PATH)/mbedtls/include \ - $(EEPP_THIRD_PARTY_PATH)/mojoAL + $(EEPP_THIRD_PARTY_PATH)/mojoAL \ + $(EEPP_THIRD_PARTY_PATH)/rx-cpp EEPP_C_FLAGS := \ -Wl,--undefined=Java_org_libsdl_app_SDLActivity_nativeInit \ @@ -61,6 +62,8 @@ CODE_SRCS := \ ../thirdparty/libvorbis/lib/*.c \ ../thirdparty/mbedtls/library/*.c \ ../thirdparty/mojoAL/*.c \ + ../thirdparty/rx-cpp/*.c \ + ../thirdparty/rx-cpp/*.cpp \ system/*.cpp \ system/platform/posix/*.cpp \ network/*.cpp \ diff --git a/projects/linux/ee.files b/projects/linux/ee.files index 5a8e2a21c..66f7d1adb 100644 --- a/projects/linux/ee.files +++ b/projects/linux/ee.files @@ -315,6 +315,11 @@ ../../include/eepp/ui/css/stylesheetstyle.hpp ../../include/eepp/ui/css/stylesheetvariable.hpp ../../include/eepp/ui/css/transitiondefinition.hpp +../../include/eepp/ui/doc/syntaxdefinition.hpp +../../include/eepp/ui/doc/syntaxdefinitionmanager.hpp +../../include/eepp/ui/doc/syntaxhighlighter.hpp +../../include/eepp/ui/doc/syntaxstyle.hpp +../../include/eepp/ui/doc/syntaxtokenizer.hpp ../../include/eepp/ui/doc/textdocument.hpp ../../include/eepp/ui/doc/textposition.hpp ../../include/eepp/ui/doc/textrange.hpp @@ -757,6 +762,11 @@ ../../src/eepp/ui/css/stylesheetstyle.cpp ../../src/eepp/ui/css/stylesheetvariable.cpp ../../src/eepp/ui/css/transitiondefinition.cpp +../../src/eepp/ui/doc/syntaxdefinition.cpp +../../src/eepp/ui/doc/syntaxdefinitionmanager.cpp +../../src/eepp/ui/doc/syntaxhighlighter.cpp +../../src/eepp/ui/doc/syntaxstyle.cpp +../../src/eepp/ui/doc/syntaxtokenizer.cpp ../../src/eepp/ui/doc/textdocument.cpp ../../src/eepp/ui/doc/undostack.cpp 
../../src/eepp/ui/tools/textureatlaseditor.cpp diff --git a/projects/linux/ee.includes b/projects/linux/ee.includes index 0018824a1..20628943b 100644 --- a/projects/linux/ee.includes +++ b/projects/linux/ee.includes @@ -7,3 +7,5 @@ /usr/include/freetype2/ ../../src/thirdparty/mbedtls/include ../../docs/articles +../../include/eepp/ui/doc +../../src/eepp/ui/doc diff --git a/src/eepp/ui/doc/syntaxdefinition.cpp b/src/eepp/ui/doc/syntaxdefinition.cpp new file mode 100644 index 000000000..4f565677c --- /dev/null +++ b/src/eepp/ui/doc/syntaxdefinition.cpp @@ -0,0 +1,37 @@ +#include +#include + +namespace EE { namespace UI { namespace Doc { + +SyntaxDefinition::SyntaxDefinition() {} + +SyntaxDefinition::SyntaxDefinition( const std::vector& files, + const std::vector& patterns, + const std::unordered_map& symbols, + const std::string& comment ) : + mFiles( files ), mPatterns( patterns ), mSymbols( symbols ), mComment( comment ) {} + +const std::vector& SyntaxDefinition::getFiles() const { + return mFiles; +} + +const std::vector& SyntaxDefinition::getPatterns() const { + return mPatterns; +} + +const std::string& SyntaxDefinition::getComment() const { + return mComment; +} + +const std::unordered_map& SyntaxDefinition::getSymbols() const { + return mSymbols; +} + +std::string SyntaxDefinition::getSymbol( const std::string& symbol ) const { + auto it = mSymbols.find( symbol ); + if ( it != mSymbols.end() ) + return it->second; /* the token type mapped to the symbol, not the looked-up key itself */ + return ""; +} + +}}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp new file mode 100644 index 000000000..d9cf30f15 --- /dev/null +++ b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp @@ -0,0 +1,52 @@ +#include +#include +#include + +using namespace EE::System; + +namespace EE { namespace UI { namespace Doc { + +SINGLETON_DECLARE_IMPLEMENTATION( SyntaxDefinitionManager ) + +SyntaxDefinitionManager::SyntaxDefinitionManager() { + // Register some languages support.
+ // XML - HTML + add( SyntaxDefinition( {"xml", "html"}, {{{""}, "comment"}, + {{"%f[^>][^<]", "%f[<]"}, "normal"}, + {{"\"", "\"", "\\"}, "string"}, + {{"'", "'", "\\"}, "string"}, + {{"0x[%da-fA-F]+"}, "number"}, + {{"-?%d+[%d%.]*f?"}, "number"}, + {{"-?%.?%d+f?"}, "number"}, + {{"%f[^<]![%a_][%w_]*"}, "keyword2"}, + {{"%f[^<][%a_][%w_]*"}, "function"}, + {{"%f[^<]/[%a_][%w_]*"}, "function"}, + {{"[%a_][%w_]*"}, "keyword"}, + {{"[/<>=]"}, "operator"}} ) ); +} + +void SyntaxDefinitionManager::add( SyntaxDefinition&& syntaxStyle ) { + mStyles.emplace_back( std::move( syntaxStyle ) ); +} + +const SyntaxDefinition PLAIN_STYLE = SyntaxDefinition(); + +const SyntaxDefinition& SyntaxDefinitionManager::getPlainStyle() const { + return PLAIN_STYLE; +} + +const SyntaxDefinition& +SyntaxDefinitionManager::getStyleByExtension( const std::string& filePath ) const { + std::string extension( FileSystem::fileExtension( filePath ) ); + if ( !extension.empty() ) { + for ( auto& syntax : mStyles ) { + auto it = std::find( syntax.getFiles().begin(), syntax.getFiles().end(), extension ); + if ( it != syntax.getFiles().end() ) { + return syntax; + } + } + } + return PLAIN_STYLE; +} + +}}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxhighlighter.cpp b/src/eepp/ui/doc/syntaxhighlighter.cpp new file mode 100644 index 000000000..5acd05890 --- /dev/null +++ b/src/eepp/ui/doc/syntaxhighlighter.cpp @@ -0,0 +1,49 @@ +#include +#include + +namespace EE { namespace UI { namespace Doc { + +SyntaxHighlighter::SyntaxHighlighter( TextDocument* doc ) : mDoc( doc ) { + reset(); +} + +void SyntaxHighlighter::reset() { + mLines.clear(); + mFirstInvalidLine = 0; + mMaxWantedLine = 0; +} + +void SyntaxHighlighter::invalidate( const size_t& line ) { + mFirstInvalidLine = line; + mMaxWantedLine = eemin( mMaxWantedLine, mDoc->linesCount() ); +} + +TokenizedLine SyntaxHighlighter::tokenizeLine( const size_t& line, const int& state ) { + TokenizedLine tokenizedLine; + tokenizedLine.initState = 
state; + tokenizedLine.text = mDoc->line( line ); + auto res = SyntaxTokenizer::tokenize( mDoc->getSyntaxDefinition(), tokenizedLine.text, state ); + tokenizedLine.tokens = res.first; + tokenizedLine.state = res.second; + return tokenizedLine; +} + +const std::vector& SyntaxHighlighter::getLine( const size_t& index ) { + auto it = mLines.find( index ); + if ( it == mLines.end() || + ( index < mDoc->linesCount() && mDoc->line( index ) != it->second.text ) ) { + int prevState = SYNTAX_TOKENIZER_STATE_NONE; + if ( index > 0 ) { + auto prevIt = mLines.find( index - 1 ); + if ( prevIt != mLines.end() ) { + prevState = prevIt->second.state; + } + } + auto& newLine = ( mLines[index] = tokenizeLine( index, prevState ) ); /* assign through operator[] so a stale cached entry is overwritten; map::insert would keep the old one */ + mMaxWantedLine = eemax( mMaxWantedLine, index ); + return newLine.tokens; + } + return it->second.tokens; +} + +}}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxstyle.cpp b/src/eepp/ui/doc/syntaxstyle.cpp new file mode 100644 index 000000000..3d8d8ee95 --- /dev/null +++ b/src/eepp/ui/doc/syntaxstyle.cpp @@ -0,0 +1,39 @@ +#include + +namespace EE { namespace UI { namespace Doc { + +SyntaxStyle SyntaxStyle::getDefault() { + SyntaxStyle style; + std::unordered_map colors; + colors["normal"] = Color( "#e1e1e6" ); + colors["symbol"] = Color( "#e1e1e6" ); + colors["comment"] = Color( "#676b6f" ); + colors["keyword"] = Color( "#E58AC9" ); + colors["keyword2"] = Color( "#F77483" ); + colors["number"] = Color( "#FFA94D" ); + colors["literal"] = Color( "#FFA94D" ); + colors["string"] = Color( "#f7c95c" ); + colors["operator"] = Color( "#93DDFA" ); + colors["function"] = Color( "#93DDFA" ); + style.setColors( colors ); + return style; +} + +SyntaxStyle::SyntaxStyle() {} + +const Color& SyntaxStyle::getColor( const std::string& type ) const { + auto it = mColors.find( type ); + if ( it != mColors.end() ) + return it->second; + return Color::White; +} + +void SyntaxStyle::setColors( const std::unordered_map& colors )
{ + mColors.insert( colors.begin(), colors.end() ); +} + +void SyntaxStyle::setColor( const std::string& type, const Color& color ) { + mColors[type] = color; +} + +}}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxtokenizer.cpp b/src/eepp/ui/doc/syntaxtokenizer.cpp new file mode 100644 index 000000000..b7e6042cb --- /dev/null +++ b/src/eepp/ui/doc/syntaxtokenizer.cpp @@ -0,0 +1,116 @@ +#include +#include +#include + +using namespace textutil; + +namespace EE { namespace UI { namespace Doc { + +static bool allSpaces( const std::string& str ) { + for ( auto& chr : str ) + if ( ' ' != chr ) + return false; + return true; +} + +static void pushToken( std::vector& tokens, const std::string& type, + const std::string& text ) { + if ( !tokens.empty() && ( tokens[tokens.size() - 1].type == type || + allSpaces( tokens[tokens.size() - 1].text ) ) ) { + tokens[tokens.size() - 1].type = type; + tokens[tokens.size() - 1].text += text; + } else { + tokens.push_back( {type, text} ); + } +} + +bool isScaped( const std::string& text, const size_t& startIndex, const std::string& escapeStr ) { + char escapeByte = escapeStr.empty() ? 
'\\' : escapeStr[0]; + int count = 0; + for ( size_t i = startIndex; i > 0; i-- ) { /* count escape bytes directly before startIndex; i is unsigned, so stop at 0 instead of testing i >= 0 */ + if ( text[i - 1] != escapeByte ) + break; + count++; + } + return count % 2 == 1; +} + +std::pair findNonEscaped( const std::string& text, const std::string& pattern, int offset, + const std::string& escapeStr ) { + while ( true ) { + Rxp words( pattern, Rx::lua ); + int start, end; + if ( words.find( text.c_str() + offset, start, end ) ) { + start += offset; + end += offset; + if ( !escapeStr.empty() && isScaped( text, start, escapeStr ) ) { + offset = end; + } else { + return std::make_pair( start, end ); + } + } else { + return std::make_pair( -1, -1 ); + } + } +} + +std::pair, int> SyntaxTokenizer::tokenize( const SyntaxDefinition& syntax, + const std::string& text, + const int& state ) { + std::vector tokens; + if ( syntax.getPatterns().empty() ) { + pushToken( tokens, "normal", text ); + return std::make_pair( tokens, SYNTAX_TOKENIZER_STATE_NONE ); + } + + size_t i = 0; + int retState = state; + + while ( i < text.size() ) { + if ( retState != SYNTAX_TOKENIZER_STATE_NONE ) { + const SyntaxPattern& pattern = syntax.getPatterns()[retState]; + std::pair range = + findNonEscaped( text, pattern.patterns[1], i, pattern.patterns.size() > 2 ? pattern.patterns[2] : std::string() ); /* the third (escape) entry is optional; two-entry patterns have none */ + if ( range.first != -1 ) { + pushToken( tokens, pattern.type, text.substr( i, range.second - i ) ); + retState = SYNTAX_TOKENIZER_STATE_NONE; + i = range.second; + } else { + pushToken( tokens, pattern.type, text.substr( i ) ); + break; + } + } + + bool matched = false; + + for ( size_t patternIndex = 0; patternIndex < syntax.getPatterns().size(); + patternIndex++ ) { + const SyntaxPattern& pattern = syntax.getPatterns()[patternIndex]; + const std::string& patternStr( "^" + pattern.patterns.at( 0 ) ); + Rxp words( patternStr, Rx::lua ); + int start, end = 0; + if ( words.find( text.c_str() + i, start, end ) ) { + start += i; + end += i; + std::string patternText( text.substr( start, end - start ) ); + std::string type = syntax.getSymbol(
patternText ); + pushToken( tokens, type.empty() ? pattern.type : type, patternText ); + if ( pattern.patterns.size() > 1 ) { + retState = patternIndex; + } + i = end; + matched = true; + break; + } + } + + if ( !matched ) { + pushToken( tokens, "normal", text.substr( i, 1 ) ); + i += 1; + } + } + + return std::make_pair( tokens, retState ); +} + +}}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/textdocument.cpp b/src/eepp/ui/doc/textdocument.cpp index e6e72b49e..dcac56ef1 100644 --- a/src/eepp/ui/doc/textdocument.cpp +++ b/src/eepp/ui/doc/textdocument.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -25,6 +26,7 @@ void TextDocument::reset() { mSelection.set( {0, 0}, {0, 0} ); mLines.clear(); mLines.emplace_back( String( "\n" ) ); + mSyntaxDefinition = SyntaxDefinitionManager::instance()->getPlainStyle(); notifyTextChanged(); notifyCursorChanged(); notifySelectionChanged(); @@ -34,6 +36,7 @@ void TextDocument::loadFromPath( const std::string& path ) { reset(); mLines.clear(); mFilename = path; + mSyntaxDefinition = SyntaxDefinitionManager::instance()->getStyleByExtension( path ); std::string line; std::ifstream file( path ); while ( std::getline( file, line ) ) { @@ -188,9 +191,6 @@ TextPosition TextDocument::insert( TextPosition position, const String::StringBa line( position.line() ) .substr( position.column(), line( position.line() ).length() - position.column() ); line( position.line() ) = line( position.line() ).substr( 0, position.column() ); - if ( newLine.empty() ) { - eePRINTL( "wtf" ); - } mLines.insert( mLines.begin() + position.line() + 1, std::move( newLine ) ); return {position.line() + 1, 0}; } @@ -702,13 +702,11 @@ void TextDocument::redo() { mUndoStack.redo(); } -void TextDocument::notifyTextChanged() { - for ( size_t i = 0; i < mLines.size(); i++ ) { - if ( mLines[i].empty() ) { - eePRINTL( "wtf" ); - } - } +const SyntaxDefinition& TextDocument::getSyntaxDefinition() const { + return mSyntaxDefinition; +} +void 
TextDocument::notifyTextChanged() { for ( auto& client : mClients ) { client->onDocumentTextChanged(); } diff --git a/src/eepp/ui/uicodeeditor.cpp b/src/eepp/ui/uicodeeditor.cpp index fd7306ddf..5e644108b 100644 --- a/src/eepp/ui/uicodeeditor.cpp +++ b/src/eepp/ui/uicodeeditor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -30,7 +31,9 @@ UICodeEditor::UICodeEditor() : mLineNumberFontColor( "#525259" ), mLineNumberBackgroundColor( Color::fromString( "#2e2e32" ) ), mCurrentLineBackgroundColor( "#343438" ), - mCaretColor( "#93DDFA" ) { + mCaretColor( "#93DDFA" ), + mStyle( SyntaxStyle::getDefault() ), + mHighlighter( &mDoc ) { setBackgroundColor( Color::fromString( "#2e2e32" ) ); setFontColor( Color::fromString( "#e1e1e6" ) ); mFontStyleConfig.setFontSelectionBackColor( Color::fromString( "#48484f" ) ); @@ -125,6 +128,16 @@ void UICodeEditor::draw() { Text line( mDoc.line( i ), mFont, charSize ); line.setStyleConfig( mFontStyleConfig ); line.draw( startScroll.x, startScroll.y + lineHeight * i ); + /*Vector2f curPos( startScroll.x, startScroll.y + lineHeight * i ); + auto& tokens = mHighlighter.getLine( i ); + Text line( "", mFont, charSize ); + line.setStyleConfig( mFontStyleConfig ); + for ( auto& token : tokens ) { + line.setString( token.text ); + line.setColor( mStyle.getColor( token.type ) ); + line.draw( curPos.x, curPos.y ); + curPos.x += line.getTextWidth(); + }*/ } if ( mCursorVisible ) { @@ -165,7 +178,10 @@ void UICodeEditor::scheduledUpdate( const Time& ) { } } -void UICodeEditor::reset() {} +void UICodeEditor::reset() { + mDoc.reset(); + invalidateDraw(); +} void UICodeEditor::loadFromFile( const std::string& path ) { mDoc.loadFromPath( path ); diff --git a/src/eepp/window/engine.cpp b/src/eepp/window/engine.cpp index 1c0748bb8..56c4deda6 100644 --- a/src/eepp/window/engine.cpp +++ b/src/eepp/window/engine.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,8 @@ 
Engine::~Engine() { CSS::StyleSheetSpecification::destroySingleton(); + Doc::SyntaxDefinitionManager::destroySingleton(); + FontManager::destroySingleton(); TextureFactory::destroySingleton(); diff --git a/src/examples/ui_hello_world/ui_hello_world.cpp b/src/examples/ui_hello_world/ui_hello_world.cpp index 24581f7d1..391f5d521 100644 --- a/src/examples/ui_hello_world/ui_hello_world.cpp +++ b/src/examples/ui_hello_world/ui_hello_world.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include using namespace EE::UI::Doc; @@ -54,7 +56,7 @@ EE_MAIN_FUNC int main( int, char** ) { SceneManager::instance()->add( uiSceneNode ); // Create a very simple Hello World with a TextView and a PushButton. - uiSceneNode->loadLayoutFromString( R"xml( + std::string layout = R"xml( @@ -72,7 +74,17 @@ EE_MAIN_FUNC int main( int, char** ) { layout_weight="1" /> - )xml" ); + )xml"; + uiSceneNode->loadLayoutFromString( layout ); + + const auto& def = SyntaxDefinitionManager::instance()->getStyleByExtension( "xml" ); + Clock clock; + std::pair, int> ret = + SyntaxTokenizer::tokenize( def, layout, SYNTAX_TOKENIZER_STATE_NONE ); + + if ( ret.first.size() ) { + eePRINTL( "tokenized in: %.2fms", clock.getElapsedTime().asMilliseconds() ); + } // Set the style to our "Hello World" widgets. uiSceneNode->setStyleSheet( R"css( @@ -93,7 +105,7 @@ EE_MAIN_FUNC int main( int, char** ) { )css" ); UICodeEditor* codeEditor = uiSceneNode->find( "code_edit" ); - codeEditor->setFontSize( 11 )->loadFromFile( "../src/eepp/ui/doc/textdocument.cpp" ); + codeEditor->setFontSize( 11 )->loadFromFile( "assets/layouts/test_widgets.xml" ); win->runMainLoop( &mainLoop ); } diff --git a/src/thirdparty/rx-cpp/COPYING b/src/thirdparty/rx-cpp/COPYING new file mode 100644 index 000000000..f36b930d9 --- /dev/null +++ b/src/thirdparty/rx-cpp/COPYING @@ -0,0 +1,19 @@ +textutil-cpp: Copyright © 2015 Steve Donovan. +Lua: Copyright © 1994-2015 Lua.org, PUC-Rio. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or +substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. 
\ No newline at end of file diff --git a/src/thirdparty/rx-cpp/lua-str.c b/src/thirdparty/rx-cpp/lua-str.c new file mode 100644 index 000000000..faa146c51 --- /dev/null +++ b/src/thirdparty/rx-cpp/lua-str.c @@ -0,0 +1,411 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "lua-str.h" + +static FailFun s_fail_fun; + +void str_fail_func(FailFun f) { + s_fail_fun = f; +} + +/* macro to `unsign' a character */ +#define uchar(c) ((unsigned char)(c)) + + +/* +** {====================================================== +** PATTERN MATCHING +** ======================================================= +*/ + +#define LUA_MAXCAPTURES 32 + +/* maximum recursion depth for 'match' */ +#define MAXCCALLS 200 + +#define CAP_UNFINISHED (-1) +#define CAP_POSITION (-2) + +typedef struct MatchState { + int matchdepth; /* control for recursive depth (to avoid C stack overflow) */ + const char *src_init; /* init of source string */ + const char *src_end; /* end ('\0') of source string */ + const char *p_end; /* end ('\0') of pattern */ + int level; /* total number of captures (finished or unfinished) */ + struct { + const char *init; + ptrdiff_t len; + } capture[LUA_MAXCAPTURES]; +} MatchState; + +/* recursive function */ +static const char *match (MatchState *ms, const char *s, const char *p); + +#define L_ESC '%' +#define SPECIALS "^$*+?.([%-" + +// error handling, hm?? NB + +static int throw_error(const char *fmt,...) { + char buff[1024]; + va_list ap; + va_start(ap,fmt); + vsnprintf(buff,sizeof(buff),fmt,ap); + va_end(ap); + if (! 
s_fail_fun) { + fprintf(stderr,"%s\n",buff); + exit(1); + } else { + s_fail_fun(buff); + } + return 0; +} + +static int check_capture (MatchState *ms, int l) { + l -= '1'; + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + return throw_error("invalid capture index %%%d", l + 1); + return l; +} + +static int capture_to_close (MatchState *ms) { + int level = ms->level; + for (level--; level>=0; level--) + if (ms->capture[level].len == CAP_UNFINISHED) return level; + return throw_error("invalid pattern capture"); +} + + +static const char *classend (MatchState *ms, const char *p) { + switch (*p++) { + case L_ESC: { + if (p == ms->p_end) + throw_error("malformed pattern (ends with '%%')"); /* the message is a printf-style format: a literal percent must be written as two */ + return p+1; + } + case '[': { + if (*p == '^') p++; + do { /* look for a `]' */ + if (p == ms->p_end) + throw_error("malformed pattern (missing ']')"); + if (*(p++) == L_ESC && p < ms->p_end) + p++; /* skip escapes (e.g. `%]') */ + } while (*p != ']'); + return p+1; + } + default: { + return p; + } + } +} + + +static int match_class (int c, int cl) { + int res; + switch (tolower(cl)) { + case 'a' : res = isalpha(c); break; + case 'c' : res = iscntrl(c); break; + case 'd' : res = isdigit(c); break; + case 'g' : res = isgraph(c); break; + case 'l' : res = islower(c); break; + case 'p' : res = ispunct(c); break; + case 's' : res = isspace(c); break; + case 'u' : res = isupper(c); break; + case 'w' : res = isalnum(c); break; + case 'x' : res = isxdigit(c); break; + case 'z' : res = (c == 0); break; /* deprecated option */ + default: return (cl == c); + } + return (islower(cl) ?
res : !res); +} + + +static int matchbracketclass (int c, const char *p, const char *ec) { + int sig = 1; + if (*(p+1) == '^') { + sig = 0; + p++; /* skip the `^' */ + } + while (++p < ec) { + if (*p == L_ESC) { + p++; + if (match_class(c, uchar(*p))) + return sig; + } + else if ((*(p+1) == '-') && (p+2 < ec)) { + p+=2; + if (uchar(*(p-2)) <= c && c <= uchar(*p)) + return sig; + } + else if (uchar(*p) == c) return sig; + } + return !sig; +} + + +static int singlematch (MatchState *ms, const char *s, const char *p, + const char *ep) { + if (s >= ms->src_end) + return 0; + else { + int c = uchar(*s); + switch (*p) { + case '.': return 1; /* matches any char */ + case L_ESC: return match_class(c, uchar(*(p+1))); + case '[': return matchbracketclass(c, p, ep-1); + default: return (uchar(*p) == c); + } + } +} + + +static const char *matchbalance (MatchState *ms, const char *s, + const char *p) { + if (p >= ms->p_end - 1) + throw_error("malformed pattern " + "(missing arguments to '%%b')"); /* the message is a printf-style format: a literal percent must be written as two */ + if (*s != *p) return NULL; + else { + int b = *p; + int e = *(p+1); + int cont = 1; + while (++s < ms->src_end) { + if (*s == e) { + if (--cont == 0) return s+1; + } + else if (*s == b) cont++; + } + } + return NULL; /* string ends out of balance */ +} + + +static const char *max_expand (MatchState *ms, const char *s, + const char *p, const char *ep) { + ptrdiff_t i = 0; /* counts maximum expand for item */ + while (singlematch(ms, s + i, p, ep)) + i++; + /* keeps trying to match with the maximum repetitions */ + while (i>=0) { + const char *res = match(ms, (s+i), ep+1); + if (res) return res; + i--; /* else didn't match; reduce 1 repetition to try again */ + } + return NULL; +} + + +static const char *min_expand (MatchState *ms, const char *s, + const char *p, const char *ep) { + for (;;) { + const char *res = match(ms, s, ep+1); + if (res != NULL) + return res; + else if (singlematch(ms, s, p, ep)) + s++; /* try with one more repetition */ + else return NULL; + } +} + + +static
const char *start_capture (MatchState *ms, const char *s, + const char *p, int what) { + const char *res; + int level = ms->level; + if (level >= LUA_MAXCAPTURES) throw_error("too many captures"); + ms->capture[level].init = s; + ms->capture[level].len = what; + ms->level = level+1; + if ((res=match(ms, s, p)) == NULL) /* match failed? */ + ms->level--; /* undo capture */ + return res; +} + + +static const char *end_capture (MatchState *ms, const char *s, + const char *p) { + int l = capture_to_close(ms); + const char *res; + ms->capture[l].len = s - ms->capture[l].init; /* close capture */ + if ((res = match(ms, s, p)) == NULL) /* match failed? */ + ms->capture[l].len = CAP_UNFINISHED; /* undo capture */ + return res; +} + + +static const char *match_capture (MatchState *ms, const char *s, int l) { + size_t len; + l = check_capture(ms, l); + len = ms->capture[l].len; + if ((size_t)(ms->src_end-s) >= len && + memcmp(ms->capture[l].init, s, len) == 0) + return s+len; + else return NULL; +} + +static const char *match (MatchState *ms, const char *s, const char *p) { + if (ms->matchdepth-- == 0) + throw_error("pattern too complex"); + init: /* using goto's to optimize tail recursion */ + if (p != ms->p_end) { /* end of pattern? */ + switch (*p) { + case '(': { /* start capture */ + if (*(p + 1) == ')') /* position capture? */ + s = start_capture(ms, s, p + 2, CAP_POSITION); + else + s = start_capture(ms, s, p + 1, CAP_UNFINISHED); + break; + } + case ')': { /* end capture */ + s = end_capture(ms, s, p + 1); + break; + } + case '$': { + if ((p + 1) != ms->p_end) /* is the `$' the last char in pattern? */ + goto dflt; /* no; go to default */ + s = (s == ms->src_end) ? s : NULL; /* check end of string */ + break; + } + case L_ESC: { /* escaped sequences not in the format class[*+?-]? */ + switch (*(p + 1)) { + case 'b': { /* balanced string? 
*/ + s = matchbalance(ms, s, p + 2); + if (s != NULL) { + p += 4; goto init; /* return match(ms, s, p + 4); */ + } /* else fail (s == NULL) */ + break; + } + case 'f': { /* frontier? */ + const char *ep; char previous; + p += 2; + if (*p != '[') + throw_error("missing '[' after '%f' in pattern"); + ep = classend(ms, p); /* points to what is next */ + previous = (s == ms->src_init) ? '\0' : *(s - 1); + if (!matchbracketclass(uchar(previous), p, ep - 1) && + matchbracketclass(uchar(*s), p, ep - 1)) { + p = ep; goto init; /* return match(ms, s, ep); */ + } + s = NULL; /* match failed */ + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + case '8': case '9': { /* capture results (%0-%9)? */ + s = match_capture(ms, s, uchar(*(p + 1))); + if (s != NULL) { + p += 2; goto init; /* return match(ms, s, p + 2) */ + } + break; + } + default: goto dflt; + } + break; + } + default: dflt: { /* pattern class plus optional suffix */ + const char *ep = classend(ms, p); /* points to optional suffix */ + /* does not match at least once? */ + if (!singlematch(ms, s, p, ep)) { + if (*ep == '*' || *ep == '?' || *ep == '-') { /* accept empty? 
*/ + p = ep + 1; goto init; /* return match(ms, s, ep + 1); */ + } + else /* '+' or no suffix */ + s = NULL; /* fail */ + } + else { /* matched once */ + switch (*ep) { /* handle optional suffix */ + case '?': { /* optional */ + const char *res; + if ((res = match(ms, s + 1, ep + 1)) != NULL) + s = res; + else { + p = ep + 1; goto init; /* else return match(ms, s, ep + 1); */ + } + break; + } + case '+': /* 1 or more repetitions */ + s++; /* 1 match already done */ + /* go through */ + case '*': /* 0 or more repetitions */ + s = max_expand(ms, s, p, ep); + break; + case '-': /* 0 or more repetitions (minimum) */ + s = min_expand(ms, s, p, ep); + break; + default: /* no suffix */ + s++; p = ep; goto init; /* return match(ms, s + 1, ep); */ + } + } + break; + } + } + } + ms->matchdepth++; + return s; +} + + + +static void push_onecapture (MatchState *ms, int i, const char *s, + const char *e, LuaMatch *mm) { + if (i >= ms->level) { + if (i == 0) { /* ms->level == 0, too */ + mm->start = 0; + mm->end = e - s ; + //lua_pushlstring(ms->L, s, e - s); /* add whole match */ + } else + throw_error("invalid capture index"); + } + else { + ptrdiff_t l = ms->capture[i].len; + if (l == CAP_UNFINISHED) throw_error("unfinished capture"); + if (l == CAP_POSITION) { + mm[i].start = ms->capture[i].init - ms->src_init + 1; + mm[i].end = mm[i].start; + } else { + mm[i].start = ms->capture[i].init - ms->src_init; + mm[i].end = mm[i].start + l; + } + } +} + + +static int push_captures (MatchState *ms, const char *s, const char *e, LuaMatch *mm) { + int i; + int nlevels = (ms->level == 0 && s) ? 
1 : ms->level; + for (i = 0; i < nlevels; i++) + push_onecapture(ms, i, s, e, mm); + return nlevels; /* number of strings pushed */ +} + + +int str_match (const char *s, size_t ls, const char *p, LuaMatch *mm) { + size_t lp=strlen(p); + const char *s1 = s; + MatchState ms; + int anchor = (*p == '^'); + if (anchor) { + p++; lp--; /* skip anchor character */ + } + ms.matchdepth = MAXCCALLS; + ms.src_init = s; + ms.src_end = s + ls; + ms.p_end = p + lp; + do { + const char *res; + ms.level = 0; + if ((res=match(&ms, s1, p)) != NULL) { + mm[0].start = s1 - s; /* start */ + mm[0].end = res - s; /* end */ + return push_captures(&ms, NULL, 0, mm+1) + 1; + } + } while (s1++ < ms.src_end && !anchor); + return 0; +} + diff --git a/src/thirdparty/rx-cpp/lua-str.h b/src/thirdparty/rx-cpp/lua-str.h new file mode 100644 index 000000000..d60cc6f9b --- /dev/null +++ b/src/thirdparty/rx-cpp/lua-str.h @@ -0,0 +1,22 @@ +#ifndef _LUA_STR_H +#define _LUA_STR_H + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*FailFun)(const char *msg); +void str_fail_func(FailFun f); + +typedef struct LuaMatch { + int start; + int end; +} LuaMatch; + +int str_match (const char *text, size_t len, const char *pattern, LuaMatch *mm); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/thirdparty/rx-cpp/rx.cpp b/src/thirdparty/rx-cpp/rx.cpp new file mode 100644 index 000000000..72edf9037 --- /dev/null +++ b/src/thirdparty/rx-cpp/rx.cpp @@ -0,0 +1,356 @@ +// wrapping POSIX regexes +// Steve Donovan, (c) 2015 +// MIT license + +#include + +#include "rx.h" +using namespace std; + +const int MAX_DEFAULT_MATCHES = 12; + +#ifndef NO_POSIX + +static string percent_subst(string pattern) { + string res; + const char *p = pattern.c_str(); + bool inside_bracket = false; + while (*p) { + char ch = *p; + if (ch == '%') { + string klass; + ++p; // eat special char + switch(*p) { + case 's': klass = "space"; break; + case 'd': klass = "digit"; break; + case 'x': klass = "xdigit"; break; + case 'a': 
klass = "alpha"; break; + case 'w': klass = "alnum"; break; + case 'u': klass = "upper"; break; + case 'l': klass = "lower"; break; + case 'c': klass = "cntrl"; break; + case 'p': klass = "punct"; break; + case '%': + ch = '%'; + break; + default: + ch = '\\'; + // special case: not a bracket! + if (*p == '[' || *p == ']') { + res += '\\'; + ch = *p; + } else { + --p; // wasn't special after all... + } + } + if (klass.size() > 0) { + klass = "[:" + klass + ":]"; + if (! inside_bracket) + klass = '[' + klass + ']'; + res += klass; + } else { + res += ch; + } + } else { + if (ch == '[') { + inside_bracket = true; + } else + if (ch == ']') { + inside_bracket = false; + } + res += ch; + } + p++; + } + return res; +} +#endif + +namespace textutil { + +Rx::Rx (regex_t *R) : rx(R) { +} + +Rx::Rx (Rx&& other) { + rx = other.rx; + other.rx = NULL; +} + +// move assignment as well... +Rx& Rx::operator= (Rx&& other) { + rx = other.rx; + other.rx = NULL; + return *this; +} + +// -1 is a special match index: it means, try to pick the first submatch, otherwise +// fall back to the full match. This is the default behaviour for substitutions. +bool Rx::range(int idx, int& i1, int& i2, regmatch_t *r_matches) { + if (idx == -1) { + idx = n_matches() > 1 ? 
1 : 0; + } + if (idx >= 0 && idx < n_matches()) { + i1 = r_matches[idx].rm_so; + i2 = r_matches[idx].rm_eo; + return true; + } + return false; +} + +// like Lua's string.find; also returns the range matched +bool Rx::find (const char *str, int& i1, int& i2, int idx) { + regmatch_t match_buff[MAX_DEFAULT_MATCHES]; + if (matches(str,match_buff)){ + range(idx,i1,i2,match_buff); + return true; + } else { + i1 = -1; + i2 = -1; + return false; + } +} + +Rx::match Rx::gmatch(const char *s) &{ + return Rx::match(*this,s,false); +} + +Rx::match Rx::gmatch(const std::string& s) & { + return Rx::match(*this,s.c_str(),false); +} + +// some voodoo needed here; this is how we tell whether we were +// created from a _temporary_; in which case, zero out the regex_t since +// otherwise it will die with the temporary. The match will make special +// arrangements in this case! +Rx::match Rx::gmatch(const char *s) &&{ + Rx::match M(*this,s,true); + rx = NULL; + return M; +} + +Rx::match Rx::gmatch(const std::string& s) && { + Rx::match M(*this,s.c_str(),true); + rx = NULL; + return M; +} + +string Rx::gsub(const char *text, const char *repl) { + Rx::match ms (*this,text); + string res; + while (ms.subst(res)) { + for (const char *P = repl; *P; ++P) { + if (*P == '%') { + ++P; + int ngroup = (int)*P - (int)'0'; + res += ms.group(ngroup); + } else { + res += *P; + } + } + ms.next(); + } + return res; +} + +// a match state looks after the regexp object (a thin wrapper around a regex_t pointer) +// and keeps a buffer for storing the resulting matches. +// If constructed from a temporary Rx, we create our own Rx using its regex_t pointer. 
+Rx::match_state::match_state(Rx* pr, bool own_rx) : ref_count(1), own_rx(own_rx) { + r_matches = new regmatch_t[10]; // pr->n_matches() + if (own_rx) { + this->pr = new Rx(pr->regexp()); + } else { + this->pr = pr; + } +} + +bool Rx::match_state::range(int idx, int &i1, int &i2) { + return pr->range(idx,i1,i2,r_matches); +} + +bool Rx::match_state::matches(const char *s, size_t len) { + return pr->matches(s,r_matches,len); +} + +Rx::match_state::~match_state() { + delete[] r_matches; + if (own_rx) + delete pr; +} + +// A match object keeps a char pointer and a ref-counted match state object +Rx::match::match(Rx& r, const char *s, bool own_rx) : s(s) { + len = strlen(s); + state = new Rx::match_state(&r,own_rx); +} + +Rx::match::match(Rx& r, const string& s, bool own_rx) { + state = new Rx::match_state(&r,own_rx); + this->s = s.c_str(); + len = s.size(); +} + +// the match state only dies when there's no state left holding it +Rx::match::~match() { + --state->ref_count; + if (state->ref_count == 0) { + delete state; + } +} + +// so each copied match shares state by incrementing the ref count +Rx::match::match(const Rx::match& other) : state(other.state), s(other.s), len(other.len) { + ++state->ref_count; +} + +Rx::match& Rx::match::operator= (const match& other) { + ++state->ref_count; + return *this; +} + +// the match operations are expressed in terms of the basic match_state operations +// matches() & range() + +// this moves the char pointer just past the end of the current match +void Rx::match::next() { + int m1,m2; + state->range(0,m1,m2); + s += m2; + len -= m2; +} + +string Rx::match::group(int idx) const { + int m1,m2; + if (state->range(idx,m1,m2)) { + return string(s+m1,m2-m1); + } + return ""; +} + +bool Rx::match::subst(string& res) { + if (! 
matches()) { // copy remaining tail + res.append(s); + return false; + } + int m1,m2; + state->range(0,m1,m2); + if (m1 == 0) + return true; + res.append(s,m1); + return true; +} + +#ifndef NO_POSIX + +// Rxp just wraps a regex_t struct +Rxp::Rxp (string pat, int cflags) : Rx() { + rx = new regex_t; + int flags = REG_EXTENDED; + if (cflags & icase) + flags |= REG_ICASE; + if (cflags & newline) + flags |= REG_NEWLINE; + if (cflags & lua) + pat = percent_subst(pat); + rc = regcomp(rx, pat.c_str(), flags); +} + +Rxp::~Rxp() { + if (rx) { + regfree(rx); + delete rx; + } +} + +// if the regexp compilation fails, then use this; +// if (! R) { do_something_with(R.error()); } +string Rxp::error() { + char buff[512]; + regerror(rc,rx,buff,sizeof(buff)); + return buff; +} + +// basic match operation! +bool Rxp::matches (const char *ps, regmatch_t *r_matches, size_t len) { + regmatch_t match_buff[MAX_DEFAULT_MATCHES]; + if (r_matches == NULL) + r_matches = match_buff; + int res = regexec(rx,ps, n_matches(), r_matches,0); + return res != 0 ? false : true; +} + +int Rxp::n_matches() { + return rx->re_nsub+1; +} + +#endif + +#ifndef NO_LUA + +static bool s_handler_initialized; + +static void fail_handler (const char *msg) { + throw string(msg); +} + +static const char *copy_str(const string& s) { + char *out = new char[s.size()+1]; + strcpy(out,s.c_str()); + return out; +} + +// Rxp just wraps a pattern string +Rxl::Rxl (string pat) : Rx(), err(nullptr), pat(pat) { + //rx = (regex_t*)copy_str(pat); // PASOP + rx = (regex_t*)pat.c_str(); + if (! s_handler_initialized) { + s_handler_initialized = true; + str_fail_func(fail_handler); + } + n_match = 10; +} + +Rxl::~Rxl() { + //delete[] rx; + if (err) + delete[] err; +} + +// if the regexp compilation fails, then use this; +// if (! R) { do_something_with(R.error()); } +string Rxl::error() { + return err; +} + +// basic match operation! 
+bool Rxl::matches (const char *ps, regmatch_t *r_matches, size_t len) { + regmatch_t match_buff[MAX_DEFAULT_MATCHES]; + if (r_matches == NULL) + r_matches = match_buff; + if (len == 0) + len = strlen(ps); + // if there's an error in the pattern, it will throw and set error state + // LuaMatch and regmatch_t are just the same under the hood + try { + n_match = str_match(ps,len,(const char *)rx, (LuaMatch*) r_matches); + } catch (const string& pattern_error) { + err = copy_str(pattern_error); + n_match = 0; + } + return n_match == 0 ? false : true; +} + +int Rxl::n_matches() { + return n_match; +} + +#endif + + +} + +// overload to_string so that we can use it in gsub with any convertable type +namespace std { + string to_string(const string& s) { return s; } +} + diff --git a/src/thirdparty/rx-cpp/rx.h b/src/thirdparty/rx-cpp/rx.h new file mode 100644 index 000000000..4bec8bf6f --- /dev/null +++ b/src/thirdparty/rx-cpp/rx.h @@ -0,0 +1,235 @@ +#ifndef __RX_H +#define __RX_H + +#include +#ifndef NO_POSIX +#include +#else +typedef struct regmatch_t { + int rm_so; + int rm_eo; +} regmatch_t; +typedef const char regex_t; +#endif +#ifndef NO_LUA +#include "lua-str.h" +#endif +#include + +namespace std { + string to_string(const string& s); +} + +namespace textutil { + +typedef const std::string& S; +template inline T from_string(S s) { return s; } +template<> inline int from_string(S s) { return std::stoi(s); } +template<> inline long from_string(S s) { return std::stoi(s); } +template<> inline unsigned long from_string(S s) { return std::stoul(s); } +template<> inline double from_string(S s) { return std::stod(s); } + +class Rx { +protected: + regex_t *rx; + +public: + + enum { + icase = 1, lua = 2, newline = 4 + }; + + struct match_state { + Rx* pr; + regmatch_t *r_matches; + size_t ref_count; + bool own_rx; + + match_state(Rx* pr, bool own_rx); + ~match_state(); + + bool range(int idx, int &i1, int &i2); + bool matches(const char *s, size_t len); + }; + + struct match 
{ + match_state *state; + const char *s; + size_t len; + + match(Rx& r, const char *s, bool own_rx=false); + match(Rx& r, const std::string& s, bool own_rx=false); + match(const Rx::match& other); + match& operator= (const match& other); + ~match(); + + bool matches() { return state->matches(s,len); } + bool subst(std::string& res); + void next(); + std::string group(int idx = -1) const; + bool range(int idx, int &i1,int &i2) const { return state->range(idx,i1,i2); } + std::string operator[] (int idx) const { return group(idx); } + + template + void append_to(C& c) { + typedef typename C::value_type value_type; + while(matches()) { + c.push_back(from_string(group())); + next(); + } + } + + template + void fill_map(M& m) { + while(matches()) { + m[group(1)] = from_string(group(2)); + next(); + } + } + + struct iterator { + match *pm; + + iterator(match *pm) : pm(pm) { + if (pm != NULL) { + if (! pm->matches()) + pm = NULL; + } + } + + bool operator != (const iterator& other) { + return pm != other.pm; + } + + bool operator == (const iterator& other) { + return pm == other.pm; + } + + const match& operator* () const { return *pm; } + + iterator& operator ++() { + pm->next(); + if (! pm->matches()) { + pm = NULL; + } + return *this; + } + }; + + + iterator begin() { return iterator(this);} + iterator end() { return iterator(NULL); } + }; + + + Rx() : rx(nullptr) { } + Rx (Rx&& other); + Rx (regex_t *R); + Rx& operator= (Rx&& other); + + regex_t *regexp() { return rx; } + + // the above ctor may fail to compile the regex. + // The suggested idiom is 'if (! R) do_something_with(R.error());' + virtual bool operator! 
() { return true; } + + virtual ~Rx() {} + virtual std::string error() { return ""; } + virtual int n_matches() { return 0; } + virtual bool matches (const char *ps, regmatch_t *r_matches = NULL, size_t len = 0) { return false; } + + virtual bool matches (const std::string& s, regmatch_t *r_matches = NULL) { + return matches(s.c_str(),r_matches, s.size()); + } + + // like Lua's std::string.find; also returns the range matched + bool find (const char *str, int& i1, int& i2, int idx = 0); + bool find (const std::string& s, int& i1, int& i2, int idx = 0) { + return find(s.c_str(),i1,i2,idx); + } + + bool range(int idx, int& i1, int& i2, regmatch_t *r_matches); + + std::string gsub(const char *text, const char *repl); + std::string gsub(const std::string& text, const std::string& repl) { + return gsub(text.c_str(),repl.c_str()); + } + + match gmatch(const char *s) &; + match gmatch(const char *s) &&; + match gmatch(const std::string& s) &; + match gmatch(const std::string& s) &&; + + template + std::string gsub(const char *text, M& map_object) { + Rx::match ms (*this,text); + std::string res; + while (ms.subst(res)) { + res.append(std::to_string(map_object[ms.group()])); + ms.next(); + } + return res; + } + + template + std::string gsub_fun(const char *text, F fun_object) { + Rx::match ms (*this,text); + std::string res; + while (ms.subst(res)) { + res.append(std::to_string(fun_object(ms))); + ms.next(); + } + return res; + } + +}; + +#ifndef NO_POSIX +class Rxp: public Rx { + int rc; +public: + Rxp (std::string pat, int cflags = 0); + Rxp (Rxp&& other) { + rx = other.rx; + other.rx = NULL; + } + + virtual bool operator! 
() { return rc != 0; } + virtual ~Rxp(); + virtual std::string error(); + virtual int n_matches(); + virtual bool matches (const char *ps, regmatch_t *r_matches = NULL,size_t len=0); +}; + +inline Rx operator"" _R (const char *pat, size_t) { return Rxp(pat); } + +#endif + +#ifndef NO_LUA +class Rxl: public Rx { + const char *err; + std::string pat; + int n_match; + +public: + Rxl (std::string pat); + Rxl (Rxl&& other) { + rx = other.rx; + other.rx = NULL; + } + + virtual bool operator! () { return err != nullptr; } + virtual ~Rxl(); + virtual std::string error(); + virtual int n_matches(); + virtual bool matches (const char *ps, regmatch_t *r_matches = NULL,size_t len=0); +}; + +inline Rx operator"" _L (const char *pat, size_t) { return Rxl(pat); } + +#endif + + + +} +#endif