diff --git a/include/eepp/ui/doc/syntaxdefinition.hpp b/include/eepp/ui/doc/syntaxdefinition.hpp index 5349603ad..f0c99603b 100644 --- a/include/eepp/ui/doc/syntaxdefinition.hpp +++ b/include/eepp/ui/doc/syntaxdefinition.hpp @@ -2,6 +2,7 @@ #define EE_UI_DOC_DEFINITION_HPP #include +#include #include #include #include @@ -11,6 +12,7 @@ namespace EE { namespace UI { namespace Doc { struct EE_API SyntaxPattern { std::vector patterns; std::string type; + std::string syntax{ "" }; }; class EE_API SyntaxDefinition { @@ -26,6 +28,8 @@ class EE_API SyntaxDefinition { const std::string& getLanguageName() const; + const String::HashType& getLanguageId() const; + const std::vector& getFiles() const; std::string getFileExtension() const; @@ -63,6 +67,7 @@ class EE_API SyntaxDefinition { protected: std::string mLanguageName; + String::HashType mLanguageId; std::vector mFiles; std::vector mPatterns; std::unordered_map mSymbols; diff --git a/include/eepp/ui/doc/syntaxdefinitionmanager.hpp b/include/eepp/ui/doc/syntaxdefinitionmanager.hpp index 7393fcbb3..00c84caa6 100644 --- a/include/eepp/ui/doc/syntaxdefinitionmanager.hpp +++ b/include/eepp/ui/doc/syntaxdefinitionmanager.hpp @@ -25,6 +25,8 @@ class EE_API SyntaxDefinitionManager { const SyntaxDefinition& getStyleByLanguageName( const std::string& name ) const; + const SyntaxDefinition& getStyleByLanguageId( const String::HashType& id) const; + SyntaxDefinition& getStyleByLanguageNameRef( const std::string& name ); std::vector getLanguageNames() const; diff --git a/include/eepp/ui/doc/syntaxhighlighter.hpp b/include/eepp/ui/doc/syntaxhighlighter.hpp index 7f55ce66e..c508909f6 100644 --- a/include/eepp/ui/doc/syntaxhighlighter.hpp +++ b/include/eepp/ui/doc/syntaxhighlighter.hpp @@ -8,10 +8,10 @@ namespace EE { namespace UI { namespace Doc { struct TokenizedLine { - int initState; + Uint64 initState; String::HashType hash; std::vector tokens; - int state; + Uint64 state; }; class EE_API SyntaxHighlighter { @@ -37,7 +37,7 @@ class 
EE_API SyntaxHighlighter { std::map mLines; Int64 mFirstInvalidLine; Int64 mMaxWantedLine; - TokenizedLine tokenizeLine( const size_t& line, const int& state ); + TokenizedLine tokenizeLine( const size_t& line, const Uint64& state ); }; }}} // namespace EE::UI::Doc diff --git a/include/eepp/ui/doc/syntaxtokenizer.hpp b/include/eepp/ui/doc/syntaxtokenizer.hpp index db26377e2..2f03fb554 100644 --- a/include/eepp/ui/doc/syntaxtokenizer.hpp +++ b/include/eepp/ui/doc/syntaxtokenizer.hpp @@ -13,14 +13,14 @@ struct EE_API SyntaxToken { std::string text; }; -#define SYNTAX_TOKENIZER_STATE_NONE ( -1 ) +#define SYNTAX_TOKENIZER_STATE_NONE ( 0 ) class EE_API SyntaxTokenizer { public: - std::pair, int> static tokenize( const SyntaxDefinition& syntax, - const std::string& text, - const int& state, - const size_t& startIndex = 0 ); + static std::pair, Uint64> tokenize( const SyntaxDefinition& syntax, + const std::string& text, + const Uint64& state, + const size_t& startIndex = 0 ); }; }}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxdefinition.cpp b/src/eepp/ui/doc/syntaxdefinition.cpp index c77bdcfe6..03f5f8b70 100644 --- a/src/eepp/ui/doc/syntaxdefinition.cpp +++ b/src/eepp/ui/doc/syntaxdefinition.cpp @@ -13,6 +13,7 @@ SyntaxDefinition::SyntaxDefinition( const std::string& languageName, const std::string& comment, const std::vector headers ) : mLanguageName( languageName ), + mLanguageId( String::hash( languageName ) ), mFiles( files ), mPatterns( patterns ), mSymbols( symbols ), @@ -67,7 +68,7 @@ SyntaxDefinition& SyntaxDefinition::addPatternToFront( const SyntaxPattern& patt auto patterns = mPatterns; mPatterns.clear(); mPatterns.push_back( pattern ); - for ( auto pa : patterns ) + for ( const auto& pa : patterns ) mPatterns.push_back( pa ); return *this; } @@ -112,4 +113,8 @@ const std::string& SyntaxDefinition::getLanguageName() const { return mLanguageName; } +const String::HashType& SyntaxDefinition::getLanguageId() const { + return mLanguageId; +} + }}} 
// namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp index f59e38e05..ad6569d4f 100644 --- a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp +++ b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp @@ -19,9 +19,9 @@ SyntaxDefinitionManager::SyntaxDefinitionManager() { // Plain text add( { "Plain Text", { "%.txt$" }, {} } ); - // XML - HTML + // XML add( { "XML", - { "%.xml$", "%.html?$", "%.svg$" }, + { "%.xml$", "%.svg$" }, { { { "" }, "comment" }, { { "%f[^>][^<]", "%f[<]" }, "normal" }, @@ -38,7 +38,39 @@ SyntaxDefinitionManager::SyntaxDefinitionManager() { }, {}, "", - { "<%?xml", "" } } ); + { "<%?xml" } } ); + + // HTML + add( { "HTML", + { "%.html?$" }, + { + { { "<%s*[sS][cC][rR][iI][pP][tT]%s+[tT][yY][pP][eE]%s*=%s*['\"]%a+/" + "[jJ][aA][vV][aA][sS][cC][rR][iI][pP][tT]['\"]%s*>", + "<%s*/[sS][cC][rR][iI][pP][tT]>" }, + "function", + "JavaScript" }, + { { "<%s*[sS][cC][rR][iI][pP][tT]%s*>", "<%s*/%s*[sS][cC][rR][iI][pP][tT]>" }, + "function", + "JavaScript" }, + { { "<%s*[sS][tT][yY][lL][eE][^>]*>", "<%s*/%s*[sS][tT][yY][lL][eE]%s*>" }, + "function", + "CSS" }, + { { "" }, "comment" }, + { { "%f[^>][^<]", "%f[<]" }, "normal" }, + { { "\"", "\"", "\\" }, "string" }, + { { "'", "'", "\\" }, "string" }, + { { "0x[%da-fA-F]+" }, "number" }, + { { "-?%d+[%d%.]*f?" }, "number" }, + { { "-?%.?%d+f?" 
}, "number" }, + { { "%f[^<]![%a_][%w_]*" }, "keyword2" }, + { { "%f[^<][%a_][%w_]*" }, "function" }, + { { "%f[^<]/[%a_][%w_]*" }, "function" }, + { { "[%a_][%w_]*" }, "keyword" }, + { { "[/<>=]" }, "operator" }, + }, + {}, + "", + { "" } } ); // CSS add( { "CSS", @@ -55,9 +87,9 @@ SyntaxDefinitionManager::SyntaxDefinitionManager() { { { "-?%d+[%d%.]*deg" }, "number" }, { { "-?%d+[%d%.]*" }, "number" }, { { "[%a_][%w_]*" }, "symbol" }, - { { "#[%a][%w_-]*" }, "keyword2" }, + { { "#[%a_-][%w_-]*" }, "keyword2" }, { { "@[%a][%w_-]*" }, "keyword2" }, - { { "%.[%a][%w_-]*" }, "keyword2" }, + { { "%.[%a_-][%w_-]*" }, "keyword2" }, { { "[{}:]" }, "operator" }, } } ) .addSymbols( UIWidgetCreator::getWidgetNames(), "keyword2" ); @@ -67,6 +99,35 @@ SyntaxDefinitionManager::SyntaxDefinitionManager() { { "%.md$", "%.markdown$" }, { { { "\\." }, "normal" }, + { { "```[Xx][Mm][Ll]", "```" }, "function", "XML" }, + { { "```[Hh][Tt][Mm][Ll]", "```" }, "function", "HTML" }, + { { "```[Cc]%+%+", "```" }, "function", "C++" }, + { { "```[Cc][Pp][Pp]", "```" }, "function", "C++" }, + { { "```[Cc]%#", "```" }, "function", "C#" }, + { { "```[Cc][Ss][Ss]", "```" }, "function", "CSS" }, + { { "```[Cc]", "```" }, "function", "C" }, + { { "```[Dd]", "```" }, "function", "D" }, + { { "```[Ll]ua", "```" }, "function", "Lua" }, + { { "```[Jj]ava[Ss]cript", "```" }, "function", "JavaScript" }, + { { "```[Tt]ype[Ss]cript", "```" }, "function", "TypeScript" }, + { { "```[Pp]ython", "```" }, "function", "Python" }, + { { "```[Bb]ash", "```" }, "function", "Bash" }, + { { "```[Pp][Hh][Pp]", "```" }, "function", "PHP" }, + { { "```[Ss][Qq][Ll]", "```" }, "function", "SQL" }, + { { "```[Gg][Ll][Ss][Ll]", "```" }, "function", "GLSL" }, + { { "```[Ii][Nn][Ii]", "```" }, "function", "Config File" }, + { { "```[Mm]akefile", "```" }, "function", "Makefile" }, + { { "```[Gg][Oo]", "```" }, "function", "Go" }, + { { "```[Rr]ust", "```" }, "function", "Rust" }, + { { "```[Gg][Dd][Ss]cript", "```" },
"function", "GDScript" }, + { { "```[Jj]ava", "```" }, "function", "Java" }, + { { "```[Ss]wift", "```" }, "function", "Swift" }, + { { "```[Dd]art", "```" }, "function", "Dart" }, + { { "```[Oo]bjective[Cc]", "```" }, "function", "Objective-C" }, + { { "```[Yy][Aa][Mm][Ll]", "```" }, "function", "YAML" }, + { { "```[Kk]otlin", "```" }, "function", "Kotlin" }, + { { "```[Ss]olidity", "```" }, "function", "Solidity" }, + { { "```[Hh]askell", "```" }, "function", "Haskell" }, { { "" }, "comment" }, { { "```", "```" }, "string" }, { { "``", "``" }, "string" }, @@ -2261,6 +2322,15 @@ SyntaxDefinitionManager::getStyleByLanguageName( const std::string& name ) const return mStyles[0]; } +const SyntaxDefinition& +SyntaxDefinitionManager::getStyleByLanguageId( const String::HashType& id ) const { + for ( auto& style : mStyles ) { + if ( style.getLanguageId() == id ) + return style; + } + return mStyles[0]; +} + SyntaxDefinition& SyntaxDefinitionManager::getStyleByLanguageNameRef( const std::string& name ) { return const_cast( getStyleByLanguageName( name ) ); } diff --git a/src/eepp/ui/doc/syntaxhighlighter.cpp b/src/eepp/ui/doc/syntaxhighlighter.cpp index d3dab381b..6633d65eb 100644 --- a/src/eepp/ui/doc/syntaxhighlighter.cpp +++ b/src/eepp/ui/doc/syntaxhighlighter.cpp @@ -25,11 +25,11 @@ void SyntaxHighlighter::invalidate( Int64 lineIndex ) { mMaxWantedLine = eemin( mMaxWantedLine, (Int64)mDoc->linesCount() - 1 ); } -TokenizedLine SyntaxHighlighter::tokenizeLine( const size_t& line, const int& state ) { +TokenizedLine SyntaxHighlighter::tokenizeLine( const size_t& line, const Uint64& state ) { TokenizedLine tokenizedLine; tokenizedLine.initState = state; tokenizedLine.hash = mDoc->line( line ).getHash(); - std::pair, int> res = SyntaxTokenizer::tokenize( + std::pair, Uint64> res = SyntaxTokenizer::tokenize( mDoc->getSyntaxDefinition(), mDoc->line( line ).toUtf8(), state ); tokenizedLine.tokens = std::move( res.first ); tokenizedLine.state = std::move( res.second ); @@
-70,7 +70,7 @@ bool SyntaxHighlighter::updateDirty( int visibleLinesCount ) { Int64 max = eemax( 0LL, eemin( mFirstInvalidLine + visibleLinesCount, mMaxWantedLine ) ); for ( Int64 index = mFirstInvalidLine; index <= max; index++ ) { - int state = SYNTAX_TOKENIZER_STATE_NONE; + Uint64 state = SYNTAX_TOKENIZER_STATE_NONE; if ( index > 0 ) { auto prevIt = mLines.find( index - 1 ); if ( prevIt != mLines.end() ) { diff --git a/src/eepp/ui/doc/syntaxtokenizer.cpp b/src/eepp/ui/doc/syntaxtokenizer.cpp index 0520656bb..cb43251cb 100644 --- a/src/eepp/ui/doc/syntaxtokenizer.cpp +++ b/src/eepp/ui/doc/syntaxtokenizer.cpp @@ -10,19 +10,11 @@ namespace EE { namespace UI { namespace Doc { // tokenizer. This allows eepp to support the same color schemes and syntax definitions from // lite. Making much easier to implement a complete code editor. -/*static bool allSpaces( const std::string& str ) { - for ( auto& chr : str ) - if ( ' ' != chr ) - return false; - return true; -}*/ - #define MAX_TOKEN_SIZE ( 512 ) static void pushToken( std::vector& tokens, const std::string& type, const std::string& text ) { - if ( !tokens.empty() && ( tokens[tokens.size() - 1].type == type /*|| - allSpaces( tokens[tokens.size() - 1].text )*/ ) ) { + if ( !tokens.empty() && ( tokens[tokens.size() - 1].type == type ) ) { tokens[tokens.size() - 1].type = type; tokens[tokens.size() - 1].text += text; } else { @@ -69,10 +61,45 @@ std::pair findNonEscaped( const std::string& text, const std::string& } } -std::pair, int> SyntaxTokenizer::tokenize( const SyntaxDefinition& syntax, - const std::string& text, - const int& state, - const size_t& startIndex ) { +struct SyntaxState { + const SyntaxDefinition* currentSyntax{ nullptr }; + const SyntaxPattern* subsyntaxInfo{ nullptr }; + Uint64 currentPatternIdx{ 0 }; + Uint64 currentLevel{ 0 }; +}; + +SyntaxState retrieveSyntaxState( const SyntaxDefinition& syntax, const Uint64& state ) { + SyntaxState syntaxState{ &syntax, nullptr, state, 0 }; + if ( state > 0 && 
+ ( state > 255 || + ( state < syntaxState.currentSyntax->getPatterns().size() && + !syntaxState.currentSyntax->getPatterns()[state - 1].syntax.empty() ) ) ) { + for ( size_t i = 0; i <= 2; ++i ) { + Uint64 target = ( state >> ( 8 * i ) ) & 0xFF; + if ( target != SYNTAX_TOKENIZER_STATE_NONE ) { + if ( target < syntaxState.currentSyntax->getPatterns().size() && + !syntaxState.currentSyntax->getPatterns()[target - 1].syntax.empty() ) { + syntaxState.subsyntaxInfo = + &syntaxState.currentSyntax->getPatterns()[target - 1]; + syntaxState.currentSyntax = + &SyntaxDefinitionManager::instance()->getStyleByLanguageName( + syntaxState.subsyntaxInfo->syntax ); + syntaxState.currentPatternIdx = SYNTAX_TOKENIZER_STATE_NONE; + syntaxState.currentLevel++; + } else { + syntaxState.currentPatternIdx = target; + } + } else { + break; + } + } + } + return syntaxState; +} + +std::pair, Uint64> +SyntaxTokenizer::tokenize( const SyntaxDefinition& syntax, const std::string& text, + const Uint64& state, const size_t& startIndex ) { std::vector tokens; if ( syntax.getPatterns().empty() ) { pushToken( tokens, "normal", text ); @@ -81,28 +108,89 @@ std::pair, int> SyntaxTokenizer::tokenize( const Syntax size_t i = startIndex; int retState = state; + SyntaxState curState = retrieveSyntaxState( syntax, state ); + + auto setSubsyntaxPatternIdx = [&curState, &retState]( const Uint64& patternIndex ) { + curState.currentPatternIdx = patternIndex; + retState &= ~( 0xFF << ( curState.currentLevel * 8 ) ); + retState |= ( patternIndex << ( curState.currentLevel * 8 ) ); + }; + + auto pushSubsyntax = [&setSubsyntaxPatternIdx, &curState]( + const SyntaxPattern& enteringSubsyntax, const Uint64& patternIndex ) { + setSubsyntaxPatternIdx( patternIndex ); + curState.currentLevel++; + curState.subsyntaxInfo = &enteringSubsyntax; + curState.currentSyntax = &SyntaxDefinitionManager::instance()->getStyleByLanguageName( + curState.subsyntaxInfo->syntax ); + setSubsyntaxPatternIdx( SYNTAX_TOKENIZER_STATE_NONE ); 
+ }; + + auto popSubsyntax = [&setSubsyntaxPatternIdx, &curState, &syntax, &retState]() { + setSubsyntaxPatternIdx( SYNTAX_TOKENIZER_STATE_NONE ); + curState.currentLevel--; + setSubsyntaxPatternIdx( SYNTAX_TOKENIZER_STATE_NONE ); + curState = retrieveSyntaxState( syntax, retState ); + }; while ( i < text.size() ) { - if ( retState != SYNTAX_TOKENIZER_STATE_NONE ) { - const SyntaxPattern& pattern = syntax.getPatterns()[retState]; + if ( curState.currentPatternIdx != SYNTAX_TOKENIZER_STATE_NONE ) { + const SyntaxPattern& pattern = + curState.currentSyntax->getPatterns()[curState.currentPatternIdx - 1]; std::pair range = findNonEscaped( text, pattern.patterns[1], i, pattern.patterns.size() >= 3 ? pattern.patterns[2] : "" ); - if ( range.first != -1 ) { - pushToken( tokens, pattern.type, text.substr( i, range.second - i ) ); - retState = SYNTAX_TOKENIZER_STATE_NONE; - i = range.second; - } else { - pushToken( tokens, pattern.type, text.substr( i ) ); - break; + + bool skip = false; + + if ( curState.subsyntaxInfo != nullptr ) { + std::pair rangeSubsyntax = + findNonEscaped( text, curState.subsyntaxInfo->patterns[1], i, + curState.subsyntaxInfo->patterns.size() >= 3 + ? curState.subsyntaxInfo->patterns[2] + : "" ); + + if ( rangeSubsyntax.first != -1 && + ( range.first == -1 || rangeSubsyntax.first < range.first ) ) { + pushToken( tokens, curState.subsyntaxInfo->type, + text.substr( i, rangeSubsyntax.second - i ) ); + popSubsyntax(); + i = rangeSubsyntax.second; + skip = true; + } + } + + if ( !skip ) { + if ( range.first != -1 ) { + pushToken( tokens, pattern.type, text.substr( i, range.second - i ) ); + setSubsyntaxPatternIdx( SYNTAX_TOKENIZER_STATE_NONE ); + i = range.second; + } else { + pushToken( tokens, pattern.type, text.substr( i ) ); + break; + } + } + } + + if ( curState.subsyntaxInfo != nullptr ) { + std::pair rangeSubsyntax = findNonEscaped( + text, "^" + curState.subsyntaxInfo->patterns[1], i, + curState.subsyntaxInfo->patterns.size() >= 3 ? 
curState.subsyntaxInfo->patterns[2] + : "" ); + + if ( rangeSubsyntax.first != -1 ) { + pushToken( tokens, curState.subsyntaxInfo->type, + text.substr( i, rangeSubsyntax.second - i ) ); + popSubsyntax(); + i = rangeSubsyntax.second; } } bool matched = false; - for ( size_t patternIndex = 0; patternIndex < syntax.getPatterns().size(); + for ( size_t patternIndex = 0; patternIndex < curState.currentSyntax->getPatterns().size(); patternIndex++ ) { - const SyntaxPattern& pattern = syntax.getPatterns()[patternIndex]; + const SyntaxPattern& pattern = curState.currentSyntax->getPatterns()[patternIndex]; if ( i != 0 && pattern.patterns[0][0] == '^' ) continue; const std::string& patternStr( @@ -114,10 +202,12 @@ std::pair, int> SyntaxTokenizer::tokenize( const Syntax text[i - 1] == pattern.patterns[2][0] ) continue; std::string patternText( text.substr( start, end - start ) ); - std::string type = syntax.getSymbol( patternText ); + std::string type = curState.currentSyntax->getSymbol( patternText ); pushToken( tokens, type.empty() ? pattern.type : type, patternText ); - if ( pattern.patterns.size() > 1 ) { - retState = patternIndex; + if ( !pattern.syntax.empty() ) { + pushSubsyntax( pattern, patternIndex + 1 ); + } else if ( pattern.patterns.size() > 1 ) { + setSubsyntaxPatternIdx( patternIndex + 1 ); } i = end; matched = true;