From af381e400a94a8e5b075f5cbc9aa3e537ddd8cff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Lucas=20Golini?= Date: Sat, 10 May 2025 21:34:02 -0300 Subject: [PATCH] More work to support TextMate grammars. --- include/eepp/system/regex.hpp | 10 +- include/eepp/ui/doc/syntaxdefinition.hpp | 8 +- src/eepp/ui/doc/languages/cpp.cpp | 2 +- src/eepp/ui/doc/syntaxdefinition.cpp | 44 ++++- src/eepp/ui/doc/syntaxdefinitionmanager.cpp | 162 ++++++++++------ src/eepp/ui/doc/syntaxtokenizer.cpp | 193 +++++++++++++++----- 6 files changed, 309 insertions(+), 110 deletions(-) diff --git a/include/eepp/system/regex.hpp b/include/eepp/system/regex.hpp index 17bbc450b..b9eee75cc 100644 --- a/include/eepp/system/regex.hpp +++ b/include/eepp/system/regex.hpp @@ -58,6 +58,9 @@ class EE_API RegEx : public PatternMatcher { ExtendedMore = 0x01000000u, // C Literal = 0x02000000u, // C MatchInvalidUtf = 0x04000000u, // J M D + Anchored = 0x80000000u, + NoUtfCheck = 0x40000000u, + Endanchored = 0x20000000u, FilterOutCaptures = 0x08000000u, // It will filter out repeated captures and same range captures }; @@ -83,9 +86,10 @@ class EE_API RegEx : public PatternMatcher { mutable size_t mMatchNum; void* mCompiledPattern; int mCaptureCount; - bool mValid{ false }; - bool mCached{ false }; - bool mFilterOutCaptures{ false }; + bool mValid : 1 { false }; + bool mCached : 1 { false }; + bool mFilterOutCaptures : 1 { false }; + bool mAnchored : 1 { false }; }; }} // namespace EE::System diff --git a/include/eepp/ui/doc/syntaxdefinition.hpp b/include/eepp/ui/doc/syntaxdefinition.hpp index 4f2f29c60..a212dab03 100644 --- a/include/eepp/ui/doc/syntaxdefinition.hpp +++ b/include/eepp/ui/doc/syntaxdefinition.hpp @@ -59,7 +59,9 @@ struct EE_API SyntaxPattern { const SyntaxDefinition* def{ nullptr }; Uint16 flags{ 0 }; Uint16 repositoryIdx{ 0 }; - std::vector subPatterns; + std::vector contentPatterns; + String::HashType contentScopeRepoHash{ + 0 }; // Hash of the repository containing this rule's content patterns SyntaxPattern( std::vector&& _patterns, const std::string& _type, const std::string& _syntax = "", @@ -122,6 +124,8 @@ struct EE_API SyntaxPattern { inline bool checkIsRepositoryInclude() const { return checkIsIncludePattern() && patterns[1][0] == '#'; } + + inline bool hasContentScope() const { return contentScopeRepoHash != 0; } }; class EE_API SyntaxDefinition { @@ -245,6 +249,8 @@ class EE_API SyntaxDefinition { const SyntaxPattern* getPatternFromState( const SyntaxStateType& state ) const; + void compile(); + protected: friend class SyntaxDefinitionManager; diff --git a/src/eepp/ui/doc/languages/cpp.cpp b/src/eepp/ui/doc/languages/cpp.cpp index 13895c5c7..ac4d475f7 100644 --- a/src/eepp/ui/doc/languages/cpp.cpp +++ b/src/eepp/ui/doc/languages/cpp.cpp @@ -101,7 +101,7 @@ void addCPP() { { "#endif", "keyword" }, { "#include", "keyword" }, { "#define", "keyword" }, { "#undef", "keyword" }, { "#line", "keyword" }, { "#error", "keyword" }, - { "#pragma", "keyword" }, + { "#pragma", "keyword" }, { "signed", "keyword2" }, }, "//", diff --git a/src/eepp/ui/doc/syntaxdefinition.cpp b/src/eepp/ui/doc/syntaxdefinition.cpp index f04c5aa69..fa64b8e65 100644 --- a/src/eepp/ui/doc/syntaxdefinition.cpp +++ b/src/eepp/ui/doc/syntaxdefinition.cpp @@ -11,6 +11,27 @@ namespace EE { namespace UI { namespace Doc { SyntaxDefMap SyntaxPattern::SyntaxStyleTypeCache = {}; +static void liftContentPatternsRecursive( SyntaxDefinition& def, SyntaxPattern& pattern, + std::string_view namePrefixSeed, + Uint64& uniqueIdCounter ) { + for ( Uint64 i = 0; i < pattern.contentPatterns.size(); ++i ) { + // Pass a new prefix seed for children to ensure unique names + std::string childPrefixSeed = namePrefixSeed + "_cp" + String::toString( i ); + liftContentPatternsRecursive( def, pattern.contentPatterns[i], childPrefixSeed, + uniqueIdCounter ); + } + + if ( !pattern.contentPatterns.empty() ) { + // Generate a unique repository name for this pattern's content scope + std::string contentRepoName = "$CONTENT_" + def.getLanguageNameForFileSystem() + "_" + + namePrefixSeed + "_uid" + + String::toString( uniqueIdCounter++ ); + + def.addRepository( contentRepoName, std::move( pattern.contentPatterns ) ); + pattern.contentScopeRepoHash = String::hash( contentRepoName ); + } +} + template void updateCache( const SyntaxPattern& ptrn ) { if constexpr ( std::is_same_v ) { return; @@ -66,7 +87,7 @@ static void updatePatternsState( SyntaxDefinition& def, std::vector&& _patterns, endTypesNames( std::move( _endTypes ) ), syntax( _syntax ), matchType( matchType ), - subPatterns( std::move( _subPatterns ) ) { + contentPatterns( std::move( _subPatterns ) ) { + eeASSERT( patterns.size() < std::numeric_limits::max() - 1 ); updateCache( *this ); } @@ -372,6 +394,7 @@ SyntaxPattern::SyntaxPattern( std::vector&& _patterns, endTypesNames( std::move( _endTypes ) ), syntax( _syntax ), matchType( matchType ) { + eeASSERT( patterns.size() < std::numeric_limits::max() - 1 ); updateCache( *this ); } @@ -382,6 +405,7 @@ SyntaxPattern::SyntaxPattern( std::vector&& _patterns, const std::s typesNames( { _type } ), dynSyntax( std::move( _syntax ) ), matchType( matchType ) { + eeASSERT( patterns.size() < std::numeric_limits::max() - 1 ); updateCache( *this ); } @@ -393,6 +417,7 @@ SyntaxPattern::SyntaxPattern( std::vector&& _patterns, typesNames( std::move( _types ) ), dynSyntax( std::move( _syntax ) ), matchType( matchType ) { + eeASSERT( patterns.size() < std::numeric_limits::max() - 1 ); updateCache( *this ); } @@ -407,11 +432,13 @@ SyntaxPattern::SyntaxPattern( std::vector&& _patterns, endTypesNames( std::move( _endTypes ) ), dynSyntax( std::move( _syntax ) ), matchType( matchType ) { + eeASSERT( patterns.size() < std::numeric_limits::max() - 1 ); updateCache( *this ); } SyntaxDefinition& SyntaxDefinition::addRepository( const std::string& name, std::vector&& patterns ) { + eeASSERT( patterns.size() < std::numeric_limits::max() - 1 ); auto hash = String::hash( name ); mRepositoryIndex[hash] = ++mRepositoryIndexCounter; mRepositoryNames[hash] = name; @@ -486,4 +513,17 @@ std::string SyntaxDefinition::getRepositoryName( String::HashType hash ) const { return it != mRepositoryNames.end() ? it->second : ""; } +void SyntaxDefinition::compile() { + Uint64 uniqueIdCounter = 0; + for ( SyntaxPattern& p : mPatterns ) + liftContentPatternsRecursive( *this, p, "root", uniqueIdCounter ); + + for ( auto& repoPair : mRepository ) { + for ( SyntaxPattern& p : repoPair.second ) { + liftContentPatternsRecursive( *this, p, mRepositoryNames[repoPair.first], + uniqueIdCounter ); + } + } +} + }}} // namespace EE::UI::Doc diff --git a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp index 02a2dca51..c96018494 100644 --- a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp +++ b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp @@ -48,16 +48,27 @@ class TextMateScopeMapper { { "entity.name.tag", "keyword2" }, // HTML/XML tag name { "keyword.control", "keyword" }, // if, else, for, while, return etc. { "keyword.operator", "operator" }, // +, -, =, and, or, etc. - { "punctuation.definition.tag", "operator" }, // <, >, , , & SyntaxDefinitionManager::getDefinitions() c return mDefinitions; } +static std::optional serializePattern( const SyntaxPattern& ptrn, + const SyntaxDefinition& def ) { + json pattern; + auto ptrnType = + ptrn.matchType == SyntaxPatternMatchType::RegEx + ? "regex" + : ( ptrn.matchType == SyntaxPatternMatchType::Parser ? "parser" : "pattern" ); + + // Do not export injected patterns + if ( ptrn.matchType == SyntaxPatternMatchType::LuaPattern && ptrn.patterns.size() == 1 && + ( ptrn.patterns[0] == "%s+" || ptrn.patterns[0] == "%w+%f[%s]" ) ) + return {}; + + bool hasInclude = false; + if ( ptrn.patterns.size() == 2 && ptrn.patterns[0] == "include" ) { + hasInclude = true; + pattern["include"] = ptrn.patterns[1]; + } else if ( ptrn.patterns.size() == 1 ) { + pattern[ptrnType] = ptrn.patterns[0]; + } else if ( ptrn.patterns.size() ) { + pattern[ptrnType] = ptrn.patterns; + } + + if ( !hasInclude ) { + if ( ptrn.typesNames.size() == 1 ) { + pattern["type"] = ptrn.typesNames[0]; + } else if ( ptrn.typesNames.size() ) { + pattern["type"] = ptrn.typesNames; + } + if ( ptrn.endTypesNames.size() == 1 ) { + pattern["end_type"] = ptrn.endTypesNames[0]; + } else if ( ptrn.endTypesNames.size() ) { + pattern["end_type"] = ptrn.endTypesNames; + } + if ( !ptrn.syntax.empty() ) + pattern["syntax"] = ptrn.syntax == def.getLanguageName() ? "$self" : ptrn.syntax; + } + + if ( ptrn.hasContentScope() ) { + const auto& contentPatterns = def.getRepository( ptrn.contentScopeRepoHash ); + nlohmann::json ptrns = nlohmann::json::array(); + for ( const auto& pattern : contentPatterns ) { + auto ojptrn = serializePattern( pattern, def ); + if ( ojptrn ) + ptrns.emplace_back( std::move( *ojptrn ) ); + } + if ( !ptrns.empty() ) + pattern["patterns"] = std::move( ptrns ); + } + + return pattern; +} + static json toJson( const SyntaxDefinition& def ) { - const auto serializePattern = - [&def]( const SyntaxPattern& ptrn ) -> std::optional { - json pattern; - auto ptrnType = - ptrn.matchType == SyntaxPatternMatchType::RegEx - ? "regex" - : ( ptrn.matchType == SyntaxPatternMatchType::Parser ? "parser" : "pattern" ); - - // Do not export injected patterns - if ( ptrn.matchType == SyntaxPatternMatchType::LuaPattern && ptrn.patterns.size() == 1 && - ( ptrn.patterns[0] == "%s+" || ptrn.patterns[0] == "%w+%f[%s]" ) ) - return {}; - - bool hasInclude = false; - if ( ptrn.patterns.size() == 2 && ptrn.patterns[0] == "include" ) { - hasInclude = true; - pattern["include"] = ptrn.patterns[1]; - } else if ( ptrn.patterns.size() == 1 ) { - pattern[ptrnType] = ptrn.patterns[0]; - } else if ( ptrn.patterns.size() ) { - pattern[ptrnType] = ptrn.patterns; - } - - if ( !hasInclude ) { - if ( ptrn.typesNames.size() == 1 ) { - pattern["type"] = ptrn.typesNames[0]; - } else if ( ptrn.typesNames.size() ) { - pattern["type"] = ptrn.typesNames; - } - if ( ptrn.endTypesNames.size() == 1 ) { - pattern["end_type"] = ptrn.endTypesNames[0]; - } else if ( ptrn.endTypesNames.size() ) { - pattern["end_type"] = ptrn.endTypesNames; - } - if ( !ptrn.syntax.empty() ) - pattern["syntax"] = ptrn.syntax == def.getLanguageName() ? "$self" : ptrn.syntax; - } - return pattern; - }; json j; j["name"] = def.getLanguageName(); @@ -211,7 +246,7 @@ static json toJson( const SyntaxDefinition& def ) { if ( !def.getPatterns().empty() ) { j["patterns"] = json::array(); for ( const auto& ptrn : def.getPatterns() ) { - auto pattern = serializePattern( ptrn ); + auto pattern = serializePattern( ptrn, def ); if ( pattern ) j["patterns"].emplace_back( std::move( *pattern ) ); } @@ -251,9 +286,12 @@ static json toJson( const SyntaxDefinition& def ) { for ( const auto& [hash, patterns] : def.getRepositories() ) { std::string name = def.getRepositoryName( hash ); + if ( name.starts_with( "$CONTENT_" ) ) + continue; + nlohmann::json repo; for ( const auto& pattern : patterns ) { - auto ojptrn = serializePattern( pattern ); + auto ojptrn = serializePattern( pattern, def ); if ( ojptrn ) repo.emplace_back( std::move( *ojptrn ) ); } @@ -432,6 +470,7 @@ namespace EE { namespace UI { namespace Doc { namespace Language { SyntaxDefinition& SyntaxDefinitionManager::add( SyntaxDefinition&& syntaxStyle ) { syntaxStyle.mLanguageIndex = mDefinitions.size(); + syntaxStyle.compile(); mDefinitions.emplace_back( std::move( syntaxStyle ) ); return mDefinitions.back(); } @@ -551,7 +590,8 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) { auto ctype = SyntaxPatternMatchType::LuaPattern; std::string syntax; - const auto fillTypes = []( const nlohmann::json& captures, std::vector& type ) { + const auto fillTypes = []( const nlohmann::json& captures, std::vector& type, + const nlohmann::json& parent ) { Uint64 totalCaptures = 0; for ( const auto& [capNumStr, _] : captures.items() ) { Uint64 num; @@ -564,6 +604,8 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) { if ( captures.contains( capNumStr ) && captures[capNumStr].contains( "name" ) ) { type.emplace_back( TextMateScopeMapper::scopeToType( captures[capNumStr].value( "name", "" ) ) ); + } else if ( parent.contains( "name" ) ) { + type.emplace_back( TextMateScopeMapper::scopeToType( parent.value( "name", "" ) ) ); } else { type.emplace_back( "normal" ); } @@ -571,17 +613,18 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) { }; // Assume TextMate pattern - if ( pattern.contains( "name" ) || pattern.contains( "begin" ) ) { + if ( pattern.contains( "name" ) || pattern.contains( "begin" ) || + pattern.contains( "match" ) ) { ctype = SyntaxPatternMatchType::RegEx; if ( pattern.contains( "beginCaptures" ) ) - fillTypes( pattern["beginCaptures"], type ); + fillTypes( pattern["beginCaptures"], type, pattern ); if ( pattern.contains( "endCaptures" ) ) - fillTypes( pattern["endCaptures"], endType ); + fillTypes( pattern["endCaptures"], endType, pattern ); if ( type.empty() && pattern.contains( "captures" ) ) - fillTypes( pattern["captures"], type ); + fillTypes( pattern["captures"], type, pattern ); if ( type.empty() && pattern.contains( "name" ) ) { type.emplace_back( TextMateScopeMapper::scopeToType( pattern.value( "name", "" ) ) ); @@ -609,7 +652,9 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) { patterns[0].value( "include", "" ) == "$self" ) { syntax = "$self"; } else if ( patterns[0].contains( "name" ) && patterns[0].contains( "match" ) && - patterns[0].value( "name", "" ).starts_with( "constant.character.escape" ) ) { + patterns[0] + .value( "name", "" ) + .starts_with( "constant.character.escape" ) ) { ptrns.emplace_back( patterns[0].value( "match", "" ) ); } else { subPatterns.push_back( parsePattern( patterns[0] ) ); @@ -621,6 +666,10 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) { } } + if ( type.empty() && ( pattern.contains( "name" ) || pattern.contains( "begin" ) ) ) { + type.emplace_back( "normal" ); + } + } else { if ( pattern.contains( "syntax" ) && pattern["syntax"].is_string() ) syntax = pattern.value( "syntax", "" ); @@ -699,6 +748,7 @@ static SyntaxDefinition loadTextMateLanguage( const nlohmann::json& json, Syntax const auto& scopeName = json.value( "scopeName", "" ); def.addFileType( "%." + FileSystem::fileExtension( scopeName ) + "$" ); } + def.compile(); return def; } @@ -814,6 +864,8 @@ static SyntaxDefinition loadLanguage( const nlohmann::json& json ) { def.setFoldBraces( folds ); } + + def.compile(); } catch ( const json::exception& e ) { Log::error( "SyntaxDefinition loadLanguage failed:\n%s", e.what() ); } diff --git a/src/eepp/ui/doc/syntaxtokenizer.cpp b/src/eepp/ui/doc/syntaxtokenizer.cpp index a85b66131..2ddd0e03f 100644 --- a/src/eepp/ui/doc/syntaxtokenizer.cpp +++ b/src/eepp/ui/doc/syntaxtokenizer.cpp @@ -28,7 +28,7 @@ struct PatternStackItem { // large line. This will help the editor to cull the rendering only for the visible tokens #define MAX_TOKEN_SIZE ( 512 ) -#define MAX_MATCHES ( 12 ) +#define MAX_MATCHES ( 64 ) #define MAX_PATTERN_STACK_SIZE ( 16 ) @@ -284,17 +284,19 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax std::optional shouldCloseSubSyntax; const auto matchPattern = [&]( const SyntaxPattern& pattern, size_t& startIdx, - SyntaxStateType patternIndex ) -> bool { + SyntaxStateType patternIndex, + NonEscapedMatch* endRange = nullptr ) -> bool { int start = 0, end = 0; patternStr = - pattern.matchType != SyntaxPatternMatchType::Parser + pattern.matchType == SyntaxPatternMatchType::LuaPattern ? pattern.patterns[0][0] == '^' ? pattern.patterns[0] : "^" + pattern.patterns[0] : pattern.patterns[0]; std::variant wordsVar = pattern.matchType == SyntaxPatternMatchType::LuaPattern ? std::variant( LuaPattern( patternStr ) ) : ( pattern.matchType == SyntaxPatternMatchType::RegEx - ? std::variant( RegEx( patternStr ) ) + ? std::variant( + RegEx( patternStr, RegEx::Options::Utf | RegEx::Options::Anchored ) ) : std::variant( ParserMatcher( patternStr ) ) ); PatternMatcher& words = std::visit( @@ -317,9 +319,16 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax shouldCloseSubSyntax = {}; } + if ( numMatches > MAX_MATCHES ) + numMatches = MAX_MATCHES; + if ( numMatches > 1 ) { int fullMatchStart = matches[0].start; int fullMatchEnd = matches[0].end; + + if ( endRange && fullMatchEnd >= endRange->range.first ) + fullMatchEnd = endRange->range.first; + if ( pattern.matchType == SyntaxPatternMatchType::RegEx ) { priorityMap.clear(); priorityMap.resize( fullMatchEnd - fullMatchStart, 0 ); @@ -328,6 +337,10 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax int capStart = matches[captureIndex].start; int capEnd = matches[captureIndex].end; + if ( endRange && capEnd >= endRange->range.first ) { + capEnd = endRange->range.first; + } + if ( capStart < fullMatchStart || capEnd > fullMatchEnd || capStart >= capEnd ) continue; @@ -473,6 +486,13 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax String::utf8Next( strEnd ); end = start + ( strEnd - strStart ); } + + if ( endRange && end >= endRange->range.first ) { + end = endRange->range.first; + if ( start == end ) + return false; + } + patternText = textv.substr( start, end - start ); SyntaxStyleType type = ( pattern.types[0] == SyntaxStyleTypes::Symbol || @@ -499,6 +519,7 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax size_t size = text.size(); size_t startIdx = startIndex; + const SyntaxPattern* activePattern = nullptr; static constexpr auto PATTERN_STACK_BUFFER = MAX_PATTERN_STACK_SIZE * sizeof( PatternStackItem ); @@ -510,54 +531,129 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax while ( startIdx < size ) { bool matched = false; patternStack.clear(); + activePattern = nullptr; if ( curState.currentPatternIdx.state != SYNTAX_TOKENIZER_STATE_NONE ) { - const SyntaxPattern& pattern = - *curState.currentSyntax->getPatternFromState( curState.currentPatternIdx ); - auto range = findNonEscaped( text, pattern.patterns[1], startIdx, - pattern.patterns.size() >= 3 ? pattern.patterns[2] : "", - pattern.matchType ) - .range; + activePattern = + curState.currentSyntax->getPatternFromState( curState.currentPatternIdx ); + eeASSERT( activePattern ); + } - bool skip = false; + if ( activePattern ) { + auto endRange = findNonEscaped( + text, activePattern->patterns[1], startIdx, + activePattern->patterns.size() >= 3 ? activePattern->patterns[2] : "", + activePattern->matchType ); - if ( curState.subsyntaxInfo != nullptr && curState.subsyntaxInfo->patterns.size() > 1 && - curState.currentSyntax->getLanguageIndex() != syntax.getLanguageIndex() ) { - auto rangeSubsyntax = - findNonEscaped( text, curState.subsyntaxInfo->patterns[1], startIdx, - curState.subsyntaxInfo->patterns.size() >= 3 - ? curState.subsyntaxInfo->patterns[2] - : "", - pattern.matchType ); - - if ( rangeSubsyntax.range.first != -1 && - ( range.first == -1 || rangeSubsyntax.range.first < range.first ) ) { - if ( !skipSubSyntaxSeparator ) { - pushTokensToOpenCloseSubsyntax( startIdx, textv, curState.subsyntaxInfo, - rangeSubsyntax, tokens, true ); - } - popSubsyntax( curState, retState, syntax ); - startIdx = rangeSubsyntax.range.second; - skip = true; - } - } - - if ( !skip ) { - if ( range.first != -1 ) { - if ( range.second > range.first && pattern.types.size() >= 3 ) { - pushToken( tokens, pattern.types[0], - textv.substr( startIdx, range.first - startIdx ) ); - pushToken( tokens, pattern.types[pattern.types.size() - 1], - textv.substr( range.first, range.second - range.first ) ); - } else { - pushToken( tokens, pattern.types[0], - textv.substr( startIdx, range.second - startIdx ) ); - } + if ( activePattern->hasContentScope() ) { + if ( endRange.range.first == static_cast( startIdx ) ) { + pushTokensToOpenCloseSubsyntax( startIdx, textv, activePattern, endRange, + tokens, true ); setSubsyntaxPatternIdx( curState, retState, SyntaxStateType{} ); - startIdx = range.second; - } else { - pushToken( tokens, pattern.types[0], textv.substr( startIdx ) ); - break; + startIdx = endRange.range.second; + continue; + } + + const auto& contentScopePatterns = + curState.currentSyntax->getRepository( activePattern->contentScopeRepoHash ); + + auto contentScopeRepoGlobalIndex = curState.currentSyntax->getRepositoryIndex( + activePattern->contentScopeRepoHash ); + + patternStack.push_back( { &contentScopePatterns, 0, + static_cast( contentScopeRepoGlobalIndex ) } ); + + while ( !patternStack.empty() && !matched ) { + PatternStackItem& current = patternStack.back(); + if ( current.index >= current.patterns->size() ) { + patternStack.pop_back(); + continue; + } + const SyntaxPattern* innerPtrn = ¤t.patterns->data()[current.index]; + SyntaxStateType patternState = { static_cast( current.index + 1 ), + current.repositoryIdx }; + current.index++; + + if ( innerPtrn->isRepositoryInclude() ) { + if ( patternStack.size() + 1 >= MAX_PATTERN_STACK_SIZE ) + break; + const auto& targetRepo = + curState.currentSyntax->getRepository( innerPtrn->getRepositoryName() ); + patternStack.push_back( + { &targetRepo, 0, static_cast( innerPtrn->repositoryIdx ) } ); + continue; + } else if ( innerPtrn->isRootSelfInclude() ) { + if ( patternStack.size() + 1 >= MAX_PATTERN_STACK_SIZE ) + break; + patternStack.push_back( { &curState.currentSyntax->getPatterns(), 0, 0 } ); + continue; + } + + if ( startIdx != 0 && + innerPtrn->matchType == SyntaxPatternMatchType::LuaPattern && + innerPtrn->patterns[0][0] == '^' ) + continue; + + if ( matchPattern( *innerPtrn, startIdx, patternState, + endRange.numMatches ? &endRange : nullptr ) ) { + matched = true; + } + } + + if ( matched ) + continue; + + if ( !matched && startIdx < text.size() ) { + char* strStart = const_cast( text.c_str() + startIdx ); + char* strEnd = strStart; + String::utf8Next( strEnd ); + int dist = strEnd - strStart; + if ( dist > 0 ) { + pushToken( tokens, activePattern->types[0], text.substr( startIdx, dist ) ); + startIdx += dist; + } else { + Log::error( "Error parsing \"%s\" using syntax: %s", text.c_str(), + syntax.getLSPName().c_str() ); + break; + } + continue; + } + } else { + bool skip = false; + + if ( curState.subsyntaxInfo != nullptr && + curState.subsyntaxInfo->patterns.size() > 1 && + curState.currentSyntax->getLanguageIndex() != syntax.getLanguageIndex() ) { + auto rangeSubsyntax = + findNonEscaped( text, curState.subsyntaxInfo->patterns[1], startIdx, + curState.subsyntaxInfo->patterns.size() >= 3 + ? curState.subsyntaxInfo->patterns[2] + : "", + activePattern->matchType ); + + if ( rangeSubsyntax.range.first != -1 && + ( endRange.range.first == -1 || + rangeSubsyntax.range.first < endRange.range.first ) ) { + if ( !skipSubSyntaxSeparator ) { + pushTokensToOpenCloseSubsyntax( startIdx, textv, curState.subsyntaxInfo, + rangeSubsyntax, tokens, true ); + } + popSubsyntax( curState, retState, syntax ); + startIdx = rangeSubsyntax.range.second; + skip = true; + } + } + + if ( !skip ) { + if ( endRange.range.first != -1 ) { + pushTokensToOpenCloseSubsyntax( startIdx, textv, activePattern, endRange, + tokens, true ); + setSubsyntaxPatternIdx( curState, retState, SyntaxStateType{} ); + startIdx = endRange.range.second; + } else { + pushToken( tokens, activePattern->types[0], textv.substr( startIdx ) ); + break; + } } } } @@ -599,7 +695,8 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax patternStack.push_back( { &curState.currentSyntax->getPatterns(), 0, 0 } ); } else { - if ( startIdx != 0 && pattern->patterns[0][0] == '^' ) + if ( startIdx != 0 && pattern->matchType == SyntaxPatternMatchType::LuaPattern && + pattern->patterns[0][0] == '^' ) continue; SyntaxStateType patternIndex = { static_cast( current.index ),