More work to support TextMate grammars.

This commit is contained in:
Martín Lucas Golini
2025-05-10 21:34:02 -03:00
parent 51430ae87b
commit af381e400a
6 changed files with 309 additions and 110 deletions

View File

@@ -58,6 +58,9 @@ class EE_API RegEx : public PatternMatcher {
ExtendedMore = 0x01000000u, // C
Literal = 0x02000000u, // C
MatchInvalidUtf = 0x04000000u, // J M D
Anchored = 0x80000000u,
NoUtfCheck = 0x40000000u,
Endanchored = 0x20000000u,
FilterOutCaptures =
0x08000000u, // It will filter out repeated captures and same range captures
};
@@ -83,9 +86,10 @@ class EE_API RegEx : public PatternMatcher {
mutable size_t mMatchNum;
void* mCompiledPattern;
int mCaptureCount;
bool mValid{ false };
bool mCached{ false };
bool mFilterOutCaptures{ false };
bool mValid : 1 { false };
bool mCached : 1 { false };
bool mFilterOutCaptures : 1 { false };
bool mAnchored : 1 { false };
};
}} // namespace EE::System

View File

@@ -59,7 +59,9 @@ struct EE_API SyntaxPattern {
const SyntaxDefinition* def{ nullptr };
Uint16 flags{ 0 };
Uint16 repositoryIdx{ 0 };
std::vector<SyntaxPattern> subPatterns;
std::vector<SyntaxPattern> contentPatterns;
String::HashType contentScopeRepoHash{
0 }; // Hash of the repository containing this rule's content patterns
SyntaxPattern( std::vector<std::string>&& _patterns, const std::string& _type,
const std::string& _syntax = "",
@@ -122,6 +124,8 @@ struct EE_API SyntaxPattern {
// Returns true when this pattern is an include pattern (per checkIsIncludePattern()) whose
// second entry, patterns[1], begins with '#'.
// NOTE(review): relies on checkIsIncludePattern() guaranteeing patterns[1] exists — confirm.
inline bool checkIsRepositoryInclude() const {
return checkIsIncludePattern() && patterns[1][0] == '#';
}
// Returns true when contentScopeRepoHash is non-zero, i.e. a repository hash has been recorded
// for this rule's content patterns (0 is the "no content scope" default).
inline bool hasContentScope() const { return contentScopeRepoHash != 0; }
};
class EE_API SyntaxDefinition {
@@ -245,6 +249,8 @@ class EE_API SyntaxDefinition {
const SyntaxPattern* getPatternFromState( const SyntaxStateType& state ) const;
void compile();
protected:
friend class SyntaxDefinitionManager;

View File

@@ -101,7 +101,7 @@ void addCPP() {
{ "#endif", "keyword" }, { "#include", "keyword" },
{ "#define", "keyword" }, { "#undef", "keyword" },
{ "#line", "keyword" }, { "#error", "keyword" },
{ "#pragma", "keyword" },
{ "#pragma", "keyword" }, { "signed", "keyword2" },
},
"//",

View File

@@ -11,6 +11,27 @@ namespace EE { namespace UI { namespace Doc {
SyntaxDefMap<SyntaxStyleType, std::string> SyntaxPattern::SyntaxStyleTypeCache = {};
/**
 * Moves the content patterns nested inside `pattern` into a dedicated repository of `def`.
 *
 * Children are processed first (post-order), so by the time a pattern's own content patterns
 * are handed to SyntaxDefinition::addRepository() every nested level has already been lifted.
 * After lifting, `pattern.contentScopeRepoHash` holds the hash of the generated repository
 * name, which has the form "$CONTENT_<language>_<prefix>_uid<N>".
 *
 * @param def Definition that receives the generated repositories.
 * @param pattern Pattern whose `contentPatterns` are lifted.
 * @param namePrefixSeed Prefix used to build the generated repository name.
 * @param uniqueIdCounter Running counter, incremented once per generated repository so that
 * every generated name is unique within a compile pass.
 */
static void liftContentPatternsRecursive( SyntaxDefinition& def, SyntaxPattern& pattern,
										  std::string_view namePrefixSeed,
										  Uint64& uniqueIdCounter ) {
	// std::string_view has no operator+ against string literals or std::string, so the seed is
	// materialised once. Owning a copy also keeps the prefix valid while the calls below add
	// repositories to `def`.
	const std::string prefix( namePrefixSeed );

	for ( Uint64 i = 0; i < pattern.contentPatterns.size(); ++i ) {
		// Each child gets its own seed so that names generated deeper in the tree stay distinct.
		const std::string childPrefixSeed = prefix + "_cp" + String::toString( i );
		liftContentPatternsRecursive( def, pattern.contentPatterns[i], childPrefixSeed,
									  uniqueIdCounter );
	}

	if ( !pattern.contentPatterns.empty() ) {
		// Generate a unique repository name for this pattern's content scope.
		std::string contentRepoName = "$CONTENT_" + def.getLanguageNameForFileSystem() + "_" +
									  prefix + "_uid" + String::toString( uniqueIdCounter++ );
		def.addRepository( contentRepoName, std::move( pattern.contentPatterns ) );
		pattern.contentScopeRepoHash = String::hash( contentRepoName );
	}
}
template <typename SyntaxStyleType> void updateCache( const SyntaxPattern& ptrn ) {
if constexpr ( std::is_same_v<SyntaxStyleType, std::string> ) {
return;
@@ -66,7 +87,7 @@ static void updatePatternsState( SyntaxDefinition& def, std::vector<SyntaxPatter
for ( auto& ptrn : ptrns ) {
updatePatternState( def, ptrn );
for ( auto& subPattern : ptrn.subPatterns )
for ( auto& subPattern : ptrn.contentPatterns )
updatePatternState( def, subPattern );
}
}
@@ -357,7 +378,8 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
endTypesNames( std::move( _endTypes ) ),
syntax( _syntax ),
matchType( matchType ),
subPatterns( std::move( _subPatterns ) ) {
contentPatterns( std::move( _subPatterns ) ) {
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
updateCache<SyntaxStyleType>( *this );
}
@@ -372,6 +394,7 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
endTypesNames( std::move( _endTypes ) ),
syntax( _syntax ),
matchType( matchType ) {
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
updateCache<SyntaxStyleType>( *this );
}
@@ -382,6 +405,7 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns, const std::s
typesNames( { _type } ),
dynSyntax( std::move( _syntax ) ),
matchType( matchType ) {
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
updateCache<SyntaxStyleType>( *this );
}
@@ -393,6 +417,7 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
typesNames( std::move( _types ) ),
dynSyntax( std::move( _syntax ) ),
matchType( matchType ) {
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
updateCache<SyntaxStyleType>( *this );
}
@@ -407,11 +432,13 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
endTypesNames( std::move( _endTypes ) ),
dynSyntax( std::move( _syntax ) ),
matchType( matchType ) {
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
updateCache<SyntaxStyleType>( *this );
}
SyntaxDefinition& SyntaxDefinition::addRepository( const std::string& name,
std::vector<SyntaxPattern>&& patterns ) {
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
auto hash = String::hash( name );
mRepositoryIndex[hash] = ++mRepositoryIndexCounter;
mRepositoryNames[hash] = name;
@@ -486,4 +513,17 @@ std::string SyntaxDefinition::getRepositoryName( String::HashType hash ) const {
return it != mRepositoryNames.end() ? it->second : "";
}
void SyntaxDefinition::compile() {
Uint64 uniqueIdCounter = 0;
for ( SyntaxPattern& p : mPatterns )
liftContentPatternsRecursive( *this, p, "root", uniqueIdCounter );
for ( auto& repoPair : mRepository ) {
for ( SyntaxPattern& p : repoPair.second ) {
liftContentPatternsRecursive( *this, p, mRepositoryNames[repoPair.first],
uniqueIdCounter );
}
}
}
}}} // namespace EE::UI::Doc

View File

@@ -48,16 +48,27 @@ class TextMateScopeMapper {
{ "entity.name.tag", "keyword2" }, // HTML/XML tag name
{ "keyword.control", "keyword" }, // if, else, for, while, return etc.
{ "keyword.operator", "operator" }, // +, -, =, and, or, etc.
{ "punctuation.definition.tag", "operator" }, // <, >, </ in HTML/XML
{ "support.function", "function" }, // Built-in functions (print, len)
{ "support.type", "keyword2" }, // Built-in types (string, list)
{ "support.class", "keyword2" }, // Built-in classes
{ "storage.modifier", "keyword" }, // public, private, static, const etc.
{ "constant.numeric", "number" }, // Numbers
{ "constant.language", "literal" }, // true, false, null etc.
{ "comment.unused", "normal" }, // unused comments pattern
{ "punctuation.definition.string", "string" }, // Delimiters (quotes) that open/close a string
{ "punctuation.definition.tag", "operator" }, // <, >, </ in HTML/XML
{ "support.function", "function" }, // Built-in functions (print, len)
{ "support.type", "keyword2" }, // Built-in types (string, list)
{ "support.class", "keyword2" }, // Built-in classes
{ "storage.modifier", "keyword" }, // public, private, static, const etc.
{ "constant.numeric", "number" }, // Numbers
{ "constant.language", "literal" }, // true, false, null etc.
{ "comment.unused", "normal" }, // unused comments pattern
{ "declaration.package", "literal" },
{ "declaration.import", "literal" },
// -- General Categories --
{ "declaration", "literal" },
{ "identifier", "literal" },
{ "function", "function" },
{ "type", "keyword2" },
{ "scope", "operator" },
{ "invalid", "error" },
{ "external", "keyword" },
{ "tag", "operator" },
{ "comment", "comment" }, // Comments
{ "string", "string" }, // Strings
{ "keyword", "keyword" }, // Any other keyword
@@ -111,6 +122,16 @@ class TextMateScopeMapper {
}
}
if ( scopeName.starts_with( "meta." ) )
return scopeToType( scopeName.substr( 5 ) );
for ( const auto& mapping : scope_map_ ) {
const std::string_view tmPrefix = mapping.first;
if ( String::contains( scopeName, tmPrefix ) ) {
return std::string( mapping.second );
}
}
// If no prefix matched, return the default type
return "normal";
}
@@ -160,46 +181,60 @@ const std::vector<SyntaxDefinition>& SyntaxDefinitionManager::getDefinitions() c
return mDefinitions;
}
static std::optional<nlohmann::json> serializePattern( const SyntaxPattern& ptrn,
const SyntaxDefinition& def ) {
json pattern;
auto ptrnType =
ptrn.matchType == SyntaxPatternMatchType::RegEx
? "regex"
: ( ptrn.matchType == SyntaxPatternMatchType::Parser ? "parser" : "pattern" );
// Do not export injected patterns
if ( ptrn.matchType == SyntaxPatternMatchType::LuaPattern && ptrn.patterns.size() == 1 &&
( ptrn.patterns[0] == "%s+" || ptrn.patterns[0] == "%w+%f[%s]" ) )
return {};
bool hasInclude = false;
if ( ptrn.patterns.size() == 2 && ptrn.patterns[0] == "include" ) {
hasInclude = true;
pattern["include"] = ptrn.patterns[1];
} else if ( ptrn.patterns.size() == 1 ) {
pattern[ptrnType] = ptrn.patterns[0];
} else if ( ptrn.patterns.size() ) {
pattern[ptrnType] = ptrn.patterns;
}
if ( !hasInclude ) {
if ( ptrn.typesNames.size() == 1 ) {
pattern["type"] = ptrn.typesNames[0];
} else if ( ptrn.typesNames.size() ) {
pattern["type"] = ptrn.typesNames;
}
if ( ptrn.endTypesNames.size() == 1 ) {
pattern["end_type"] = ptrn.endTypesNames[0];
} else if ( ptrn.endTypesNames.size() ) {
pattern["end_type"] = ptrn.endTypesNames;
}
if ( !ptrn.syntax.empty() )
pattern["syntax"] = ptrn.syntax == def.getLanguageName() ? "$self" : ptrn.syntax;
}
if ( ptrn.hasContentScope() ) {
const auto& contentPatterns = def.getRepository( ptrn.contentScopeRepoHash );
nlohmann::json ptrns = nlohmann::json::array();
for ( const auto& pattern : contentPatterns ) {
auto ojptrn = serializePattern( pattern, def );
if ( ojptrn )
ptrns.emplace_back( std::move( *ojptrn ) );
}
if ( !ptrns.empty() )
pattern["patterns"] = std::move( ptrns );
}
return pattern;
}
static json toJson( const SyntaxDefinition& def ) {
const auto serializePattern =
[&def]( const SyntaxPattern& ptrn ) -> std::optional<nlohmann::json> {
json pattern;
auto ptrnType =
ptrn.matchType == SyntaxPatternMatchType::RegEx
? "regex"
: ( ptrn.matchType == SyntaxPatternMatchType::Parser ? "parser" : "pattern" );
// Do not export injected patterns
if ( ptrn.matchType == SyntaxPatternMatchType::LuaPattern && ptrn.patterns.size() == 1 &&
( ptrn.patterns[0] == "%s+" || ptrn.patterns[0] == "%w+%f[%s]" ) )
return {};
bool hasInclude = false;
if ( ptrn.patterns.size() == 2 && ptrn.patterns[0] == "include" ) {
hasInclude = true;
pattern["include"] = ptrn.patterns[1];
} else if ( ptrn.patterns.size() == 1 ) {
pattern[ptrnType] = ptrn.patterns[0];
} else if ( ptrn.patterns.size() ) {
pattern[ptrnType] = ptrn.patterns;
}
if ( !hasInclude ) {
if ( ptrn.typesNames.size() == 1 ) {
pattern["type"] = ptrn.typesNames[0];
} else if ( ptrn.typesNames.size() ) {
pattern["type"] = ptrn.typesNames;
}
if ( ptrn.endTypesNames.size() == 1 ) {
pattern["end_type"] = ptrn.endTypesNames[0];
} else if ( ptrn.endTypesNames.size() ) {
pattern["end_type"] = ptrn.endTypesNames;
}
if ( !ptrn.syntax.empty() )
pattern["syntax"] = ptrn.syntax == def.getLanguageName() ? "$self" : ptrn.syntax;
}
return pattern;
};
json j;
j["name"] = def.getLanguageName();
@@ -211,7 +246,7 @@ static json toJson( const SyntaxDefinition& def ) {
if ( !def.getPatterns().empty() ) {
j["patterns"] = json::array();
for ( const auto& ptrn : def.getPatterns() ) {
auto pattern = serializePattern( ptrn );
auto pattern = serializePattern( ptrn, def );
if ( pattern )
j["patterns"].emplace_back( std::move( *pattern ) );
}
@@ -251,9 +286,12 @@ static json toJson( const SyntaxDefinition& def ) {
for ( const auto& [hash, patterns] : def.getRepositories() ) {
std::string name = def.getRepositoryName( hash );
if ( name.starts_with( "$CONTENT_" ) )
continue;
nlohmann::json repo;
for ( const auto& pattern : patterns ) {
auto ojptrn = serializePattern( pattern );
auto ojptrn = serializePattern( pattern, def );
if ( ojptrn )
repo.emplace_back( std::move( *ojptrn ) );
}
@@ -432,6 +470,7 @@ namespace EE { namespace UI { namespace Doc { namespace Language {
/**
 * Registers a syntax definition with the manager.
 *
 * The definition is assigned the next language index (its position in mDefinitions), compiled,
 * and then moved into the manager's storage.
 *
 * @param syntaxStyle Definition to take ownership of.
 * @return Reference to the stored definition.
 */
SyntaxDefinition& SyntaxDefinitionManager::add( SyntaxDefinition&& syntaxStyle ) {
	const auto nextLanguageIndex = mDefinitions.size();
	syntaxStyle.mLanguageIndex = nextLanguageIndex;
	syntaxStyle.compile();
	// emplace_back returns a reference to the element it just appended.
	return mDefinitions.emplace_back( std::move( syntaxStyle ) );
}
@@ -551,7 +590,8 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
auto ctype = SyntaxPatternMatchType::LuaPattern;
std::string syntax;
const auto fillTypes = []( const nlohmann::json& captures, std::vector<std::string>& type ) {
const auto fillTypes = []( const nlohmann::json& captures, std::vector<std::string>& type,
const nlohmann::json& parent ) {
Uint64 totalCaptures = 0;
for ( const auto& [capNumStr, _] : captures.items() ) {
Uint64 num;
@@ -564,6 +604,8 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
if ( captures.contains( capNumStr ) && captures[capNumStr].contains( "name" ) ) {
type.emplace_back(
TextMateScopeMapper::scopeToType( captures[capNumStr].value( "name", "" ) ) );
} else if ( parent.contains( "name" ) ) {
type.emplace_back( TextMateScopeMapper::scopeToType( parent.value( "name", "" ) ) );
} else {
type.emplace_back( "normal" );
}
@@ -571,17 +613,18 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
};
// Assume TextMate pattern
if ( pattern.contains( "name" ) || pattern.contains( "begin" ) ) {
if ( pattern.contains( "name" ) || pattern.contains( "begin" ) ||
pattern.contains( "match" ) ) {
ctype = SyntaxPatternMatchType::RegEx;
if ( pattern.contains( "beginCaptures" ) )
fillTypes( pattern["beginCaptures"], type );
fillTypes( pattern["beginCaptures"], type, pattern );
if ( pattern.contains( "endCaptures" ) )
fillTypes( pattern["endCaptures"], endType );
fillTypes( pattern["endCaptures"], endType, pattern );
if ( type.empty() && pattern.contains( "captures" ) )
fillTypes( pattern["captures"], type );
fillTypes( pattern["captures"], type, pattern );
if ( type.empty() && pattern.contains( "name" ) ) {
type.emplace_back( TextMateScopeMapper::scopeToType( pattern.value( "name", "" ) ) );
@@ -609,7 +652,9 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
patterns[0].value( "include", "" ) == "$self" ) {
syntax = "$self";
} else if ( patterns[0].contains( "name" ) && patterns[0].contains( "match" ) &&
patterns[0].value( "name", "" ).starts_with( "constant.character.escape" ) ) {
patterns[0]
.value( "name", "" )
.starts_with( "constant.character.escape" ) ) {
ptrns.emplace_back( patterns[0].value( "match", "" ) );
} else {
subPatterns.push_back( parsePattern( patterns[0] ) );
@@ -621,6 +666,10 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
}
}
if ( type.empty() && ( pattern.contains( "name" ) || pattern.contains( "begin" ) ) ) {
type.emplace_back( "normal" );
}
} else {
if ( pattern.contains( "syntax" ) && pattern["syntax"].is_string() )
syntax = pattern.value( "syntax", "" );
@@ -699,6 +748,7 @@ static SyntaxDefinition loadTextMateLanguage( const nlohmann::json& json, Syntax
const auto& scopeName = json.value( "scopeName", "" );
def.addFileType( "%." + FileSystem::fileExtension( scopeName ) + "$" );
}
def.compile();
return def;
}
@@ -814,6 +864,8 @@ static SyntaxDefinition loadLanguage( const nlohmann::json& json ) {
def.setFoldBraces( folds );
}
def.compile();
} catch ( const json::exception& e ) {
Log::error( "SyntaxDefinition loadLanguage failed:\n%s", e.what() );
}

View File

@@ -28,7 +28,7 @@ struct PatternStackItem {
// large line. This will help the editor to cull the rendering only for the visible tokens
#define MAX_TOKEN_SIZE ( 512 )
#define MAX_MATCHES ( 12 )
#define MAX_MATCHES ( 64 )
#define MAX_PATTERN_STACK_SIZE ( 16 )
@@ -284,17 +284,19 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
std::optional<NonEscapedMatch> shouldCloseSubSyntax;
const auto matchPattern = [&]( const SyntaxPattern& pattern, size_t& startIdx,
SyntaxStateType patternIndex ) -> bool {
SyntaxStateType patternIndex,
NonEscapedMatch* endRange = nullptr ) -> bool {
int start = 0, end = 0;
patternStr =
pattern.matchType != SyntaxPatternMatchType::Parser
pattern.matchType == SyntaxPatternMatchType::LuaPattern
? pattern.patterns[0][0] == '^' ? pattern.patterns[0] : "^" + pattern.patterns[0]
: pattern.patterns[0];
std::variant<RegEx, LuaPattern, ParserMatcher> wordsVar =
pattern.matchType == SyntaxPatternMatchType::LuaPattern
? std::variant<RegEx, LuaPattern, ParserMatcher>( LuaPattern( patternStr ) )
: ( pattern.matchType == SyntaxPatternMatchType::RegEx
? std::variant<RegEx, LuaPattern, ParserMatcher>( RegEx( patternStr ) )
? std::variant<RegEx, LuaPattern, ParserMatcher>(
RegEx( patternStr, RegEx::Options::Utf | RegEx::Options::Anchored ) )
: std::variant<RegEx, LuaPattern, ParserMatcher>(
ParserMatcher( patternStr ) ) );
PatternMatcher& words = std::visit(
@@ -317,9 +319,16 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
shouldCloseSubSyntax = {};
}
if ( numMatches > MAX_MATCHES )
numMatches = MAX_MATCHES;
if ( numMatches > 1 ) {
int fullMatchStart = matches[0].start;
int fullMatchEnd = matches[0].end;
if ( endRange && fullMatchEnd >= endRange->range.first )
fullMatchEnd = endRange->range.first;
if ( pattern.matchType == SyntaxPatternMatchType::RegEx ) {
priorityMap.clear();
priorityMap.resize( fullMatchEnd - fullMatchStart, 0 );
@@ -328,6 +337,10 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
int capStart = matches[captureIndex].start;
int capEnd = matches[captureIndex].end;
if ( endRange && capEnd >= endRange->range.first ) {
capEnd = endRange->range.first;
}
if ( capStart < fullMatchStart || capEnd > fullMatchEnd ||
capStart >= capEnd )
continue;
@@ -473,6 +486,13 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
String::utf8Next( strEnd );
end = start + ( strEnd - strStart );
}
if ( endRange && end >= endRange->range.first ) {
end = endRange->range.first;
if ( start == end )
return false;
}
patternText = textv.substr( start, end - start );
SyntaxStyleType type =
( pattern.types[0] == SyntaxStyleTypes::Symbol ||
@@ -499,6 +519,7 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
size_t size = text.size();
size_t startIdx = startIndex;
const SyntaxPattern* activePattern = nullptr;
static constexpr auto PATTERN_STACK_BUFFER =
MAX_PATTERN_STACK_SIZE * sizeof( PatternStackItem );
@@ -510,54 +531,129 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
while ( startIdx < size ) {
bool matched = false;
patternStack.clear();
activePattern = nullptr;
if ( curState.currentPatternIdx.state != SYNTAX_TOKENIZER_STATE_NONE ) {
const SyntaxPattern& pattern =
*curState.currentSyntax->getPatternFromState( curState.currentPatternIdx );
auto range = findNonEscaped( text, pattern.patterns[1], startIdx,
pattern.patterns.size() >= 3 ? pattern.patterns[2] : "",
pattern.matchType )
.range;
activePattern =
curState.currentSyntax->getPatternFromState( curState.currentPatternIdx );
eeASSERT( activePattern );
}
bool skip = false;
if ( activePattern ) {
auto endRange = findNonEscaped(
text, activePattern->patterns[1], startIdx,
activePattern->patterns.size() >= 3 ? activePattern->patterns[2] : "",
activePattern->matchType );
if ( curState.subsyntaxInfo != nullptr && curState.subsyntaxInfo->patterns.size() > 1 &&
curState.currentSyntax->getLanguageIndex() != syntax.getLanguageIndex() ) {
auto rangeSubsyntax =
findNonEscaped( text, curState.subsyntaxInfo->patterns[1], startIdx,
curState.subsyntaxInfo->patterns.size() >= 3
? curState.subsyntaxInfo->patterns[2]
: "",
pattern.matchType );
if ( rangeSubsyntax.range.first != -1 &&
( range.first == -1 || rangeSubsyntax.range.first < range.first ) ) {
if ( !skipSubSyntaxSeparator ) {
pushTokensToOpenCloseSubsyntax( startIdx, textv, curState.subsyntaxInfo,
rangeSubsyntax, tokens, true );
}
popSubsyntax( curState, retState, syntax );
startIdx = rangeSubsyntax.range.second;
skip = true;
}
}
if ( !skip ) {
if ( range.first != -1 ) {
if ( range.second > range.first && pattern.types.size() >= 3 ) {
pushToken( tokens, pattern.types[0],
textv.substr( startIdx, range.first - startIdx ) );
pushToken( tokens, pattern.types[pattern.types.size() - 1],
textv.substr( range.first, range.second - range.first ) );
} else {
pushToken( tokens, pattern.types[0],
textv.substr( startIdx, range.second - startIdx ) );
}
if ( activePattern->hasContentScope() ) {
if ( endRange.range.first == static_cast<Int64>( startIdx ) ) {
pushTokensToOpenCloseSubsyntax( startIdx, textv, activePattern, endRange,
tokens, true );
setSubsyntaxPatternIdx( curState, retState, SyntaxStateType{} );
startIdx = range.second;
} else {
pushToken( tokens, pattern.types[0], textv.substr( startIdx ) );
break;
startIdx = endRange.range.second;
continue;
}
const auto& contentScopePatterns =
curState.currentSyntax->getRepository( activePattern->contentScopeRepoHash );
auto contentScopeRepoGlobalIndex = curState.currentSyntax->getRepositoryIndex(
activePattern->contentScopeRepoHash );
patternStack.push_back( { &contentScopePatterns, 0,
static_cast<Uint8>( contentScopeRepoGlobalIndex ) } );
while ( !patternStack.empty() && !matched ) {
PatternStackItem& current = patternStack.back();
if ( current.index >= current.patterns->size() ) {
patternStack.pop_back();
continue;
}
const SyntaxPattern* innerPtrn = &current.patterns->data()[current.index];
SyntaxStateType patternState = { static_cast<Uint8>( current.index + 1 ),
current.repositoryIdx };
current.index++;
if ( innerPtrn->isRepositoryInclude() ) {
if ( patternStack.size() + 1 >= MAX_PATTERN_STACK_SIZE )
break;
const auto& targetRepo =
curState.currentSyntax->getRepository( innerPtrn->getRepositoryName() );
patternStack.push_back(
{ &targetRepo, 0, static_cast<Uint8>( innerPtrn->repositoryIdx ) } );
continue;
} else if ( innerPtrn->isRootSelfInclude() ) {
if ( patternStack.size() + 1 >= MAX_PATTERN_STACK_SIZE )
break;
patternStack.push_back( { &curState.currentSyntax->getPatterns(), 0, 0 } );
continue;
}
if ( startIdx != 0 &&
innerPtrn->matchType == SyntaxPatternMatchType::LuaPattern &&
innerPtrn->patterns[0][0] == '^' )
continue;
if ( matchPattern( *innerPtrn, startIdx, patternState,
endRange.numMatches ? &endRange : nullptr ) ) {
matched = true;
}
}
if ( matched )
continue;
if ( !matched && startIdx < text.size() ) {
char* strStart = const_cast<char*>( text.c_str() + startIdx );
char* strEnd = strStart;
String::utf8Next( strEnd );
int dist = strEnd - strStart;
if ( dist > 0 ) {
pushToken( tokens, activePattern->types[0], text.substr( startIdx, dist ) );
startIdx += dist;
} else {
Log::error( "Error parsing \"%s\" using syntax: %s", text.c_str(),
syntax.getLSPName().c_str() );
break;
}
continue;
}
} else {
bool skip = false;
if ( curState.subsyntaxInfo != nullptr &&
curState.subsyntaxInfo->patterns.size() > 1 &&
curState.currentSyntax->getLanguageIndex() != syntax.getLanguageIndex() ) {
auto rangeSubsyntax =
findNonEscaped( text, curState.subsyntaxInfo->patterns[1], startIdx,
curState.subsyntaxInfo->patterns.size() >= 3
? curState.subsyntaxInfo->patterns[2]
: "",
activePattern->matchType );
if ( rangeSubsyntax.range.first != -1 &&
( endRange.range.first == -1 ||
rangeSubsyntax.range.first < endRange.range.first ) ) {
if ( !skipSubSyntaxSeparator ) {
pushTokensToOpenCloseSubsyntax( startIdx, textv, curState.subsyntaxInfo,
rangeSubsyntax, tokens, true );
}
popSubsyntax( curState, retState, syntax );
startIdx = rangeSubsyntax.range.second;
skip = true;
}
}
if ( !skip ) {
if ( endRange.range.first != -1 ) {
pushTokensToOpenCloseSubsyntax( startIdx, textv, activePattern, endRange,
tokens, true );
setSubsyntaxPatternIdx( curState, retState, SyntaxStateType{} );
startIdx = endRange.range.second;
} else {
pushToken( tokens, activePattern->types[0], textv.substr( startIdx ) );
break;
}
}
}
}
@@ -599,7 +695,8 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
patternStack.push_back( { &curState.currentSyntax->getPatterns(), 0, 0 } );
} else {
if ( startIdx != 0 && pattern->patterns[0][0] == '^' )
if ( startIdx != 0 && pattern->matchType == SyntaxPatternMatchType::LuaPattern &&
pattern->patterns[0][0] == '^' )
continue;
SyntaxStateType patternIndex = { static_cast<Uint8>( current.index ),