mirror of
https://github.com/SpartanJ/eepp.git
synced 2026-05-29 17:46:29 +03:00
More work to support TextMate grammars.
This commit is contained in:
@@ -58,6 +58,9 @@ class EE_API RegEx : public PatternMatcher {
|
||||
ExtendedMore = 0x01000000u, // C
|
||||
Literal = 0x02000000u, // C
|
||||
MatchInvalidUtf = 0x04000000u, // J M D
|
||||
Anchored = 0x80000000u,
|
||||
NoUtfCheck = 0x40000000u,
|
||||
Endanchored = 0x20000000u,
|
||||
FilterOutCaptures =
|
||||
0x08000000u, // It will filter out repeated captures and same range captures
|
||||
};
|
||||
@@ -83,9 +86,10 @@ class EE_API RegEx : public PatternMatcher {
|
||||
mutable size_t mMatchNum;
|
||||
void* mCompiledPattern;
|
||||
int mCaptureCount;
|
||||
bool mValid{ false };
|
||||
bool mCached{ false };
|
||||
bool mFilterOutCaptures{ false };
|
||||
bool mValid : 1 { false };
|
||||
bool mCached : 1 { false };
|
||||
bool mFilterOutCaptures : 1 { false };
|
||||
bool mAnchored : 1 { false };
|
||||
};
|
||||
|
||||
}} // namespace EE::System
|
||||
|
||||
@@ -59,7 +59,9 @@ struct EE_API SyntaxPattern {
|
||||
const SyntaxDefinition* def{ nullptr };
|
||||
Uint16 flags{ 0 };
|
||||
Uint16 repositoryIdx{ 0 };
|
||||
std::vector<SyntaxPattern> subPatterns;
|
||||
std::vector<SyntaxPattern> contentPatterns;
|
||||
String::HashType contentScopeRepoHash{
|
||||
0 }; // Hash of the repository containing this rule's content patterns
|
||||
|
||||
SyntaxPattern( std::vector<std::string>&& _patterns, const std::string& _type,
|
||||
const std::string& _syntax = "",
|
||||
@@ -122,6 +124,8 @@ struct EE_API SyntaxPattern {
|
||||
inline bool checkIsRepositoryInclude() const {
|
||||
return checkIsIncludePattern() && patterns[1][0] == '#';
|
||||
}
|
||||
|
||||
inline bool hasContentScope() const { return contentScopeRepoHash != 0; }
|
||||
};
|
||||
|
||||
class EE_API SyntaxDefinition {
|
||||
@@ -245,6 +249,8 @@ class EE_API SyntaxDefinition {
|
||||
|
||||
const SyntaxPattern* getPatternFromState( const SyntaxStateType& state ) const;
|
||||
|
||||
void compile();
|
||||
|
||||
protected:
|
||||
friend class SyntaxDefinitionManager;
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ void addCPP() {
|
||||
{ "#endif", "keyword" }, { "#include", "keyword" },
|
||||
{ "#define", "keyword" }, { "#undef", "keyword" },
|
||||
{ "#line", "keyword" }, { "#error", "keyword" },
|
||||
{ "#pragma", "keyword" },
|
||||
{ "#pragma", "keyword" }, { "signed", "keyword2" },
|
||||
|
||||
},
|
||||
"//",
|
||||
|
||||
@@ -11,6 +11,27 @@ namespace EE { namespace UI { namespace Doc {
|
||||
|
||||
SyntaxDefMap<SyntaxStyleType, std::string> SyntaxPattern::SyntaxStyleTypeCache = {};
|
||||
|
||||
/**
 * Recursively moves the content patterns nested under `pattern` into repositories of `def`.
 *
 * Children are processed before their parent (depth-first). For every pattern that still has a
 * non-empty `contentPatterns` list, a repository name of the form
 * "$CONTENT_<language>_<namePrefixSeed>_uid<N>" is generated, the list is handed to
 * SyntaxDefinition::addRepository() and the hash of the generated name is stored in
 * `pattern.contentScopeRepoHash`.
 *
 * @param def Definition that receives the generated repositories.
 * @param pattern Pattern whose content patterns are lifted.
 * @param namePrefixSeed Prefix used to build the generated repository names.
 * @param uniqueIdCounter Running counter appended to each generated name; incremented once per
 * generated repository.
 */
static void liftContentPatternsRecursive( SyntaxDefinition& def, SyntaxPattern& pattern,
										  std::string_view namePrefixSeed,
										  Uint64& uniqueIdCounter ) {
	for ( Uint64 i = 0; i < pattern.contentPatterns.size(); ++i ) {
		// Each child gets its own seed so the generated names differ per position.
		// std::string_view has no operator+ (before C++26), so the name is built by appending
		// to an owning std::string instead.
		std::string childPrefixSeed( namePrefixSeed );
		childPrefixSeed += "_cp";
		childPrefixSeed += String::toString( i );
		liftContentPatternsRecursive( def, pattern.contentPatterns[i], childPrefixSeed,
									  uniqueIdCounter );
	}

	if ( pattern.contentPatterns.empty() )
		return;

	// Generate the repository name for this pattern's content scope
	std::string contentRepoName( "$CONTENT_" );
	contentRepoName += def.getLanguageNameForFileSystem();
	contentRepoName += "_";
	contentRepoName += namePrefixSeed;
	contentRepoName += "_uid";
	contentRepoName += String::toString( uniqueIdCounter++ );

	def.addRepository( contentRepoName, std::move( pattern.contentPatterns ) );
	pattern.contentScopeRepoHash = String::hash( contentRepoName );
}
|
||||
|
||||
template <typename SyntaxStyleType> void updateCache( const SyntaxPattern& ptrn ) {
|
||||
if constexpr ( std::is_same_v<SyntaxStyleType, std::string> ) {
|
||||
return;
|
||||
@@ -66,7 +87,7 @@ static void updatePatternsState( SyntaxDefinition& def, std::vector<SyntaxPatter
|
||||
for ( auto& ptrn : ptrns ) {
|
||||
updatePatternState( def, ptrn );
|
||||
|
||||
for ( auto& subPattern : ptrn.subPatterns )
|
||||
for ( auto& subPattern : ptrn.contentPatterns )
|
||||
updatePatternState( def, subPattern );
|
||||
}
|
||||
}
|
||||
@@ -357,7 +378,8 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
|
||||
endTypesNames( std::move( _endTypes ) ),
|
||||
syntax( _syntax ),
|
||||
matchType( matchType ),
|
||||
subPatterns( std::move( _subPatterns ) ) {
|
||||
contentPatterns( std::move( _subPatterns ) ) {
|
||||
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
|
||||
updateCache<SyntaxStyleType>( *this );
|
||||
}
|
||||
|
||||
@@ -372,6 +394,7 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
|
||||
endTypesNames( std::move( _endTypes ) ),
|
||||
syntax( _syntax ),
|
||||
matchType( matchType ) {
|
||||
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
|
||||
updateCache<SyntaxStyleType>( *this );
|
||||
}
|
||||
|
||||
@@ -382,6 +405,7 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns, const std::s
|
||||
typesNames( { _type } ),
|
||||
dynSyntax( std::move( _syntax ) ),
|
||||
matchType( matchType ) {
|
||||
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
|
||||
updateCache<SyntaxStyleType>( *this );
|
||||
}
|
||||
|
||||
@@ -393,6 +417,7 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
|
||||
typesNames( std::move( _types ) ),
|
||||
dynSyntax( std::move( _syntax ) ),
|
||||
matchType( matchType ) {
|
||||
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
|
||||
updateCache<SyntaxStyleType>( *this );
|
||||
}
|
||||
|
||||
@@ -407,11 +432,13 @@ SyntaxPattern::SyntaxPattern( std::vector<std::string>&& _patterns,
|
||||
endTypesNames( std::move( _endTypes ) ),
|
||||
dynSyntax( std::move( _syntax ) ),
|
||||
matchType( matchType ) {
|
||||
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
|
||||
updateCache<SyntaxStyleType>( *this );
|
||||
}
|
||||
|
||||
SyntaxDefinition& SyntaxDefinition::addRepository( const std::string& name,
|
||||
std::vector<SyntaxPattern>&& patterns ) {
|
||||
eeASSERT( patterns.size() < std::numeric_limits<Uint8>::max() - 1 );
|
||||
auto hash = String::hash( name );
|
||||
mRepositoryIndex[hash] = ++mRepositoryIndexCounter;
|
||||
mRepositoryNames[hash] = name;
|
||||
@@ -486,4 +513,17 @@ std::string SyntaxDefinition::getRepositoryName( String::HashType hash ) const {
|
||||
return it != mRepositoryNames.end() ? it->second : "";
|
||||
}
|
||||
|
||||
void SyntaxDefinition::compile() {
|
||||
Uint64 uniqueIdCounter = 0;
|
||||
for ( SyntaxPattern& p : mPatterns )
|
||||
liftContentPatternsRecursive( *this, p, "root", uniqueIdCounter );
|
||||
|
||||
for ( auto& repoPair : mRepository ) {
|
||||
for ( SyntaxPattern& p : repoPair.second ) {
|
||||
liftContentPatternsRecursive( *this, p, mRepositoryNames[repoPair.first],
|
||||
uniqueIdCounter );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}}} // namespace EE::UI::Doc
|
||||
|
||||
@@ -48,16 +48,27 @@ class TextMateScopeMapper {
|
||||
{ "entity.name.tag", "keyword2" }, // HTML/XML tag name
|
||||
{ "keyword.control", "keyword" }, // if, else, for, while, return etc.
|
||||
{ "keyword.operator", "operator" }, // +, -, =, and, or, etc.
|
||||
{ "punctuation.definition.tag", "operator" }, // <, >, </ in HTML/XML
|
||||
{ "support.function", "function" }, // Built-in functions (print, len)
|
||||
{ "support.type", "keyword2" }, // Built-in types (string, list)
|
||||
{ "support.class", "keyword2" }, // Built-in classes
|
||||
{ "storage.modifier", "keyword" }, // public, private, static, const etc.
|
||||
{ "constant.numeric", "number" }, // Numbers
|
||||
{ "constant.language", "literal" }, // true, false, null etc.
|
||||
{ "comment.unused", "normal" }, // unused comments pattern
|
||||
{ "punctuation.definition.string", "string" }, // <, >, </ in HTML/XML
|
||||
{ "punctuation.definition.tag", "operator" }, // <, >, </ in HTML/XML
|
||||
{ "support.function", "function" }, // Built-in functions (print, len)
|
||||
{ "support.type", "keyword2" }, // Built-in types (string, list)
|
||||
{ "support.class", "keyword2" }, // Built-in classes
|
||||
{ "storage.modifier", "keyword" }, // public, private, static, const etc.
|
||||
{ "constant.numeric", "number" }, // Numbers
|
||||
{ "constant.language", "literal" }, // true, false, null etc.
|
||||
{ "comment.unused", "normal" }, // unused comments pattern
|
||||
{ "declaration.package", "literal" },
|
||||
{ "declaration.import", "literal" },
|
||||
|
||||
// -- General Categories --
|
||||
{ "declaration", "literal" },
|
||||
{ "identifier", "literal" },
|
||||
{ "function", "function" },
|
||||
{ "type", "keyword2" },
|
||||
{ "scope", "operator" },
|
||||
{ "invalid", "error" },
|
||||
{ "external", "keyword" },
|
||||
{ "tag", "operator" },
|
||||
{ "comment", "comment" }, // Comments
|
||||
{ "string", "string" }, // Strings
|
||||
{ "keyword", "keyword" }, // Any other keyword
|
||||
@@ -111,6 +122,16 @@ class TextMateScopeMapper {
|
||||
}
|
||||
}
|
||||
|
||||
if ( scopeName.starts_with( "meta." ) )
|
||||
return scopeToType( scopeName.substr( 5 ) );
|
||||
|
||||
for ( const auto& mapping : scope_map_ ) {
|
||||
const std::string_view tmPrefix = mapping.first;
|
||||
if ( String::contains( scopeName, tmPrefix ) ) {
|
||||
return std::string( mapping.second );
|
||||
}
|
||||
}
|
||||
|
||||
// If no prefix matched, return the default type
|
||||
return "normal";
|
||||
}
|
||||
@@ -160,46 +181,60 @@ const std::vector<SyntaxDefinition>& SyntaxDefinitionManager::getDefinitions() c
|
||||
return mDefinitions;
|
||||
}
|
||||
|
||||
static std::optional<nlohmann::json> serializePattern( const SyntaxPattern& ptrn,
|
||||
const SyntaxDefinition& def ) {
|
||||
json pattern;
|
||||
auto ptrnType =
|
||||
ptrn.matchType == SyntaxPatternMatchType::RegEx
|
||||
? "regex"
|
||||
: ( ptrn.matchType == SyntaxPatternMatchType::Parser ? "parser" : "pattern" );
|
||||
|
||||
// Do not export injected patterns
|
||||
if ( ptrn.matchType == SyntaxPatternMatchType::LuaPattern && ptrn.patterns.size() == 1 &&
|
||||
( ptrn.patterns[0] == "%s+" || ptrn.patterns[0] == "%w+%f[%s]" ) )
|
||||
return {};
|
||||
|
||||
bool hasInclude = false;
|
||||
if ( ptrn.patterns.size() == 2 && ptrn.patterns[0] == "include" ) {
|
||||
hasInclude = true;
|
||||
pattern["include"] = ptrn.patterns[1];
|
||||
} else if ( ptrn.patterns.size() == 1 ) {
|
||||
pattern[ptrnType] = ptrn.patterns[0];
|
||||
} else if ( ptrn.patterns.size() ) {
|
||||
pattern[ptrnType] = ptrn.patterns;
|
||||
}
|
||||
|
||||
if ( !hasInclude ) {
|
||||
if ( ptrn.typesNames.size() == 1 ) {
|
||||
pattern["type"] = ptrn.typesNames[0];
|
||||
} else if ( ptrn.typesNames.size() ) {
|
||||
pattern["type"] = ptrn.typesNames;
|
||||
}
|
||||
if ( ptrn.endTypesNames.size() == 1 ) {
|
||||
pattern["end_type"] = ptrn.endTypesNames[0];
|
||||
} else if ( ptrn.endTypesNames.size() ) {
|
||||
pattern["end_type"] = ptrn.endTypesNames;
|
||||
}
|
||||
if ( !ptrn.syntax.empty() )
|
||||
pattern["syntax"] = ptrn.syntax == def.getLanguageName() ? "$self" : ptrn.syntax;
|
||||
}
|
||||
|
||||
if ( ptrn.hasContentScope() ) {
|
||||
const auto& contentPatterns = def.getRepository( ptrn.contentScopeRepoHash );
|
||||
nlohmann::json ptrns = nlohmann::json::array();
|
||||
for ( const auto& pattern : contentPatterns ) {
|
||||
auto ojptrn = serializePattern( pattern, def );
|
||||
if ( ojptrn )
|
||||
ptrns.emplace_back( std::move( *ojptrn ) );
|
||||
}
|
||||
if ( !ptrns.empty() )
|
||||
pattern["patterns"] = std::move( ptrns );
|
||||
}
|
||||
|
||||
return pattern;
|
||||
}
|
||||
|
||||
static json toJson( const SyntaxDefinition& def ) {
|
||||
const auto serializePattern =
|
||||
[&def]( const SyntaxPattern& ptrn ) -> std::optional<nlohmann::json> {
|
||||
json pattern;
|
||||
auto ptrnType =
|
||||
ptrn.matchType == SyntaxPatternMatchType::RegEx
|
||||
? "regex"
|
||||
: ( ptrn.matchType == SyntaxPatternMatchType::Parser ? "parser" : "pattern" );
|
||||
|
||||
// Do not export injected patterns
|
||||
if ( ptrn.matchType == SyntaxPatternMatchType::LuaPattern && ptrn.patterns.size() == 1 &&
|
||||
( ptrn.patterns[0] == "%s+" || ptrn.patterns[0] == "%w+%f[%s]" ) )
|
||||
return {};
|
||||
|
||||
bool hasInclude = false;
|
||||
if ( ptrn.patterns.size() == 2 && ptrn.patterns[0] == "include" ) {
|
||||
hasInclude = true;
|
||||
pattern["include"] = ptrn.patterns[1];
|
||||
} else if ( ptrn.patterns.size() == 1 ) {
|
||||
pattern[ptrnType] = ptrn.patterns[0];
|
||||
} else if ( ptrn.patterns.size() ) {
|
||||
pattern[ptrnType] = ptrn.patterns;
|
||||
}
|
||||
|
||||
if ( !hasInclude ) {
|
||||
if ( ptrn.typesNames.size() == 1 ) {
|
||||
pattern["type"] = ptrn.typesNames[0];
|
||||
} else if ( ptrn.typesNames.size() ) {
|
||||
pattern["type"] = ptrn.typesNames;
|
||||
}
|
||||
if ( ptrn.endTypesNames.size() == 1 ) {
|
||||
pattern["end_type"] = ptrn.endTypesNames[0];
|
||||
} else if ( ptrn.endTypesNames.size() ) {
|
||||
pattern["end_type"] = ptrn.endTypesNames;
|
||||
}
|
||||
if ( !ptrn.syntax.empty() )
|
||||
pattern["syntax"] = ptrn.syntax == def.getLanguageName() ? "$self" : ptrn.syntax;
|
||||
}
|
||||
return pattern;
|
||||
};
|
||||
|
||||
json j;
|
||||
j["name"] = def.getLanguageName();
|
||||
@@ -211,7 +246,7 @@ static json toJson( const SyntaxDefinition& def ) {
|
||||
if ( !def.getPatterns().empty() ) {
|
||||
j["patterns"] = json::array();
|
||||
for ( const auto& ptrn : def.getPatterns() ) {
|
||||
auto pattern = serializePattern( ptrn );
|
||||
auto pattern = serializePattern( ptrn, def );
|
||||
if ( pattern )
|
||||
j["patterns"].emplace_back( std::move( *pattern ) );
|
||||
}
|
||||
@@ -251,9 +286,12 @@ static json toJson( const SyntaxDefinition& def ) {
|
||||
|
||||
for ( const auto& [hash, patterns] : def.getRepositories() ) {
|
||||
std::string name = def.getRepositoryName( hash );
|
||||
if ( name.starts_with( "$CONTENT_" ) )
|
||||
continue;
|
||||
|
||||
nlohmann::json repo;
|
||||
for ( const auto& pattern : patterns ) {
|
||||
auto ojptrn = serializePattern( pattern );
|
||||
auto ojptrn = serializePattern( pattern, def );
|
||||
if ( ojptrn )
|
||||
repo.emplace_back( std::move( *ojptrn ) );
|
||||
}
|
||||
@@ -432,6 +470,7 @@ namespace EE { namespace UI { namespace Doc { namespace Language {
|
||||
|
||||
/**
 * Registers a syntax definition.
 *
 * The definition receives the current number of stored definitions as its language index, is
 * compiled, and is then moved to the end of the definitions list.
 *
 * @param syntaxStyle Definition to take over.
 * @return Reference to the stored definition.
 */
SyntaxDefinition& SyntaxDefinitionManager::add( SyntaxDefinition&& syntaxStyle ) {
	// The index equals the slot the definition is about to occupy
	syntaxStyle.mLanguageIndex = mDefinitions.size();
	syntaxStyle.compile();
	return mDefinitions.emplace_back( std::move( syntaxStyle ) );
}
|
||||
@@ -551,7 +590,8 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
|
||||
auto ctype = SyntaxPatternMatchType::LuaPattern;
|
||||
std::string syntax;
|
||||
|
||||
const auto fillTypes = []( const nlohmann::json& captures, std::vector<std::string>& type ) {
|
||||
const auto fillTypes = []( const nlohmann::json& captures, std::vector<std::string>& type,
|
||||
const nlohmann::json& parent ) {
|
||||
Uint64 totalCaptures = 0;
|
||||
for ( const auto& [capNumStr, _] : captures.items() ) {
|
||||
Uint64 num;
|
||||
@@ -564,6 +604,8 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
|
||||
if ( captures.contains( capNumStr ) && captures[capNumStr].contains( "name" ) ) {
|
||||
type.emplace_back(
|
||||
TextMateScopeMapper::scopeToType( captures[capNumStr].value( "name", "" ) ) );
|
||||
} else if ( parent.contains( "name" ) ) {
|
||||
type.emplace_back( TextMateScopeMapper::scopeToType( parent.value( "name", "" ) ) );
|
||||
} else {
|
||||
type.emplace_back( "normal" );
|
||||
}
|
||||
@@ -571,17 +613,18 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
|
||||
};
|
||||
|
||||
// Assume TextMate pattern
|
||||
if ( pattern.contains( "name" ) || pattern.contains( "begin" ) ) {
|
||||
if ( pattern.contains( "name" ) || pattern.contains( "begin" ) ||
|
||||
pattern.contains( "match" ) ) {
|
||||
ctype = SyntaxPatternMatchType::RegEx;
|
||||
|
||||
if ( pattern.contains( "beginCaptures" ) )
|
||||
fillTypes( pattern["beginCaptures"], type );
|
||||
fillTypes( pattern["beginCaptures"], type, pattern );
|
||||
|
||||
if ( pattern.contains( "endCaptures" ) )
|
||||
fillTypes( pattern["endCaptures"], endType );
|
||||
fillTypes( pattern["endCaptures"], endType, pattern );
|
||||
|
||||
if ( type.empty() && pattern.contains( "captures" ) )
|
||||
fillTypes( pattern["captures"], type );
|
||||
fillTypes( pattern["captures"], type, pattern );
|
||||
|
||||
if ( type.empty() && pattern.contains( "name" ) ) {
|
||||
type.emplace_back( TextMateScopeMapper::scopeToType( pattern.value( "name", "" ) ) );
|
||||
@@ -609,7 +652,9 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
|
||||
patterns[0].value( "include", "" ) == "$self" ) {
|
||||
syntax = "$self";
|
||||
} else if ( patterns[0].contains( "name" ) && patterns[0].contains( "match" ) &&
|
||||
patterns[0].value( "name", "" ).starts_with( "constant.character.escape" ) ) {
|
||||
patterns[0]
|
||||
.value( "name", "" )
|
||||
.starts_with( "constant.character.escape" ) ) {
|
||||
ptrns.emplace_back( patterns[0].value( "match", "" ) );
|
||||
} else {
|
||||
subPatterns.push_back( parsePattern( patterns[0] ) );
|
||||
@@ -621,6 +666,10 @@ static SyntaxPattern parsePattern( const nlohmann::json& pattern ) {
|
||||
}
|
||||
}
|
||||
|
||||
if ( type.empty() && ( pattern.contains( "name" ) || pattern.contains( "begin" ) ) ) {
|
||||
type.emplace_back( "normal" );
|
||||
}
|
||||
|
||||
} else {
|
||||
if ( pattern.contains( "syntax" ) && pattern["syntax"].is_string() )
|
||||
syntax = pattern.value( "syntax", "" );
|
||||
@@ -699,6 +748,7 @@ static SyntaxDefinition loadTextMateLanguage( const nlohmann::json& json, Syntax
|
||||
const auto& scopeName = json.value( "scopeName", "" );
|
||||
def.addFileType( "%." + FileSystem::fileExtension( scopeName ) + "$" );
|
||||
}
|
||||
def.compile();
|
||||
return def;
|
||||
}
|
||||
|
||||
@@ -814,6 +864,8 @@ static SyntaxDefinition loadLanguage( const nlohmann::json& json ) {
|
||||
|
||||
def.setFoldBraces( folds );
|
||||
}
|
||||
|
||||
def.compile();
|
||||
} catch ( const json::exception& e ) {
|
||||
Log::error( "SyntaxDefinition loadLanguage failed:\n%s", e.what() );
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ struct PatternStackItem {
|
||||
// large line. This will help the editor to cull the rendering only for the visible tokens
|
||||
#define MAX_TOKEN_SIZE ( 512 )
|
||||
|
||||
#define MAX_MATCHES ( 12 )
|
||||
#define MAX_MATCHES ( 64 )
|
||||
|
||||
#define MAX_PATTERN_STACK_SIZE ( 16 )
|
||||
|
||||
@@ -284,17 +284,19 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
std::optional<NonEscapedMatch> shouldCloseSubSyntax;
|
||||
|
||||
const auto matchPattern = [&]( const SyntaxPattern& pattern, size_t& startIdx,
|
||||
SyntaxStateType patternIndex ) -> bool {
|
||||
SyntaxStateType patternIndex,
|
||||
NonEscapedMatch* endRange = nullptr ) -> bool {
|
||||
int start = 0, end = 0;
|
||||
patternStr =
|
||||
pattern.matchType != SyntaxPatternMatchType::Parser
|
||||
pattern.matchType == SyntaxPatternMatchType::LuaPattern
|
||||
? pattern.patterns[0][0] == '^' ? pattern.patterns[0] : "^" + pattern.patterns[0]
|
||||
: pattern.patterns[0];
|
||||
std::variant<RegEx, LuaPattern, ParserMatcher> wordsVar =
|
||||
pattern.matchType == SyntaxPatternMatchType::LuaPattern
|
||||
? std::variant<RegEx, LuaPattern, ParserMatcher>( LuaPattern( patternStr ) )
|
||||
: ( pattern.matchType == SyntaxPatternMatchType::RegEx
|
||||
? std::variant<RegEx, LuaPattern, ParserMatcher>( RegEx( patternStr ) )
|
||||
? std::variant<RegEx, LuaPattern, ParserMatcher>(
|
||||
RegEx( patternStr, RegEx::Options::Utf | RegEx::Options::Anchored ) )
|
||||
: std::variant<RegEx, LuaPattern, ParserMatcher>(
|
||||
ParserMatcher( patternStr ) ) );
|
||||
PatternMatcher& words = std::visit(
|
||||
@@ -317,9 +319,16 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
shouldCloseSubSyntax = {};
|
||||
}
|
||||
|
||||
if ( numMatches > MAX_MATCHES )
|
||||
numMatches = MAX_MATCHES;
|
||||
|
||||
if ( numMatches > 1 ) {
|
||||
int fullMatchStart = matches[0].start;
|
||||
int fullMatchEnd = matches[0].end;
|
||||
|
||||
if ( endRange && fullMatchEnd >= endRange->range.first )
|
||||
fullMatchEnd = endRange->range.first;
|
||||
|
||||
if ( pattern.matchType == SyntaxPatternMatchType::RegEx ) {
|
||||
priorityMap.clear();
|
||||
priorityMap.resize( fullMatchEnd - fullMatchStart, 0 );
|
||||
@@ -328,6 +337,10 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
int capStart = matches[captureIndex].start;
|
||||
int capEnd = matches[captureIndex].end;
|
||||
|
||||
if ( endRange && capEnd >= endRange->range.first ) {
|
||||
capEnd = endRange->range.first;
|
||||
}
|
||||
|
||||
if ( capStart < fullMatchStart || capEnd > fullMatchEnd ||
|
||||
capStart >= capEnd )
|
||||
continue;
|
||||
@@ -473,6 +486,13 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
String::utf8Next( strEnd );
|
||||
end = start + ( strEnd - strStart );
|
||||
}
|
||||
|
||||
if ( endRange && end >= endRange->range.first ) {
|
||||
end = endRange->range.first;
|
||||
if ( start == end )
|
||||
return false;
|
||||
}
|
||||
|
||||
patternText = textv.substr( start, end - start );
|
||||
SyntaxStyleType type =
|
||||
( pattern.types[0] == SyntaxStyleTypes::Symbol ||
|
||||
@@ -499,6 +519,7 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
|
||||
size_t size = text.size();
|
||||
size_t startIdx = startIndex;
|
||||
const SyntaxPattern* activePattern = nullptr;
|
||||
|
||||
static constexpr auto PATTERN_STACK_BUFFER =
|
||||
MAX_PATTERN_STACK_SIZE * sizeof( PatternStackItem );
|
||||
@@ -510,54 +531,129 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
while ( startIdx < size ) {
|
||||
bool matched = false;
|
||||
patternStack.clear();
|
||||
activePattern = nullptr;
|
||||
|
||||
if ( curState.currentPatternIdx.state != SYNTAX_TOKENIZER_STATE_NONE ) {
|
||||
const SyntaxPattern& pattern =
|
||||
*curState.currentSyntax->getPatternFromState( curState.currentPatternIdx );
|
||||
auto range = findNonEscaped( text, pattern.patterns[1], startIdx,
|
||||
pattern.patterns.size() >= 3 ? pattern.patterns[2] : "",
|
||||
pattern.matchType )
|
||||
.range;
|
||||
activePattern =
|
||||
curState.currentSyntax->getPatternFromState( curState.currentPatternIdx );
|
||||
eeASSERT( activePattern );
|
||||
}
|
||||
|
||||
bool skip = false;
|
||||
if ( activePattern ) {
|
||||
auto endRange = findNonEscaped(
|
||||
text, activePattern->patterns[1], startIdx,
|
||||
activePattern->patterns.size() >= 3 ? activePattern->patterns[2] : "",
|
||||
activePattern->matchType );
|
||||
|
||||
if ( curState.subsyntaxInfo != nullptr && curState.subsyntaxInfo->patterns.size() > 1 &&
|
||||
curState.currentSyntax->getLanguageIndex() != syntax.getLanguageIndex() ) {
|
||||
auto rangeSubsyntax =
|
||||
findNonEscaped( text, curState.subsyntaxInfo->patterns[1], startIdx,
|
||||
curState.subsyntaxInfo->patterns.size() >= 3
|
||||
? curState.subsyntaxInfo->patterns[2]
|
||||
: "",
|
||||
pattern.matchType );
|
||||
|
||||
if ( rangeSubsyntax.range.first != -1 &&
|
||||
( range.first == -1 || rangeSubsyntax.range.first < range.first ) ) {
|
||||
if ( !skipSubSyntaxSeparator ) {
|
||||
pushTokensToOpenCloseSubsyntax( startIdx, textv, curState.subsyntaxInfo,
|
||||
rangeSubsyntax, tokens, true );
|
||||
}
|
||||
popSubsyntax( curState, retState, syntax );
|
||||
startIdx = rangeSubsyntax.range.second;
|
||||
skip = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !skip ) {
|
||||
if ( range.first != -1 ) {
|
||||
if ( range.second > range.first && pattern.types.size() >= 3 ) {
|
||||
pushToken( tokens, pattern.types[0],
|
||||
textv.substr( startIdx, range.first - startIdx ) );
|
||||
pushToken( tokens, pattern.types[pattern.types.size() - 1],
|
||||
textv.substr( range.first, range.second - range.first ) );
|
||||
} else {
|
||||
pushToken( tokens, pattern.types[0],
|
||||
textv.substr( startIdx, range.second - startIdx ) );
|
||||
}
|
||||
if ( activePattern->hasContentScope() ) {
|
||||
if ( endRange.range.first == static_cast<Int64>( startIdx ) ) {
|
||||
pushTokensToOpenCloseSubsyntax( startIdx, textv, activePattern, endRange,
|
||||
tokens, true );
|
||||
setSubsyntaxPatternIdx( curState, retState, SyntaxStateType{} );
|
||||
startIdx = range.second;
|
||||
} else {
|
||||
pushToken( tokens, pattern.types[0], textv.substr( startIdx ) );
|
||||
break;
|
||||
startIdx = endRange.range.second;
|
||||
continue;
|
||||
}
|
||||
|
||||
const auto& contentScopePatterns =
|
||||
curState.currentSyntax->getRepository( activePattern->contentScopeRepoHash );
|
||||
|
||||
auto contentScopeRepoGlobalIndex = curState.currentSyntax->getRepositoryIndex(
|
||||
activePattern->contentScopeRepoHash );
|
||||
|
||||
patternStack.push_back( { &contentScopePatterns, 0,
|
||||
static_cast<Uint8>( contentScopeRepoGlobalIndex ) } );
|
||||
|
||||
while ( !patternStack.empty() && !matched ) {
|
||||
PatternStackItem& current = patternStack.back();
|
||||
if ( current.index >= current.patterns->size() ) {
|
||||
patternStack.pop_back();
|
||||
continue;
|
||||
}
|
||||
const SyntaxPattern* innerPtrn = ¤t.patterns->data()[current.index];
|
||||
SyntaxStateType patternState = { static_cast<Uint8>( current.index + 1 ),
|
||||
current.repositoryIdx };
|
||||
current.index++;
|
||||
|
||||
if ( innerPtrn->isRepositoryInclude() ) {
|
||||
if ( patternStack.size() + 1 >= MAX_PATTERN_STACK_SIZE )
|
||||
break;
|
||||
const auto& targetRepo =
|
||||
curState.currentSyntax->getRepository( innerPtrn->getRepositoryName() );
|
||||
patternStack.push_back(
|
||||
{ &targetRepo, 0, static_cast<Uint8>( innerPtrn->repositoryIdx ) } );
|
||||
continue;
|
||||
} else if ( innerPtrn->isRootSelfInclude() ) {
|
||||
if ( patternStack.size() + 1 >= MAX_PATTERN_STACK_SIZE )
|
||||
break;
|
||||
patternStack.push_back( { &curState.currentSyntax->getPatterns(), 0, 0 } );
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( startIdx != 0 &&
|
||||
innerPtrn->matchType == SyntaxPatternMatchType::LuaPattern &&
|
||||
innerPtrn->patterns[0][0] == '^' )
|
||||
continue;
|
||||
|
||||
if ( matchPattern( *innerPtrn, startIdx, patternState,
|
||||
endRange.numMatches ? &endRange : nullptr ) ) {
|
||||
matched = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ( matched )
|
||||
continue;
|
||||
|
||||
if ( !matched && startIdx < text.size() ) {
|
||||
char* strStart = const_cast<char*>( text.c_str() + startIdx );
|
||||
char* strEnd = strStart;
|
||||
String::utf8Next( strEnd );
|
||||
int dist = strEnd - strStart;
|
||||
if ( dist > 0 ) {
|
||||
pushToken( tokens, activePattern->types[0], text.substr( startIdx, dist ) );
|
||||
startIdx += dist;
|
||||
} else {
|
||||
Log::error( "Error parsing \"%s\" using syntax: %s", text.c_str(),
|
||||
syntax.getLSPName().c_str() );
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
bool skip = false;
|
||||
|
||||
if ( curState.subsyntaxInfo != nullptr &&
|
||||
curState.subsyntaxInfo->patterns.size() > 1 &&
|
||||
curState.currentSyntax->getLanguageIndex() != syntax.getLanguageIndex() ) {
|
||||
auto rangeSubsyntax =
|
||||
findNonEscaped( text, curState.subsyntaxInfo->patterns[1], startIdx,
|
||||
curState.subsyntaxInfo->patterns.size() >= 3
|
||||
? curState.subsyntaxInfo->patterns[2]
|
||||
: "",
|
||||
activePattern->matchType );
|
||||
|
||||
if ( rangeSubsyntax.range.first != -1 &&
|
||||
( endRange.range.first == -1 ||
|
||||
rangeSubsyntax.range.first < endRange.range.first ) ) {
|
||||
if ( !skipSubSyntaxSeparator ) {
|
||||
pushTokensToOpenCloseSubsyntax( startIdx, textv, curState.subsyntaxInfo,
|
||||
rangeSubsyntax, tokens, true );
|
||||
}
|
||||
popSubsyntax( curState, retState, syntax );
|
||||
startIdx = rangeSubsyntax.range.second;
|
||||
skip = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ( !skip ) {
|
||||
if ( endRange.range.first != -1 ) {
|
||||
pushTokensToOpenCloseSubsyntax( startIdx, textv, activePattern, endRange,
|
||||
tokens, true );
|
||||
setSubsyntaxPatternIdx( curState, retState, SyntaxStateType{} );
|
||||
startIdx = endRange.range.second;
|
||||
} else {
|
||||
pushToken( tokens, activePattern->types[0], textv.substr( startIdx ) );
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -599,7 +695,8 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax
|
||||
|
||||
patternStack.push_back( { &curState.currentSyntax->getPatterns(), 0, 0 } );
|
||||
} else {
|
||||
if ( startIdx != 0 && pattern->patterns[0][0] == '^' )
|
||||
if ( startIdx != 0 && pattern->matchType == SyntaxPatternMatchType::LuaPattern &&
|
||||
pattern->patterns[0][0] == '^' )
|
||||
continue;
|
||||
|
||||
SyntaxStateType patternIndex = { static_cast<Uint8>( current.index ),
|
||||
|
||||
Reference in New Issue
Block a user