diff --git a/include/eepp/system/regex.hpp b/include/eepp/system/regex.hpp index d50df2a56..0261a9258 100644 --- a/include/eepp/system/regex.hpp +++ b/include/eepp/system/regex.hpp @@ -62,8 +62,7 @@ class EE_API RegEx : public PatternMatcher { 0x08000000u, // It will filter out repeated captures and same range captures }; - RegEx( const std::string_view& pattern, - Uint32 options = Options::Utf | Options::FilterOutCaptures, bool useCache = true ); + RegEx( const std::string_view& pattern, Uint32 options = Options::Utf, bool useCache = true ); virtual ~RegEx(); diff --git a/src/eepp/ui/doc/syntaxtokenizer.cpp b/src/eepp/ui/doc/syntaxtokenizer.cpp index 613b207fe..80dc98d0f 100644 --- a/src/eepp/ui/doc/syntaxtokenizer.cpp +++ b/src/eepp/ui/doc/syntaxtokenizer.cpp @@ -271,6 +271,7 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax std::string_view patternText; std::string patternTextStr; std::optional shouldCloseSubSyntax; + static thread_local std::vector priorityMap; while ( i < size ) { if ( curState.currentPatternIdx != SYNTAX_TOKENIZER_STATE_NONE ) { @@ -356,101 +357,174 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax if ( !words.isValid() ) // Skip invalid patterns continue; if ( words.matches( text, matches, i ) && ( numMatches = words.getNumMatches() ) > 0 ) { - if ( numMatches > 1 ) { - int patternMatchStart = matches[0].start; - int patternMatchEnd = matches[0].end; - auto patternType = pattern.types[0]; - int lastStart = patternMatchStart; - int lastEnd = patternMatchEnd; - - if ( shouldCloseSubSyntax ) { - if ( shouldCloseSubSyntax->range.second >= patternMatchEnd ) { - if ( !skipSubSyntaxSeparator ) { - pushTokensToOpenCloseSubsyntax( i, textv, curState.subsyntaxInfo, - *shouldCloseSubSyntax, tokens ); - } - popSubsyntax( curState, retState, syntax ); - i = shouldCloseSubSyntax->range.second; - matched = true; - shouldCloseSubSyntax = {}; - break; + if ( shouldCloseSubSyntax ) { + if ( shouldCloseSubSyntax->range.second >= matches[0].end ) { + if ( !skipSubSyntaxSeparator ) { + pushTokensToOpenCloseSubsyntax( i, textv, curState.subsyntaxInfo, + *shouldCloseSubSyntax, tokens ); } + popSubsyntax( curState, retState, syntax ); + i = shouldCloseSubSyntax->range.second; + matched = true; shouldCloseSubSyntax = {}; + break; } + shouldCloseSubSyntax = {}; + } - for ( size_t curMatch = 1; curMatch < numMatches; curMatch++ ) { - start = matches[curMatch].start; - end = matches[curMatch].end; - if ( start == end || start < 0 || end < 0 ) - continue; - if ( pattern.patterns.size() >= 3 && i > 0 && - text[i - 1] == pattern.patterns[2][0] ) - continue; - Uint8 lead = ( 0xff & ( text[start] ) ); - if ( !( lead < 0x80 ) ) { - char* strStart = const_cast( text.c_str() + start ); - char* strEnd = strStart; - String::utf8Next( strEnd ); - end = start + ( strEnd - strStart ); - } - if ( curMatch == 1 && start > lastStart ) { - pushToken( - tokens, patternType, - textv.substr( patternMatchStart, start - patternMatchStart ) ); - } else if ( start > lastEnd ) { - pushToken( tokens, patternType, - textv.substr( lastEnd, start - lastEnd ) ); + if ( numMatches > 1 ) { + int fullMatchStart = matches[0].start; + int fullMatchEnd = matches[0].end; + if ( pattern.matchType == SyntaxPatternMatchType::RegEx ) { + priorityMap.clear(); + priorityMap.resize( fullMatchEnd - fullMatchStart, 0 ); + + for ( size_t captureIndex = 1; captureIndex < numMatches; ++captureIndex ) { + int capStart = matches[captureIndex].start; + int capEnd = matches[captureIndex].end; + + if ( capStart < fullMatchStart || capEnd > fullMatchEnd || + capStart >= capEnd ) + continue; + + for ( int k = capStart; k < capEnd; ++k ) + priorityMap[k - fullMatchStart] = captureIndex; } - patternText = textv.substr( start, end - start ); - SyntaxStyleType type = - curMatch < pattern.types.size() && - ( pattern.types[curMatch] == SyntaxStyleTypes::Symbol || - pattern.types[curMatch] == SyntaxStyleTypes::Normal ) - ? curState.currentSyntax->getSymbol( - ( patternTextStr = patternText ) ) - : SyntaxStyleEmpty(); + int currentBytePos = fullMatchStart; + while ( currentBytePos < fullMatchEnd ) { + size_t currentCaptureIndex = + priorityMap[currentBytePos - fullMatchStart]; + SyntaxStyleType currentType = SyntaxStyleEmpty(); - if ( !skipSubSyntaxSeparator || !pattern.hasSyntax() ) { - pushToken( tokens, - type == SyntaxStyleEmpty() - ? ( curMatch < pattern.types.size() - ? pattern.types[curMatch] - : pattern.types[0] ) - : type, - patternText ); + if ( currentCaptureIndex < pattern.types.size() ) { + currentType = pattern.types[currentCaptureIndex]; + } else { + currentType = pattern.types.empty() ? SyntaxStyleTypes::Normal + : pattern.types[0]; + } + + int segmentEndBytePos = currentBytePos + 1; + while ( segmentEndBytePos < fullMatchEnd && + priorityMap[segmentEndBytePos - fullMatchStart] == + currentCaptureIndex ) { + segmentEndBytePos++; + } + + std::string_view segmentText = + textv.substr( currentBytePos, segmentEndBytePos - currentBytePos ); + + if ( currentType == SyntaxStyleTypes::Symbol || + currentType == SyntaxStyleTypes::Normal ) { + patternTextStr = segmentText; // Need a std::string for lookup + SyntaxStyleType symbolType = + curState.currentSyntax->getSymbol( patternTextStr ); + if ( symbolType != SyntaxStyleEmpty() ) { + currentType = symbolType; + } else if ( currentType == SyntaxStyleTypes::Symbol ) { + currentType = SyntaxStyleTypes::Normal; + } + } + + bool skipThisToken = skipSubSyntaxSeparator && pattern.hasSyntax(); + + if ( !skipThisToken ) + pushToken( tokens, currentType, segmentText ); + + currentBytePos = segmentEndBytePos; } - if ( pattern.hasSyntax() && curMatch == numMatches - 1 && - end == patternMatchEnd ) { - pushSubsyntax( - curState, retState, pattern, patternIndex + 1, - ( patternTextStr = textv.substr( - patternMatchStart, patternMatchEnd - patternMatchStart ) ) ); + patternTextStr = + textv.substr( fullMatchStart, fullMatchEnd - fullMatchStart ); + + if ( pattern.hasSyntax() ) { + pushSubsyntax( curState, retState, pattern, patternIndex + 1, + patternTextStr ); } else if ( pattern.patterns.size() > 1 ) { setSubsyntaxPatternIdx( curState, retState, patternIndex + 1 ); } - i = end; - - if ( curMatch == numMatches - 1 && end < patternMatchEnd ) { - pushToken( tokens, patternType, - textv.substr( end, patternMatchEnd - end ) ); - i = patternMatchEnd; - - if ( pattern.hasSyntax() && curMatch == numMatches - 1 ) { - pushSubsyntax( curState, retState, pattern, patternIndex + 1, - ( patternTextStr = textv.substr( - patternMatchStart, - patternMatchEnd - patternMatchStart ) ) ); - } - } - + i = fullMatchEnd; matched = true; - lastStart = start; - lastEnd = end; + break; + } else { + auto patternType = pattern.types[0]; + int lastStart = fullMatchStart; + int lastEnd = fullMatchEnd; + + for ( size_t curMatch = 1; curMatch < numMatches; curMatch++ ) { + start = matches[curMatch].start; + end = matches[curMatch].end; + if ( start == end || start < 0 || end < 0 ) + continue; + if ( pattern.patterns.size() >= 3 && i > 0 && + text[i - 1] == pattern.patterns[2][0] ) + continue; + Uint8 lead = ( 0xff & ( text[start] ) ); + if ( !( lead < 0x80 ) ) { + char* strStart = const_cast( text.c_str() + start ); + char* strEnd = strStart; + String::utf8Next( strEnd ); + end = start + ( strEnd - strStart ); + } + if ( curMatch == 1 && start > lastStart ) { + pushToken( tokens, patternType, + textv.substr( fullMatchStart, start - fullMatchStart ) ); + } else if ( start > lastEnd ) { + pushToken( tokens, patternType, + textv.substr( lastEnd, start - lastEnd ) ); + } + + patternText = textv.substr( start, end - start ); + SyntaxStyleType type = + curMatch < pattern.types.size() && + ( pattern.types[curMatch] == SyntaxStyleTypes::Symbol || + pattern.types[curMatch] == SyntaxStyleTypes::Normal ) + ? curState.currentSyntax->getSymbol( + ( patternTextStr = patternText ) ) + : SyntaxStyleEmpty(); + + if ( !skipSubSyntaxSeparator || !pattern.hasSyntax() ) { + pushToken( tokens, + type == SyntaxStyleEmpty() + ? ( curMatch < pattern.types.size() + ? pattern.types[curMatch] + : pattern.types[0] ) + : type, + patternText ); + } + + if ( pattern.hasSyntax() && curMatch == numMatches - 1 && + end == fullMatchEnd ) { + pushSubsyntax( + curState, retState, pattern, patternIndex + 1, + ( patternTextStr = textv.substr( + fullMatchStart, fullMatchEnd - fullMatchStart ) ) ); + } else if ( pattern.patterns.size() > 1 ) { + setSubsyntaxPatternIdx( curState, retState, patternIndex + 1 ); + } + + i = end; + + if ( curMatch == numMatches - 1 && end < fullMatchEnd ) { + pushToken( tokens, patternType, + textv.substr( end, fullMatchEnd - end ) ); + i = fullMatchEnd; + + if ( pattern.hasSyntax() && curMatch == numMatches - 1 ) { + pushSubsyntax( + curState, retState, pattern, patternIndex + 1, + ( patternTextStr = textv.substr( + fullMatchStart, fullMatchEnd - fullMatchStart ) ) ); + } + } + + matched = true; + lastStart = start; + lastEnd = end; + } + break; } - break; } else { start = matches[0].start; end = matches[0].end; @@ -471,21 +545,6 @@ _tokenize( const SyntaxDefinition& syntax, const std::string& text, const Syntax ? curState.currentSyntax->getSymbol( ( patternTextStr = patternText ) ) : SyntaxStyleEmpty(); - if ( shouldCloseSubSyntax ) { - if ( shouldCloseSubSyntax->range.second >= end ) { - if ( !skipSubSyntaxSeparator ) { - pushTokensToOpenCloseSubsyntax( i, textv, curState.subsyntaxInfo, - *shouldCloseSubSyntax, tokens ); - } - popSubsyntax( curState, retState, syntax ); - i = shouldCloseSubSyntax->range.second; - matched = true; - shouldCloseSubSyntax = {}; - break; - } - shouldCloseSubSyntax = {}; - } - if ( !skipSubSyntaxSeparator || !pattern.hasSyntax() ) { pushToken( tokens, type == SyntaxStyleEmpty() ? pattern.types[0] : type, patternText ); diff --git a/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/c3.cpp b/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/c3.cpp index 1b37628c0..baab7533d 100644 --- a/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/c3.cpp +++ b/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/c3.cpp @@ -8,7 +8,7 @@ void addC3() { auto& sd = SyntaxDefinitionManager::instance()->add( { "C3", - { "%.c3$" }, + { "%.c3t?$" }, { { { "//.-\n" }, "comment" }, { { "/%*", "%*/" }, "comment" }, diff --git a/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/vala.cpp b/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/vala.cpp index 880d54c5f..9368f623f 100644 --- a/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/vala.cpp +++ b/src/modules/languages-syntax-highlighting/src/eepp/ui/doc/languages/vala.cpp @@ -18,8 +18,17 @@ void addVala() { { { "'", "'" }, "string" }, { { "-?0x%x+" }, "number" }, { { "-?%.?%d+[uUlLfFdDmM]?" }, "number" }, + { { "(class|interface|struct)\\s+([A-Za-z]\\w*)" }, + { "keyword", "keyword", "keyword2" }, + "", + SyntaxPatternMatchType::RegEx }, + { { "(class|interface|struct)\\s+([A-Za-z]\\w*)\\s*:\\s*([A-Za-z]\\w*)" }, + { "keyword", "keyword", "keyword2", "keyword2" }, + "", + SyntaxPatternMatchType::RegEx }, { { "-?%d+[%d%.eE]*f?" }, "number" }, { { "[%+%-/%*%<>!=%^&|?~:;%.%(%)%[%]{}]" }, "operator" }, + { { "[%a_][%w_]*%f[(]" }, "function" }, { { "[%a_][%w_]*" }, "symbol" }, }, { @@ -37,7 +46,7 @@ void addVala() { { "as", "keyword" }, { "base", "keyword" }, { "break", "keyword" }, { "case", "keyword" }, { "catch", "keyword" }, { "construct", "keyword" }, { "continue", "keyword" }, { "default", "keyword" }, { "delete", "keyword" }, - { "do", "keyword" }, + { "do", "keyword" }, { "owned", "keyword" }, { "yield", "keyword" }, { "else", "keyword" }, { "ensures", "keyword" }, { "finally", "keyword" }, { "for", "keyword" }, { "foreach", "keyword" }, { "get", "keyword" }, @@ -50,7 +59,7 @@ void addVala() { { "typeof", "keyword" }, { "value", "keyword" }, { "var", "keyword" }, { "void", "keyword" }, - { "while", "keyword" }, + { "while", "keyword" }, { "async", "keyword" }, { "internal", "keyword" }, { "null", "keyword" }, { "true", "keyword" }, { "false", "keyword" }, @@ -62,9 +71,9 @@ void addVala() { { "size_t", "keyword2" }, { "ssize_t", "keyword2" }, { "string", "keyword2" }, { "uchar", "keyword2" }, { "uint", "keyword2" }, { "uint8", "keyword2" }, { "uint16", "keyword2" }, { "uint32", "keyword2" }, { "uint64", "keyword2" }, - { "ulong", "keyword2" }, + { "ulong", "keyword2" }, { "IOError", "keyword2" }, { "Object", "keyword2" }, - { "unichar", "keyword2" }, { "ushort", "keyword2" }, + { "unichar", "keyword2" }, { "ushort", "keyword2" }, { "Error", "keyword2" }, }, "//",