From 04e5cd5fb622a8d318636b1a23d9ecf34ffaf7af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Lucas=20Golini?= Date: Sat, 21 Sep 2024 22:29:23 -0300 Subject: [PATCH] Add RegEx class. Refactor PatternMatcher. --- include/eepp/system/luapattern.hpp | 126 +--------- include/eepp/system/patternmatcher.hpp | 137 +++++++++++ include/eepp/system/regex.hpp | 46 ++++ premake4.lua | 4 + premake5.lua | 4 + src/eepp/system/luapattern.cpp | 215 +++--------------- src/eepp/system/patternmatcher.cpp | 186 +++++++++++++++ src/eepp/system/regex.cpp | 106 +++++++++ src/eepp/system/time.cpp | 6 +- src/eepp/window/engine.cpp | 2 +- src/modules/eterm/src/eterm/ui/uiterminal.cpp | 2 +- src/tests/unit_tests/regex.cpp | 34 +++ src/tests/unit_tests/textformat.cpp | 2 +- src/tools/ecode/docsearchcontroller.cpp | 2 +- src/tools/ecode/globalsearchcontroller.cpp | 4 +- .../autocomplete/autocompleteplugin.cpp | 6 +- .../plugins/formatter/formatterplugin.cpp | 2 +- src/tools/ecode/plugins/git/gitplugin.cpp | 2 +- .../ecode/plugins/linter/linterplugin.cpp | 2 +- .../plugins/lsp/lspclientservermanager.cpp | 2 +- src/tools/ecode/terminalmanager.cpp | 6 +- 21 files changed, 573 insertions(+), 323 deletions(-) create mode 100644 include/eepp/system/patternmatcher.hpp create mode 100644 include/eepp/system/regex.hpp create mode 100644 src/eepp/system/patternmatcher.cpp create mode 100644 src/eepp/system/regex.cpp create mode 100644 src/tests/unit_tests/regex.cpp diff --git a/include/eepp/system/luapattern.hpp b/include/eepp/system/luapattern.hpp index a480fd078..cdaaa8cbd 100644 --- a/include/eepp/system/luapattern.hpp +++ b/include/eepp/system/luapattern.hpp @@ -1,143 +1,37 @@ #ifndef EE_SYSTEM_LUAPATTERNMATCHER_HPP #define EE_SYSTEM_LUAPATTERNMATCHER_HPP -#include -#include -#include +#include #include namespace EE { namespace System { -// Adapted from rx-cpp (https://github.com/stevedonovan/rx-cpp/). 
-// This implementation removes all the regexp related stuffs, only leaves the Lua implementation. -class EE_API LuaPattern { +class EE_API LuaPattern : public PatternMatcher { public: static std::string_view getURLPattern(); static std::string_view getURIPattern(); - struct EE_API Range { - int start{ -1 }; - int end{ -1 }; - bool isValid() { return -1 != start && -1 != end; } - }; - - class EE_API State { - public: - bool range( int index, int& start, int& end ); - - bool matches( const char* string, size_t length ); - - protected: - friend class LuaPattern; - LuaPattern* mPattern; - Range* mRanges; - size_t mRefCount; - bool mOwnPattern; - - State( LuaPattern* pattern, bool ownPattern ); - - ~State(); - }; - - class EE_API Match { - public: - ~Match(); - - Match( LuaPattern& r, const char* string, bool ownPattern = false ); - - Match( LuaPattern& r, const std::string& string, bool ownPattern = false ); - - Match( const LuaPattern::Match& other ); - - Match& operator=( const Match& other ); - - bool matches(); - - bool subst( std::string& res ); - - void next(); - - std::string group( int idx = -1 ) const; - - std::string_view groupView( int idx = -1 ) const; - - bool range( int idx, int& start, int& end ) const; - - std::string operator[]( int index ) const; - - class iterator { - public: - iterator( Match* pm ) : mMatcher( pm ) { - if ( mMatcher != nullptr && !mMatcher->matches() ) - mMatcher = nullptr; - } - bool operator!=( const iterator& other ) { return mMatcher != other.mMatcher; } - bool operator==( const iterator& other ) { return mMatcher == other.mMatcher; } - const Match& operator*() const { return *mMatcher; } - iterator& operator++() { - mMatcher->next(); - if ( !mMatcher->matches() ) - mMatcher = nullptr; - return *this; - } - - protected: - Match* mMatcher; - }; - - iterator begin() { return iterator( this ); } - - iterator end() { return iterator( nullptr ); } - - protected: - friend class LuaPattern; - LuaPattern::State* mState{ nullptr }; - 
const char* mString{ nullptr }; - size_t mLength{ 0 }; - }; - static std::string matchesAny( const std::vector& stringvec, const std::string_view& pattern ); static std::string match( const std::string& string, const std::string_view& pattern ); - static Range find( const std::string& string, const std::string_view& pattern ); + static Range firstMatch( const std::string& string, const std::string_view& pattern ); - static bool matches( const std::string& string, const std::string_view& pattern ); + static bool hasMatches( const std::string& string, const std::string_view& pattern ); LuaPattern( const std::string_view& pattern ); - bool matches( const char* stringSearch, int stringStartOffset, LuaPattern::Range* matchList, - size_t stringLength ) const; + virtual bool matches( const char* stringSearch, int stringStartOffset, + LuaPattern::Range* matchList, size_t stringLength ) const; - bool matches( const std::string& str, LuaPattern::Range* matchList = nullptr, - int stringStartOffset = 0 ) const; + virtual bool matches( const std::string& str, PatternMatcher::Range* matchList = nullptr, + int stringStartOffset = 0 ) const; - bool find( const char* stringSearch, int& startMatch, int& endMatch, int stringStartOffset = 0, - int stringLength = 0, int returnMatchIndex = 0 ) const; + virtual const size_t& getNumMatches() const; - bool find( const std::string& s, int& startMatch, int& endMatch, int offset = 0, - int returnedMatchIndex = 0 ) const; - - const size_t& getNumMatches() const; - - bool range( int indexGet, int& startMatch, int& endMatch, - LuaPattern::Range* returnedMatched ) const; - - const std::string_view& getPatern() const { return mPattern; } - - LuaPattern::Match gmatch( const char* s ) &; - - LuaPattern::Match gmatch( const char* s ) &&; - - LuaPattern::Match gmatch( const std::string& string ) &&; - - LuaPattern::Match gmatch( const std::string& string ) &; - - std::string gsub( const char* text, const char* replace ); - - std::string gsub( const 
std::string& text, const std::string& replace ); + const std::string_view& getPattern() const { return mPattern; } protected: std::string_view mPattern; diff --git a/include/eepp/system/patternmatcher.hpp b/include/eepp/system/patternmatcher.hpp new file mode 100644 index 000000000..992611c8b --- /dev/null +++ b/include/eepp/system/patternmatcher.hpp @@ -0,0 +1,137 @@ +#ifndef EE_SYSTEM_PATTERNMATCHER_HPP +#define EE_SYSTEM_PATTERNMATCHER_HPP + +#include +#include +#include + +namespace EE { namespace System { + +// Inspired by rx-cpp (https://github.com/stevedonovan/rx-cpp/). + +class EE_API PatternMatcher { + public: + enum class PatternType { LuaPattern, PCRE }; + + struct EE_API Range { + int start{ -1 }; + int end{ -1 }; + bool isValid() { return -1 != start && -1 != end; } + }; + + class EE_API State { + public: + State( PatternMatcher* pattern, bool ownPattern ); + + ~State(); + + bool range( int index, int& start, int& end ); + + bool matches( const char* string, size_t length ); + + PatternMatcher* mPattern; + Range* mRanges; + size_t mRefCount; + bool mOwnPattern; + }; + + class EE_API Match { + public: + ~Match(); + + Match( PatternMatcher& r, const char* string, bool ownPattern = false ); + + Match( PatternMatcher& r, const std::string& string, bool ownPattern = false ); + + Match( const PatternMatcher::Match& other ); + + Match& operator=( const Match& other ); + + bool matches(); + + bool subst( std::string& res ); + + void next(); + + std::string group( int idx = -1 ) const; + + std::string_view groupView( int idx = -1 ) const; + + bool range( int idx, int& start, int& end ) const; + + std::string operator[]( int index ) const; + + class iterator { + public: + iterator( Match* pm ) : mMatcher( pm ) { + if ( mMatcher != nullptr && !mMatcher->matches() ) + mMatcher = nullptr; + } + bool operator!=( const iterator& other ) { return mMatcher != other.mMatcher; } + bool operator==( const iterator& other ) { return mMatcher == other.mMatcher; } + const 
Match& operator*() const { return *mMatcher; } + iterator& operator++() { + mMatcher->next(); + if ( !mMatcher->matches() ) + mMatcher = nullptr; + return *this; + } + + protected: + Match* mMatcher; + }; + + iterator begin() { return iterator( this ); } + + iterator end() { return iterator( nullptr ); } + + protected: + PatternMatcher::State* mState{ nullptr }; + const char* mString{ nullptr }; + size_t mLength{ 0 }; + }; + + PatternMatcher( PatternType type ) : mType( type ) {} + + virtual ~PatternMatcher() {} + + PatternType getType() const { return mType; } + + PatternMatcher::Match gmatch( const char* s ) &; + + PatternMatcher::Match gmatch( const char* s ) &&; + + PatternMatcher::Match gmatch( const std::string& string ) &&; + + PatternMatcher::Match gmatch( const std::string& string ) &; + + bool range( int indexGet, int& startMatch, int& endMatch, + PatternMatcher::Range* returnedMatched ) const; + + bool find( const std::string& s, int& startMatch, int& endMatch, int offset = 0, + int returnedMatchIndex = 0 ) const; + + bool find( const char* stringSearch, int& startMatch, int& endMatch, int stringStartOffset = 0, + int stringLength = 0, int returnMatchIndex = 0 ) const; + + std::string gsub( const char* text, const char* replace ); + + std::string gsub( const std::string& text, const std::string& replace ); + + virtual const std::string_view& getPattern() const = 0; + + virtual bool matches( const char* stringSearch, int stringStartOffset, + PatternMatcher::Range* matchList, size_t stringLength ) const = 0; + + virtual bool matches( const std::string& str, PatternMatcher::Range* matchList = nullptr, + int stringStartOffset = 0 ) const = 0; + + virtual const size_t& getNumMatches() const = 0; + + protected: + PatternType mType; +}; + +}} // namespace EE::System + +#endif diff --git a/include/eepp/system/regex.hpp b/include/eepp/system/regex.hpp new file mode 100644 index 000000000..c391f64dc --- /dev/null +++ b/include/eepp/system/regex.hpp @@ -0,0 +1,46 
@@ +#ifndef EE_SYSTEM_REGEX +#define EE_SYSTEM_REGEX + +#include + +namespace EE { namespace System { + +class EE_API RegEx : public PatternMatcher { + public: + RegEx( const std::string_view& pattern ); + + virtual ~RegEx(); + + bool isValid() const { return mValid; } + + virtual bool matches( const char* stringSearch, int stringStartOffset, + PatternMatcher::Range* matchList, size_t stringLength ) const override; + + virtual bool matches( const std::string& str, PatternMatcher::Range* matchList = nullptr, + int stringStartOffset = 0 ) const override; + + virtual const size_t& getNumMatches() const override; + + const std::string_view& getPattern() const override { return mPattern; } + + protected: + std::string_view mPattern; + mutable size_t mMatchNum; + void* mCompiledPattern; + int mCaptureCount; + bool mValid{ false }; +}; + +class EE_API RegExStorage : public RegEx { + public: + RegExStorage( const std::string& pattern ); + + explicit RegExStorage( std::string&& pattern ); + + protected: + std::string mPatternStorage; +}; + +}} // namespace EE::System + +#endif diff --git a/premake4.lua b/premake4.lua index 4ed1269df..ef23b4dcb 100644 --- a/premake4.lua +++ b/premake4.lua @@ -952,6 +952,8 @@ function build_eepp( build_name ) "src/thirdparty/pcre2/src" } + defines { "PCRE2_CODE_UNIT_WIDTH=8" } + if not _OPTIONS["without-mojoal"] then defines( "AL_LIBTYPE_STATIC" ) includedirs { "src/thirdparty/mojoAL" } @@ -1134,6 +1136,8 @@ solution "eepp" defines { "HAVE_CONFIG_H", "PCRE2_STATIC", "PCRE2_CODE_UNIT_WIDTH=8" } files { 'src/thirdparty/pcre2/src/pcre2_auto_possess.c', + 'src/thirdparty/pcre2/src/pcre2_chartables.c', + 'src/thirdparty/pcre2/src/pcre2_chkdint.c', 'src/thirdparty/pcre2/src/pcre2_compile.c', 'src/thirdparty/pcre2/src/pcre2_config.c', 'src/thirdparty/pcre2/src/pcre2_context.c', diff --git a/premake5.lua b/premake5.lua index 803c75de6..7cad65a93 100644 --- a/premake5.lua +++ b/premake5.lua @@ -713,6 +713,8 @@ function build_eepp( build_name ) 
add_static_links() check_ssl_support() + defines { "PCRE2_CODE_UNIT_WIDTH=8" } + if table.contains( backends, "SDL2" ) then files { "src/eepp/window/backend/SDL2/*.cpp" } defines { "EE_BACKEND_SDL_ACTIVE", "EE_SDL_VERSION_2" } @@ -935,6 +937,8 @@ workspace "eepp" defines { "HAVE_CONFIG_H", "PCRE2_STATIC", "PCRE2_CODE_UNIT_WIDTH=8" } files { 'src/thirdparty/pcre2/src/pcre2_auto_possess.c', + 'src/thirdparty/pcre2/src/pcre2_chartables.c', + 'src/thirdparty/pcre2/src/pcre2_chkdint.c', 'src/thirdparty/pcre2/src/pcre2_compile.c', 'src/thirdparty/pcre2/src/pcre2_config.c', 'src/thirdparty/pcre2/src/pcre2_context.c', diff --git a/src/eepp/system/luapattern.cpp b/src/eepp/system/luapattern.cpp index a85d5786b..6742291ee 100644 --- a/src/eepp/system/luapattern.cpp +++ b/src/eepp/system/luapattern.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -10,6 +9,24 @@ namespace EE { namespace System { const int MAX_DEFAULT_MATCHES = 12; static bool sFailHandlerInitialized = false; +static void failHandler( const char* msg ) { + throw std::string( msg ); +} + +LuaPatternStorage::LuaPatternStorage( const std::string& pattern ) : + LuaPattern( "" ), mPatternStorage( pattern ) { + mPattern = std::string_view{ mPatternStorage }; +} + +LuaPatternStorage::LuaPatternStorage( std::string&& pattern ) : + LuaPattern( "" ), mPatternStorage( std::move( pattern ) ) { + mPattern = std::string_view{ mPatternStorage }; +} + +const size_t& LuaPattern::getNumMatches() const { + return mMatchNum; +} + std::string_view LuaPattern::getURLPattern() { return "https?://[%w_.~!*:@&+$/?%%#-]-%w[-.%w]*%.%w%w%w?%w?:?%d*/?[%w_.~!*:@&+$/?%%#=-]*"sv; } @@ -18,10 +35,6 @@ std::string_view LuaPattern::getURIPattern() { return "%w+://[%w_.~!*:@&+$/?%%#-]-%w[-.%w]*%.%w%w%w?%w?:?%d*/?[%w_.~!*:@&+$/?%%#=-]*"sv; } -static void failHandler( const char* msg ) { - throw std::string( msg ); -} - std::string LuaPattern::match( const std::string& string, const std::string_view& pattern ) { LuaPattern matcher( 
pattern ); int start = 0, end = 0; @@ -42,7 +55,8 @@ std::string LuaPattern::matchesAny( const std::vector& stringvec, return ""; } -LuaPattern::Range LuaPattern::find( const std::string& string, const std::string_view& pattern ) { +PatternMatcher::Range LuaPattern::firstMatch( const std::string& string, + const std::string_view& pattern ) { LuaPattern matcher( pattern ); int start = 0, end = 0; if ( matcher.find( string, start, end ) ) @@ -50,11 +64,12 @@ LuaPattern::Range LuaPattern::find( const std::string& string, const std::string return { -1, -1 }; } -bool LuaPattern::matches( const std::string& string, const std::string_view& pattern ) { - return find( string, pattern ).isValid(); +bool LuaPattern::hasMatches( const std::string& string, const std::string_view& pattern ) { + return LuaPattern::firstMatch( string, pattern ).isValid(); } -LuaPattern::LuaPattern( const std::string_view& pattern ) : mPattern( pattern ), mMatchNum( 0 ) { +LuaPattern::LuaPattern( const std::string_view& pattern ) : + PatternMatcher( PatternType::LuaPattern ), mPattern( pattern ), mMatchNum( 0 ) { if ( !sFailHandlerInitialized ) { sFailHandlerInitialized = true; lua_str_fail_func( failHandler ); @@ -62,12 +77,12 @@ LuaPattern::LuaPattern( const std::string_view& pattern ) : mPattern( pattern ), } bool LuaPattern::matches( const char* stringSearch, int stringStartOffset, - LuaPattern::Range* matchList, size_t stringLength ) const { + PatternMatcher::Range* matchList, size_t stringLength ) const { if ( stringLength == 0 ) stringLength = strlen( stringSearch ); if ( matchList == nullptr ) { - LuaPattern::Range matchesBuffer[MAX_DEFAULT_MATCHES]; + PatternMatcher::Range matchesBuffer[MAX_DEFAULT_MATCHES]; try { mMatchNum = lua_str_match( stringSearch, stringStartOffset, stringLength, mPattern.data(), (LuaMatch*)matchesBuffer ); @@ -85,185 +100,9 @@ bool LuaPattern::matches( const char* stringSearch, int stringStartOffset, return mMatchNum == 0 ? 
false : true; } -bool LuaPattern::matches( const std::string& str, LuaPattern::Range* matchList, +bool LuaPattern::matches( const std::string& str, PatternMatcher::Range* matchList, int stringStartOffset ) const { return matches( str.c_str(), stringStartOffset, matchList, str.size() ); } -bool LuaPattern::find( const char* stringSearch, int& startMatch, int& endMatch, - int stringStartOffset, int stringLength, int returnMatchIndex ) const { - LuaPattern::Range matchesBuffer[MAX_DEFAULT_MATCHES]; - if ( matches( stringSearch, stringStartOffset, matchesBuffer, stringLength ) ) { - range( returnMatchIndex, startMatch, endMatch, matchesBuffer ); - return true; - } else { - startMatch = -1; - endMatch = -1; - return false; - } -} - -bool LuaPattern::find( const std::string& s, int& startMatch, int& endMatch, int offset, - int returnedMatchIndex ) const { - return find( s.c_str(), startMatch, endMatch, offset, s.size(), returnedMatchIndex ); -} - -bool LuaPattern::range( int indexGet, int& startMatch, int& endMatch, - LuaPattern::Range* returnedMatched ) const { - if ( indexGet == -1 ) - indexGet = getNumMatches() > 1 ? 1 : 0; - if ( indexGet >= 0 && indexGet < (int)getNumMatches() ) { - startMatch = returnedMatched[indexGet].start; - endMatch = returnedMatched[indexGet].end; - return true; - } - return false; -} - -const size_t& LuaPattern::getNumMatches() const { - return mMatchNum; -} - -bool LuaPattern::LuaPattern::State::range( int index, int& start, int& end ) { - return mPattern->range( index, start, end, mRanges ); -} - -bool LuaPattern::LuaPattern::State::matches( const char* string, size_t length ) { - return mPattern->matches( string, 0, mRanges, length ); -} - -LuaPattern::State::State( LuaPattern* pattern, bool ownPattern ) : - mRefCount( 1 ), mOwnPattern( ownPattern ) { - mRanges = new Range[10]; - mPattern = ownPattern ? 
new LuaPattern( pattern->getPatern() ) : pattern; -} - -LuaPattern::State::~State() { - delete[] mRanges; - if ( mOwnPattern ) - delete mPattern; -} - -LuaPattern::Match::Match( LuaPattern& r, const char* string, bool ownPattern ) : mString( string ) { - mLength = strlen( string ); - mState = new LuaPattern::LuaPattern::State( &r, ownPattern ); -} - -LuaPattern::Match::Match( LuaPattern& r, const std::string& string, bool ownPattern ) { - mState = new LuaPattern::LuaPattern::State( &r, ownPattern ); - mString = string.c_str(); - mLength = string.size(); -} - -LuaPattern::Match::~Match() { - --mState->mRefCount; - if ( mState->mRefCount == 0 ) - delete mState; -} - -LuaPattern::Match::Match( const LuaPattern::Match& other ) : - mState( other.mState ), mString( other.mString ), mLength( other.mLength ) { - ++mState->mRefCount; -} - -LuaPattern::Match& LuaPattern::Match::operator=( const Match& ) { - ++mState->mRefCount; - return *this; -} - -void LuaPattern::Match::next() { - int m1 = 0, m2 = 0; - mState->range( 0, m1, m2 ); - mString += m2; - mLength -= m2; -} - -std::string LuaPattern::Match::group( int idx ) const { - int m1, m2; - if ( mState->range( idx, m1, m2 ) ) - return std::string( mString + m1, m2 - m1 ); - return ""; -} - -std::string_view LuaPattern::Match::groupView( int idx ) const { - static constexpr auto EMPTY = ""sv; - int m1, m2; - if ( mState->range( idx, m1, m2 ) ) - return std::string_view( mString + m1, m2 - m1 ); - return EMPTY; -} - -bool LuaPattern::Match::range( int idx, int& start, int& end ) const { - return mState->range( idx, start, end ); -} - -std::string LuaPattern::Match::operator[]( int index ) const { - return group( index ); -} - -bool LuaPattern::Match::matches() { - return mState->matches( mString, mLength ); -} - -bool LuaPattern::Match::subst( std::string& res ) { - if ( !matches() ) { - res.append( mString ); - return false; - } - int start = 0, end = 0; - mState->range( 0, start, end ); - if ( start == 0 ) - return true; - 
res.append( mString, start ); - return true; -} - -LuaPattern::Match LuaPattern::gmatch( const char* s ) & { - return LuaPattern::Match( *this, s, false ); -} - -LuaPattern::Match LuaPattern::gmatch( const std::string& s ) & { - return LuaPattern::Match( *this, s, false ); -} - -LuaPattern::Match LuaPattern::gmatch( const char* s ) && { - return LuaPattern::Match( *this, s, true ); -} - -LuaPattern::Match LuaPattern::gmatch( const std::string& string ) && { - return LuaPattern::Match( *this, string, true ); -} - -std::string LuaPattern::gsub( const char* text, const char* replace ) { - LuaPattern::Match ms( *this, text ); - std::string res; - while ( ms.subst( res ) ) { - for ( const char* ptr = replace; *ptr; ++ptr ) { - if ( *ptr == '%' ) { - ++ptr; - int ngroup = (int)*ptr - (int)'0'; - res += ms.group( ngroup ); - } else { - res += *ptr; - } - } - ms.next(); - } - return res; -} - -std::string LuaPattern::gsub( const std::string& text, const std::string& replace ) { - return gsub( text.c_str(), replace.c_str() ); -} - -LuaPatternStorage::LuaPatternStorage( const std::string& pattern ) : - LuaPattern( "" ), mPatternStorage( pattern ) { - mPattern = std::string_view{ mPatternStorage }; -} - -LuaPatternStorage::LuaPatternStorage( std::string&& pattern ) : - LuaPattern( "" ), mPatternStorage( std::move( pattern ) ) { - mPattern = std::string_view{ mPatternStorage }; -} - }} // namespace EE::System diff --git a/src/eepp/system/patternmatcher.cpp b/src/eepp/system/patternmatcher.cpp new file mode 100644 index 000000000..daf2fcbef --- /dev/null +++ b/src/eepp/system/patternmatcher.cpp @@ -0,0 +1,186 @@ +#include +#include +#include +#include + +using namespace std::literals; + +namespace EE { namespace System { + +#define MAX_DEFAULT_MATCHES 12 + +bool PatternMatcher::find( const char* stringSearch, int& startMatch, int& endMatch, + int stringStartOffset, int stringLength, int returnMatchIndex ) const { + PatternMatcher::Range matchesBuffer[MAX_DEFAULT_MATCHES]; + if 
( matches( stringSearch, stringStartOffset, matchesBuffer, stringLength ) ) { + range( returnMatchIndex, startMatch, endMatch, matchesBuffer ); + return true; + } else { + startMatch = -1; + endMatch = -1; + return false; + } +} + +bool PatternMatcher::find( const std::string& s, int& startMatch, int& endMatch, int offset, + int returnedMatchIndex ) const { + return find( s.c_str(), startMatch, endMatch, offset, s.size(), returnedMatchIndex ); +} + +bool PatternMatcher::range( int indexGet, int& startMatch, int& endMatch, + PatternMatcher::Range* returnedMatched ) const { + if ( indexGet == -1 ) + indexGet = getNumMatches() > 1 ? 1 : 0; + if ( indexGet >= 0 && indexGet < (int)getNumMatches() ) { + startMatch = returnedMatched[indexGet].start; + endMatch = returnedMatched[indexGet].end; + return true; + } + return false; +} + +bool PatternMatcher::State::range( int index, int& start, int& end ) { + return mPattern->range( index, start, end, mRanges ); +} + +bool PatternMatcher::State::matches( const char* string, size_t length ) { + return mPattern->matches( string, 0, mRanges, length ); +} + +PatternMatcher::State::State( PatternMatcher* pattern, bool ownPattern ) : + mRefCount( 1 ), mOwnPattern( ownPattern ) { + mRanges = new Range[10]; + if ( ownPattern ) { + switch ( pattern->getType() ) { + case PatternType::LuaPattern: + mPattern = new LuaPattern( pattern->getPattern() ); + break; + case PatternType::PCRE: + mPattern = new RegEx( pattern->getPattern() ); + break; + } + } else { + mPattern = pattern; + } +} + +PatternMatcher::State::~State() { + delete[] mRanges; + if ( mOwnPattern ) + delete mPattern; +} + +PatternMatcher::Match::Match( PatternMatcher& r, const char* string, bool ownPattern ) : + mString( string ) { + mLength = strlen( string ); + mState = new PatternMatcher::State( &r, ownPattern ); +} + +PatternMatcher::Match::Match( PatternMatcher& r, const std::string& string, bool ownPattern ) { + mState = new PatternMatcher::State( &r, ownPattern ); + 
mString = string.c_str(); + mLength = string.size(); +} + +PatternMatcher::Match::~Match() { + --mState->mRefCount; + if ( mState->mRefCount == 0 ) + delete mState; +} + +PatternMatcher::Match::Match( const PatternMatcher::Match& other ) : + mState( other.mState ), mString( other.mString ), mLength( other.mLength ) { + ++mState->mRefCount; +} + +PatternMatcher::Match& PatternMatcher::Match::operator=( const Match& ) { + ++mState->mRefCount; + return *this; +} + +void PatternMatcher::Match::next() { + int m1 = 0, m2 = 0; + mState->range( 0, m1, m2 ); + mString += m2; + mLength -= m2; +} + +std::string PatternMatcher::Match::group( int idx ) const { + int m1, m2; + if ( mState->range( idx, m1, m2 ) ) + return std::string( mString + m1, m2 - m1 ); + return ""; +} + +std::string_view PatternMatcher::Match::groupView( int idx ) const { + static constexpr auto EMPTY = ""sv; + int m1, m2; + if ( mState->range( idx, m1, m2 ) ) + return std::string_view( mString + m1, m2 - m1 ); + return EMPTY; +} + +bool PatternMatcher::Match::range( int idx, int& start, int& end ) const { + return mState->range( idx, start, end ); +} + +std::string PatternMatcher::Match::operator[]( int index ) const { + return group( index ); +} + +bool PatternMatcher::Match::matches() { + return mState->matches( mString, mLength ); +} + +bool PatternMatcher::Match::subst( std::string& res ) { + if ( !matches() ) { + res.append( mString ); + return false; + } + int start = 0, end = 0; + mState->range( 0, start, end ); + if ( start == 0 ) + return true; + res.append( mString, start ); + return true; +} + +PatternMatcher::Match PatternMatcher::gmatch( const char* s ) & { + return PatternMatcher::Match( *this, s, false ); +} + +PatternMatcher::Match PatternMatcher::gmatch( const std::string& s ) & { + return PatternMatcher::Match( *this, s, false ); +} + +PatternMatcher::Match PatternMatcher::gmatch( const char* s ) && { + return PatternMatcher::Match( *this, s, true ); +} + +PatternMatcher::Match 
PatternMatcher::gmatch( const std::string& string ) && { + return PatternMatcher::Match( *this, string, true ); +} + +std::string PatternMatcher::gsub( const char* text, const char* replace ) { + PatternMatcher::Match ms( *this, text ); + std::string res; + while ( ms.subst( res ) ) { + for ( const char* ptr = replace; *ptr; ++ptr ) { + if ( *ptr == '%' ) { + ++ptr; + int ngroup = (int)*ptr - (int)'0'; + res += ms.group( ngroup ); + } else { + res += *ptr; + } + } + ms.next(); + } + return res; +} + +std::string PatternMatcher::gsub( const std::string& text, const std::string& replace ) { + return gsub( text.c_str(), replace.c_str() ); +} + +}} // namespace EE::System diff --git a/src/eepp/system/regex.cpp b/src/eepp/system/regex.cpp new file mode 100644 index 000000000..63b33a2a0 --- /dev/null +++ b/src/eepp/system/regex.cpp @@ -0,0 +1,106 @@ +#include +#include + +namespace EE { namespace System { + +RegEx::RegEx( const std::string_view& pattern ) : + PatternMatcher( PatternType::PCRE ), + mPattern( pattern ), + mMatchNum( 0 ), + mCompiledPattern( nullptr ), + mCaptureCount( 0 ), + mValid( true ) { + int errornumber; + PCRE2_SIZE erroroffset; + PCRE2_SPTR pattern_sptr = reinterpret_cast( pattern.data() ); + + mCompiledPattern = pcre2_compile( pattern_sptr, // the pattern + pattern.size(), // the length of the pattern + 0, // default options + &errornumber, // for error number + &erroroffset, // for error offset + NULL // use default compile context + ); + + if ( mCompiledPattern == NULL ) { + PCRE2_UCHAR buffer[256]; + pcre2_get_error_message( errornumber, buffer, sizeof( buffer ) ); + mValid = false; + // throw std::runtime_error( "PCRE2 compilation failed at offset " + + // std::to_string( erroroffset ) + ": " + + // reinterpret_cast( buffer ) ); + } + + int rc = pcre2_pattern_info( reinterpret_cast( mCompiledPattern ), + PCRE2_INFO_CAPTURECOUNT, &mCaptureCount ); + if ( rc != 0 ) { + // throw std::runtime_error( "PCRE2 pattern info failed with error code " + + 
// std::to_string( rc ) ); + mValid = false; + } +} + +RegEx::~RegEx() { + if ( mCompiledPattern != nullptr ) { + pcre2_code_free( reinterpret_cast( mCompiledPattern ) ); + } +} + +bool RegEx::matches( const char* stringSearch, int stringStartOffset, + PatternMatcher::Range* matchList, size_t stringLength ) const { + auto* compiledPattern = reinterpret_cast( mCompiledPattern ); + pcre2_match_data* match_data = pcre2_match_data_create_from_pattern( compiledPattern, NULL ); + + PCRE2_SPTR subject = reinterpret_cast( stringSearch ); + + int rc = pcre2_match( compiledPattern, // the compiled pattern + subject, // the subject string + stringLength, // the length of the subject + stringStartOffset, // start at offset in the subject + 0, // default options + match_data, // match data + NULL // match context + ); + + if ( rc < 0 ) { + pcre2_match_data_free( match_data ); + mMatchNum = 0; + // if ( rc == PCRE2_ERROR_NOMATCH ) + return false; + // else + // throw std::runtime_error( "PCRE2 matching error " + std::to_string( rc ) ); + } + + mMatchNum = rc; + + if ( matchList != nullptr ) { + PCRE2_SIZE* ovector = pcre2_get_ovector_pointer( match_data ); + for ( size_t i = 0; i < static_cast( rc ); ++i ) { + matchList[i].start = static_cast( ovector[2 * i] ); + matchList[i].end = static_cast( ovector[2 * i + 1] ); + } + } + + pcre2_match_data_free( match_data ); + return true; +} + +bool RegEx::matches( const std::string& str, PatternMatcher::Range* matchList, + int stringStartOffset ) const { + return matches( str.c_str(), stringStartOffset, matchList, str.size() ); +} + +const size_t& RegEx::getNumMatches() const { + return mMatchNum; +} + +RegExStorage::RegExStorage( const std::string& pattern ) : RegEx( "" ), mPatternStorage( pattern ) { + mPattern = std::string_view{ mPatternStorage }; +} + +RegExStorage::RegExStorage( std::string&& pattern ) : + RegEx( "" ), mPatternStorage( std::move( pattern ) ) { + mPattern = std::string_view{ mPatternStorage }; +} + +}} // namespace 
EE::System diff --git a/src/eepp/system/time.cpp b/src/eepp/system/time.cpp index a942f7623..0c617e354 100644 --- a/src/eepp/system/time.cpp +++ b/src/eepp/system/time.cpp @@ -59,12 +59,12 @@ Time Time::fromString( const std::string& str ) { } std::string Time::toString() const { - Uint64 totalSeconds = asSeconds(); + double totalSeconds = asSeconds(); - if ( asSeconds() < 1 ) { + if ( totalSeconds < 1 ) { return String::fromFloat( asMilliseconds(), "ms" ); } else if ( totalSeconds < 60 ) { - return String::format( "%lus", static_cast( totalSeconds ) ); + return String::fromFloat( totalSeconds, "s" ); } long minutesLeft = totalSeconds / 60; diff --git a/src/eepp/window/engine.cpp b/src/eepp/window/engine.cpp index 356743def..11696eb05 100644 --- a/src/eepp/window/engine.cpp +++ b/src/eepp/window/engine.cpp @@ -396,7 +396,7 @@ bool Engine::openURI( const std::string& url ) { if ( nullptr == getPlatformHelper() ) return false; - if ( !LuaPattern::matches( url, "^%w+://" ) ) + if ( !LuaPattern::hasMatches( url, "^%w+://" ) ) return openURI( "file://" + url ); if ( String::startsWith( url, "file://" ) ) { diff --git a/src/modules/eterm/src/eterm/ui/uiterminal.cpp b/src/modules/eterm/src/eterm/ui/uiterminal.cpp index 25f0b2e14..353ee458d 100644 --- a/src/modules/eterm/src/eterm/ui/uiterminal.cpp +++ b/src/modules/eterm/src/eterm/ui/uiterminal.cpp @@ -535,7 +535,7 @@ void UITerminal::createDefaultContextMenuOptions( UIPopUpMenu* menu ) { if ( mTerm->getTerminal()->hasSelection() ) { auto sel( mTerm->getTerminal()->getSelection() ); - if ( LuaPattern::matches( sel, LuaPattern::getURIPattern() ) ) { + if ( LuaPattern::hasMatches( sel, LuaPattern::getURIPattern() ) ) { menuAdd( menu, i18n( "uiterminal_open_link", "Open Link" ), "earth", "terminal-open-link" ); } diff --git a/src/tests/unit_tests/regex.cpp b/src/tests/unit_tests/regex.cpp new file mode 100644 index 000000000..6b612960a --- /dev/null +++ b/src/tests/unit_tests/regex.cpp @@ -0,0 +1,34 @@ +#include "utest.h" 
+#include +#include + +using namespace EE::System; + +UTEST( RegEx, basicTest ) { + RegEx regex( "\\d+" ); + std::string testStr = "The number is 42."; + PatternMatcher::Range matches[10]; + regex.matches( testStr, matches ); + EXPECT_EQ( regex.isValid(), true ); + EXPECT_EQ( regex.getNumMatches(), 1ul ); + for ( size_t i = 0; i < regex.getNumMatches(); ++i ) { + int start = matches[i].start; + int end = matches[i].end; + EXPECT_EQ( start, 14 ); + EXPECT_EQ( end, 16 ); + } +} + +UTEST( LuaPattern, basicTest ) { + LuaPattern regex( "%d+" ); + std::string testStr = "The number is 42."; + PatternMatcher::Range matches[10]; + regex.matches( testStr, matches ); + EXPECT_EQ( regex.getNumMatches(), 1ul ); + for ( size_t i = 0; i < regex.getNumMatches(); ++i ) { + int start = matches[i].start; + int end = matches[i].end; + EXPECT_EQ( start, 14 ); + EXPECT_EQ( end, 16 ); + } +} diff --git a/src/tests/unit_tests/textformat.cpp b/src/tests/unit_tests/textformat.cpp index b10b89ba2..4bef034d7 100644 --- a/src/tests/unit_tests/textformat.cpp +++ b/src/tests/unit_tests/textformat.cpp @@ -85,7 +85,7 @@ UTEST( TextFormat, autodetectProject ) { if ( "a" == extension || "zip" == extension || "dll" == extension || "dat" == extension || "cur" == extension || "icns" == extension || "wav" == extension || Image::isImageExtension( file.getFilepath() ) || - LuaPattern::matches( file.getFilepath(), "SDL2%-%d+%.%d+%.%d+" ) ) + LuaPattern::hasMatches( file.getFilepath(), "SDL2%-%d+%.%d+%.%d+" ) ) continue; IOStreamFile stream( file.getFilepath() ); auto expectedEncoding = getEncoding( file.getFileName() ); diff --git a/src/tools/ecode/docsearchcontroller.cpp b/src/tools/ecode/docsearchcontroller.cpp index 01a6c7300..c1a7d0b74 100644 --- a/src/tools/ecode/docsearchcontroller.cpp +++ b/src/tools/ecode/docsearchcontroller.cpp @@ -392,7 +392,7 @@ void DocSearchController::findAndReplace( SearchState& search, const String& rep if ( doc.hasSelection() && ( doc.getSelectedText() == txt || ( 
search.type == TextDocument::FindReplaceType::LuaPattern && - LuaPattern::matches( doc.getAllSelectedText().toUtf8(), txt.toUtf8() ) ) ) ) { + LuaPattern::hasMatches( doc.getAllSelectedText().toUtf8(), txt.toUtf8() ) ) ) ) { replaceSelection( search, repl ); } else { findNextText( search ); diff --git a/src/tools/ecode/globalsearchcontroller.cpp b/src/tools/ecode/globalsearchcontroller.cpp index 485e8e23a..16c557c9c 100644 --- a/src/tools/ecode/globalsearchcontroller.cpp +++ b/src/tools/ecode/globalsearchcontroller.cpp @@ -1,5 +1,5 @@ -#include "globalsearchcontroller.hpp" #include "ecode.hpp" +#include "globalsearchcontroller.hpp" #include "uitreeviewglobalsearch.hpp" namespace ecode { @@ -63,7 +63,7 @@ size_t GlobalSearchController::replaceInFiles( const std::string& replaceText, const ProjectSearch::Result& res = model->getResult(); bool hasCaptures = - model->isResultFromLuaPattern() && LuaPattern::find( replaceText, "$%d+" ).isValid(); + model->isResultFromLuaPattern() && LuaPattern::hasMatches( replaceText, "$%d+" ); if ( hasCaptures ) { for ( const auto& fileResult : res ) { diff --git a/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp b/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp index 400084475..7951a06ea 100644 --- a/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp +++ b/src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp @@ -672,9 +672,9 @@ void AutoCompletePlugin::tryStartSnippetNav( const Suggestion& suggestion, UICod bool AutoCompletePlugin::hasCompleteSteps( const Suggestion& suggestion ) { if ( suggestion.kind != LSPCompletionItemKind::Snippet ) return false; - if ( LuaPattern::matches( suggestion.insertText, SNIPPET_PTRN1 ) || - LuaPattern::matches( suggestion.insertText, SNIPPET_PTRN2 ) || - LuaPattern::matches( suggestion.insertText, SNIPPET_PTRN3 ) ) { + if ( LuaPattern::hasMatches( suggestion.insertText, SNIPPET_PTRN1 ) || + LuaPattern::hasMatches( suggestion.insertText, SNIPPET_PTRN2 ) || + 
LuaPattern::hasMatches( suggestion.insertText, SNIPPET_PTRN3 ) ) { return true; } return false; diff --git a/src/tools/ecode/plugins/formatter/formatterplugin.cpp b/src/tools/ecode/plugins/formatter/formatterplugin.cpp index 76135efde..6b593c258 100644 --- a/src/tools/ecode/plugins/formatter/formatterplugin.cpp +++ b/src/tools/ecode/plugins/formatter/formatterplugin.cpp @@ -496,7 +496,7 @@ FormatterPlugin::Formatter FormatterPlugin::supportsFormatter( std::shared_ptr doc ) { for ( auto& linter : mLinters ) { for ( auto& ext : linter.files ) { - if ( LuaPattern::find( fileName, ext ).isValid() ) + if ( LuaPattern::hasMatches( fileName, ext ) ) return linter; auto& files = def.getFiles(); if ( std::find( files.begin(), files.end(), ext ) != files.end() ) { diff --git a/src/tools/ecode/plugins/lsp/lspclientservermanager.cpp b/src/tools/ecode/plugins/lsp/lspclientservermanager.cpp index ffcbd5b84..c9b655050 100644 --- a/src/tools/ecode/plugins/lsp/lspclientservermanager.cpp +++ b/src/tools/ecode/plugins/lsp/lspclientservermanager.cpp @@ -28,7 +28,7 @@ LSPClientServerManager::supportsLSP( const std::shared_ptr& doc ) for ( auto& lsp : mLSPs ) { for ( auto& ext : lsp.filePatterns ) { - if ( LuaPattern::find( fileName, ext ).isValid() ) { + if ( LuaPattern::hasMatches( fileName, ext ) ) { lsps.push_back( lsp ); break; } diff --git a/src/tools/ecode/terminalmanager.cpp b/src/tools/ecode/terminalmanager.cpp index 090428df4..65a918a9d 100644 --- a/src/tools/ecode/terminalmanager.cpp +++ b/src/tools/ecode/terminalmanager.cpp @@ -12,9 +12,9 @@ UITerminal* TerminalManager::createTerminalInSplitter( const std::string& workin bool fallback ) { #if EE_PLATFORM == EE_PLATFORM_WIN std::string os = Sys::getOSName( true ); - if ( !LuaPattern::matches( os, "Windows 1%d"sv ) && - !LuaPattern::matches( os, "Windows Server 201[69]"sv ) && - !LuaPattern::matches( os, "Windows Server 202%d"sv ) ) + if ( !LuaPattern::hasMatches( os, "Windows 1%d"sv ) && + !LuaPattern::hasMatches( os, 
"Windows Server 201[69]"sv ) && + !LuaPattern::hasMatches( os, "Windows Server 202%d"sv ) ) return nullptr; #endif