From d6e9f49197c95c75dcd985f15d5f3d400de17faf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mart=C3=ADn=20Lucas=20Golini?= Date: Sun, 9 Aug 2020 00:27:19 -0300 Subject: [PATCH] Added Boyer-Moore-Horspool fast string search algorithm to String. Improved project search performance by using the BMH algorithm. --- include/eepp/core/string.hpp | 57 +++++---- projects/linux/ee.creator.user | 6 +- src/eepp/core/string.cpp | 101 ++++++++++++++++ src/eepp/ui/doc/syntaxdefinitionmanager.cpp | 4 +- src/eepp/ui/doc/textdocument.cpp | 3 + src/tools/codeeditor/codeeditor.cpp | 24 ++-- src/tools/codeeditor/projectsearch.cpp | 125 +++++++++++++++++++- src/tools/codeeditor/projectsearch.hpp | 12 +- 8 files changed, 284 insertions(+), 48 deletions(-) diff --git a/include/eepp/core/string.hpp b/include/eepp/core/string.hpp index 809194bae..48b725527 100644 --- a/include/eepp/core/string.hpp +++ b/include/eepp/core/string.hpp @@ -1,33 +1,3 @@ -//////////////////////////////////////////////////////////// -// -// SFML - Simple and Fast Multimedia Library -// Copyright (C) 2007-2009 Laurent Gomila (laurent.gom@gmail.com) -// -// This software is provided 'as-is', without any express or implied warranty. -// In no event will the authors be held liable for any damages arising from the use of this -// software. -// -// Permission is granted to anyone to use this software for any purpose, -// including commercial applications, and to alter it and redistribute it freely, -// subject to the following restrictions: -// -// 1. The origin of this software must not be misrepresented; -// you must not claim that you wrote the original software. -// If you use this software in a product, an acknowledgment -// in the product documentation would be appreciated but is not required. -// -// 2. Altered source versions must be plainly marked as such, -// and must not be misrepresented as being the original software. -// -// 3. This notice may not be removed or altered from any source distribution. -// -//////////////////////////////////////////////////////////// -/* -** The class was modified to fit EEPP own needs. This is not the original implementation from SFML2. -** Functions and methods are the same that in std::string to facilitate portability. -** Also added a lot of utilities for string manipulation -**/ - #ifndef EE_STRING_HPP #define EE_STRING_HPP @@ -43,6 +13,12 @@ namespace EE { +/* +** The class was modified to fit EEPP own needs. This is not the original implementation from SFML2. +** Functions and methods are the same that in std::string to facilitate portability. +** Also added a lot of utilities for string manipulation +**/ + /** Utility string class that automatically handles conversions between types and encodings **/ class EE_API String { public: @@ -56,6 +32,27 @@ class EE_API String { static const std::size_t InvalidPos; ///< Represents an invalid position in the string + /** Boyer–Moore–Horspool fast string search. */ + class EE_API BMH { + public: + typedef std::vector OccTable; + + static const OccTable createOccTable( const unsigned char* needle, size_t needleLength ); + + /** @returns haystackLength if not found, otherwise the position */ + static size_t search( const unsigned char* haystack, size_t haystackLength, const unsigned char* needle, + const size_t needleLength, + const OccTable& occ ); + + /** @returns -1 if not found otherwise the position */ + static Int64 find( const std::string& haystack, const std::string& needle, + const size_t& haystackOffset, const OccTable& occ ); + + /** @returns -1 if not found otherwise the position */ + static Int64 find( const std::string& haystack, const std::string& needle, + const size_t& haystackOffset = 0 ); + }; + /** @return string hash */ static constexpr HashType hash( const char* str ) { //! djb2 diff --git a/projects/linux/ee.creator.user b/projects/linux/ee.creator.user index 94228d6c7..70c35dba3 100644 --- a/projects/linux/ee.creator.user +++ b/projects/linux/ee.creator.user @@ -1,6 +1,6 @@ - + EnvironmentId @@ -89,7 +89,7 @@ {6d057187-158a-4883-8d5b-d470a6b6b025} 10 0 - 18 + 19 ../../make/linux @@ -1847,7 +1847,7 @@ ecode-debug ProjectExplorer.CustomExecutableRunConfiguration - ../src/eepp/ui/doc/textdocument.cpp + ../ false true diff --git a/src/eepp/core/string.cpp b/src/eepp/core/string.cpp index d6ac6b3e7..705ec477e 100644 --- a/src/eepp/core/string.cpp +++ b/src/eepp/core/string.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -10,6 +11,106 @@ namespace EE { const std::size_t String::InvalidPos = StringType::npos; +/* + * Originally written by Joel Yliluoma + * http://en.wikipedia.org/wiki/Talk:Boyer%E2%80%93Moore_string_search_algorithm#Common_functions + * + * Copyright (c) 2008 Joel Yliluoma + * Copyright (c) 2010 Hongli Lai + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* This function creates an occ table to be used by the search algorithms. */ +/* It only needs to be created once per a needle to search. */ +const String::BMH::OccTable String::BMH::createOccTable( const unsigned char* needle, + size_t needleLength ) { + OccTable occ( + UCHAR_MAX + 1, + needleLength ); // initialize a table of UCHAR_MAX+1 elements to value needle_length + + /* Populate it with the analysis of the needle */ + /* But ignoring the last letter */ + if ( needleLength >= 1 ) { + const size_t needleLengthMinus1 = needleLength - 1; + for ( size_t a = 0; a < needleLengthMinus1; ++a ) + occ[needle[a]] = needleLengthMinus1 - a; + } + return occ; +} + +/* A Boyer-Moore-Horspool search algorithm. */ +/* If it finds the needle, it returns an offset to haystack from which + * the needle was found. Otherwise, it returns haystack_length. + */ +size_t String::BMH::search( const unsigned char* haystack, size_t haystackLength, + const unsigned char* needle, const size_t needleLength, + const OccTable& occ ) { + if ( needleLength > haystackLength ) + return haystackLength; + if ( needleLength == 1 ) { + const unsigned char* result = + (const unsigned char*)std::memchr( haystack, *needle, haystackLength ); + return result ? size_t( result - haystack ) : haystackLength; + } + + const size_t needleLengthMinus1 = needleLength - 1; + + const unsigned char lastNeedleChar = needle[needleLengthMinus1]; + + size_t haystackPosition = 0; + while ( haystackPosition <= haystackLength - needleLength ) { + const unsigned char occChar = haystack[haystackPosition + needleLengthMinus1]; + + // The author modified this part. Original algorithm matches needle right-to-left. + // This code calls memcmp() (usually matches left-to-right) after matching the last + // character, thereby incorporating some ideas from + // "Tuning the Boyer-Moore-Horspool String Searching Algorithm" + // by Timo Raita, 1992. + if ( lastNeedleChar == occChar && + std::memcmp( needle, haystack + haystackPosition, needleLengthMinus1 ) == 0 ) { + return haystackPosition; + } + + haystackPosition += occ[occChar]; + } + return haystackLength; +} + +Int64 String::BMH::find( const std::string& haystack, const std::string& needle, + const size_t& haystackOffset, const OccTable& occ ) { + size_t result = search( (const unsigned char*)haystack.c_str() + haystackOffset, + haystack.size() - haystackOffset, (const unsigned char*)needle.c_str(), + needle.size(), occ ); + if ( result == haystack.size() - haystackOffset ) { + return -1; + } else { + return (Int64)haystackOffset + result; + } +} + +Int64 String::BMH::find( const std::string& haystack, const std::string& needle, + const size_t& haystackOffset ) { + const auto occ = createOccTable( (const unsigned char*)needle.c_str(), needle.size() ); + return find( haystack, needle, haystackOffset, occ ); +} + String::HashType String::hash( const std::string& str ) { return String::hash( str.c_str() ); } diff --git a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp index 9d916b4f4..3e1f35783 100644 --- a/src/eepp/ui/doc/syntaxdefinitionmanager.cpp +++ b/src/eepp/ui/doc/syntaxdefinitionmanager.cpp @@ -879,10 +879,10 @@ SyntaxDefinitionManager::SyntaxDefinitionManager() { add( {"Config File", {"%.ini$", "%.conf$", "%.desktop$", "%.service$", "%.cfg$", "Doxyfile"}, { - {{"%s?#%x+"}, "string"}, - {{"[%a_][%w-+_%s%p]*%f[=]"}, "keyword"}, {{"^#.-\n"}, "comment"}, {{"%s#.-\n"}, "comment"}, + {{"%s?#%x+"}, "string"}, + {{"[%a_][%w-+_%s%p]*%f[=]"}, "keyword"}, {{"\"", "\"", "\\"}, "string"}, {{"'", "'", "\\"}, "string"}, {{"^%[.-%]"}, "keyword2"}, diff --git a/src/eepp/ui/doc/textdocument.cpp b/src/eepp/ui/doc/textdocument.cpp index d397c039a..1fe3e2227 100644 --- a/src/eepp/ui/doc/textdocument.cpp +++ b/src/eepp/ui/doc/textdocument.cpp @@ -189,6 +189,9 @@ void TextDocument::guessIndentType() { mIndentType = IndentType::IndentSpaces; mIndentWidth = guessWidth.begin()->first; } + + if ( mIndentWidth == 0 ) + mIndentWidth = 4; } void TextDocument::resetSyntax() { diff --git a/src/tools/codeeditor/codeeditor.cpp b/src/tools/codeeditor/codeeditor.cpp index 2dd106485..46e2c680c 100644 --- a/src/tools/codeeditor/codeeditor.cpp +++ b/src/tools/codeeditor/codeeditor.cpp @@ -650,10 +650,10 @@ void App::initGlobalSearchBar() { } ); }; UIPushButton* searchButton = mGlobalSearchBarLayout->find( "search" ); - UICheckBox* caseSensitiveBox = mGlobalSearchBarLayout->find( "case_sensitive" ); + UICheckBox* caseSensitiveChk = mGlobalSearchBarLayout->find( "case_sensitive" ); UIWidget* searchBarClose = mGlobalSearchBarLayout->find( "global_searchbar_close" ); mGlobalSearchInput = mGlobalSearchBarLayout->find( "global_search_find" ); - mGlobalSearchBarLayout->addCommand( "search-in-files", [&, caseSensitiveBox] { + mGlobalSearchBarLayout->addCommand( "search-in-files", [&, caseSensitiveChk] { if ( mDirTree && mDirTree->getFilesCount() > 0 && !mGlobalSearchInput->getText().empty() ) { UILoader* loader = UILoader::New(); loader->setId( "loader " ); @@ -665,7 +665,7 @@ void App::initGlobalSearchBar() { mGlobalSearchTree->getSize() * 0.5f - loader->getSize() * 0.5f ); Clock* clock = eeNew( Clock, () ); std::string search( mGlobalSearchInput->getText().toUtf8() ); - ProjectSearch::find( + ProjectSearch::findHorspool( mDirTree->getFiles(), search, #if EE_PLATFORM != EE_PLATFORM_EMSCRIPTEN mThreadPool, @@ -683,7 +683,7 @@ void App::initGlobalSearchBar() { loader->close(); } ); }, - caseSensitiveBox->isChecked() ); + caseSensitiveChk->isChecked() ); } } ); mGlobalSearchBarLayout->addCommand( "close-global-searchbar", [&] { @@ -693,6 +693,10 @@ void App::initGlobalSearchBar() { } ); mGlobalSearchBarLayout->getKeyBindings().addKeybindsString( { {"escape", "close-global-searchbar"}, + {"ctrl+s", "change-case"}, + } ); + mGlobalSearchBarLayout->addCommand( "change-case", [&, caseSensitiveChk] { + caseSensitiveChk->setChecked( !caseSensitiveChk->isChecked() ); } ); mGlobalSearchInput->addEventListener( Event::OnPressEnter, [&]( const Event* ) { if ( mGlobalSearchInput->hasFocus() ) { @@ -1325,12 +1329,12 @@ void App::updateDocumentMenu() { ->asType() ->setActive( doc.getAutoDetectIndentType() ); - mDocMenu->find( "indent_width" ) - ->asType() - ->getSubMenu() - ->find( String::format( "indent_width_%d", doc.getIndentWidth() ) ) - ->asType() - ->setActive( true ); + auto* curIndent = mDocMenu->find( "indent_width" ) + ->asType() + ->getSubMenu() + ->find( String::format( "indent_width_%d", doc.getIndentWidth() ) ); + if ( curIndent ) + curIndent->asType()->setActive( true ); mDocMenu->find( "indent_type" ) ->asType() diff --git a/src/tools/codeeditor/projectsearch.cpp b/src/tools/codeeditor/projectsearch.cpp index a4c3ee6f4..657ee35f1 100644 --- a/src/tools/codeeditor/projectsearch.cpp +++ b/src/tools/codeeditor/projectsearch.cpp @@ -1,4 +1,79 @@ #include "projectsearch.hpp" +#include + +static int countNewLines( const std::string& text, const size_t& start, const size_t& end ) { + const char* startPtr = text.c_str() + start; + const char* endPtr = text.c_str() + end; + size_t count = 0; + do { + if ( '\n' == *startPtr ) + count++; + } while ( ++startPtr && startPtr != endPtr ); + return count; +} + +static std::string textLine( const std::string& fileText, const size_t& fromPos, size_t& relCol ) { + size_t start = 0; + size_t end = 0; + const char* stringStartPtr = fileText.c_str(); + const char* startPtr = fileText.c_str() + fromPos; + const char* ptr = startPtr; + while ( stringStartPtr != ptr && *--ptr != '\n' ) { + } + const char* nlStartPtr = ptr + 1; + start = ptr - stringStartPtr + 1; + ptr = startPtr; + while ( ++ptr && *ptr != '\n' ) { + } + end = ptr - stringStartPtr; + relCol = startPtr - nlStartPtr; + return fileText.substr( start, end - start ); +} + +static std::vector +searchInFileHorspool( const std::string& file, const std::string& text, const bool& caseSensitive, + const String::BMH::OccTable& occ ) { + std::vector res; + std::string fileText; + Int64 lSearchRes = 0; + Int64 searchRes = 0; + size_t totNl = 0; + FileSystem::fileGet( file, fileText ); + if ( !caseSensitive ) { + std::string fileTextOriginal( fileText ); + String::toLowerInPlace( fileText ); + do { + searchRes = String::BMH::find( fileText, text, searchRes, occ ); + if ( searchRes != -1 ) { + TextPosition pos; + size_t relCol; + totNl += countNewLines( fileText, lSearchRes, searchRes ); + std::string str = textLine( fileTextOriginal, searchRes, relCol ); + pos.setLine( totNl ); + pos.setColumn( relCol ); + res.push_back( {str, pos} ); + lSearchRes = searchRes; + searchRes += text.size(); + } + } while ( searchRes != -1 ); + } else { + do { + searchRes = String::BMH::find( fileText, text, searchRes, occ ); + if ( searchRes != -1 ) { + TextPosition pos; + size_t relCol; + totNl += countNewLines( fileText, lSearchRes, searchRes ); + std::string str = textLine( fileText, searchRes, relCol ); + pos.setLine( totNl ); + pos.setColumn( relCol ); + res.push_back( {str, pos} ); + lSearchRes = searchRes; + searchRes += text.size(); + } + } while ( searchRes != -1 ); + } + return res; +} static std::vector searchInFile( const std::string& file, const std::string& text, const bool& caseSensitive ) { @@ -30,6 +105,19 @@ void ProjectSearch::find( const std::vector files, const std::strin result( res ); } +void ProjectSearch::findHorspool( const std::vector files, const std::string& string, + ResultCb result, bool caseSensitive ) { + Result res; + const auto occ = + String::BMH::createOccTable( (const unsigned char*)string.c_str(), string.size() ); + for ( auto& file : files ) { + auto fileRes = searchInFileHorspool( file, string, caseSensitive, occ ); + if ( !fileRes.empty() ) + res.push_back( {file, fileRes} ); + } + result( res ); +} + struct FindData { Mutex resMutex; Mutex countMutex; @@ -37,7 +125,7 @@ struct FindData { ProjectSearch::Result res; }; -void ProjectSearch::find( const std::vector files, const std::string& string, +void ProjectSearch::find( const std::vector files, std::string string, std::shared_ptr pool, ResultCb result, bool caseSensitive ) { if ( files.empty() ) result( {} ); @@ -66,3 +154,38 @@ void ProjectSearch::find( const std::vector files, const std::strin } ); } } + +void ProjectSearch::findHorspool( const std::vector files, std::string string, + std::shared_ptr pool, ResultCb result, + bool caseSensitive ) { + if ( files.empty() ) + result( {} ); + FindData* findData = eeNew( FindData, () ); + findData->resCount = files.size(); + if ( !caseSensitive ) + String::toLowerInPlace( string ); + const auto occ = + String::BMH::createOccTable( (const unsigned char*)string.c_str(), string.size() ); + for ( auto& file : files ) { + pool->run( + [findData, file, string, caseSensitive, occ] { + auto fileRes = searchInFileHorspool( file, string, caseSensitive, occ ); + if ( !fileRes.empty() ) { + Lock l( findData->resMutex ); + findData->res.push_back( {file, fileRes} ); + } + }, + [result, findData] { + int count; + { + Lock l( findData->countMutex ); + findData->resCount--; + count = findData->resCount; + } + if ( count == 0 ) { + result( findData->res ); + eeDelete( findData ); + } + } ); + } +} diff --git a/src/tools/codeeditor/projectsearch.hpp b/src/tools/codeeditor/projectsearch.hpp index 7ad7a9657..d7a3ca0ca 100644 --- a/src/tools/codeeditor/projectsearch.hpp +++ b/src/tools/codeeditor/projectsearch.hpp @@ -83,7 +83,8 @@ class ProjectSearch { case FileOrPosition: return Variant( String::format( "%6lld %s", - mResult[index.internalId()].results[index.row()].position.line(), + mResult[index.internalId()].results[index.row()].position.line() + + 1, mResult[index.internalId()].results[index.row()].line.c_str() ) ); } } @@ -124,8 +125,15 @@ class ProjectSearch { static void find( const std::vector files, const std::string& string, ResultCb result, bool caseSensitive ); - static void find( const std::vector files, const std::string& string, + static void find( const std::vector files, std::string string, std::shared_ptr pool, ResultCb result, bool caseSensitive ); + + static void findHorspool( const std::vector files, const std::string& string, + ResultCb result, bool caseSensitive ); + + static void findHorspool( const std::vector files, std::string string, + std::shared_ptr pool, ResultCb result, + bool caseSensitive ); }; #endif // PROJECTSEARCH_HPP