Boyer Moore Horspool Algorithm

First Step

๋ฌธ์ž์—ด์—์„œ ์ผ์ •ํ•œ ๊ทœ์น™์„ ์ฐพ๋Š”๊ฒƒ์€ ๋‹ค์–‘ํ•˜๊ฒŒ ์‚ฌ์šฉํ•  ์ˆ˜ ์žˆ๊ณ  ๊ฐœ๋ฐœ๊ณผ์ •์—์„œ ์ž์ฃผ, ์‰ฝ๊ฒŒ ์ ‘ํ•  ์ˆ˜ ์žˆ์–ด์š”. ๋˜, ๋‹ค์–‘ํ•œ ํ”„๋กœ๊ทธ๋žจ์—์„œ ๋งŽ์ด ์“ฐ์ด๊ธฐ๋„ ํ•ฉ๋‹ˆ๋‹ค.

๊ทธ๋ฆฌ๊ณ  ๊ทธ๋Ÿฐ ๋ฌธ์ž์—ด ๊ฒ€์ƒ‰ ์•Œ๊ณ ๋ฆฌ์ฆ˜์€ ํฌ๋ˆ„์Šค-๋ชจ๋ฆฌ์Šค-ํ”„๋žซ ์•Œ๊ณ ๋ฆฌ์ฆ˜, ๋ณด์ด์–ด-๋ฌด์–ด-ํ˜ธ์Šคํ’€ ์•Œ๊ณ ๋ฆฌ์ฆ˜, ๋ผ๋นˆ-์นดํ”„ ์•Œ๊ณ ๋ฆฌ์ฆ˜ ๋“ฑ์„ ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.

์ €๋Š” ๊ทธ๋Ÿฐ ์•Œ๊ณ ๋ฆฌ์ฆ˜ ์ค‘ BMH ์•Œ๊ณ ๋ฆฌ์ฆ˜์— ๋Œ€ํ•ด ์ด์•ผ๊ธฐ ํ•ด๋ณด๋ ค๊ณ  ํ•ด์š”. ์ด๊ฑธ ๋จผ์ € ์ด์•ผ๊ธฐํ•˜๋Š” ์ด์œ ๋Š” ๊ฐ€์žฅ ์‰ฝ๊ธฐ ๋•Œ๋ฌธ์ž…๋‹ˆ๋‹ค!

Naive string search visualize

BMH ์•Œ๊ณ ๋ฆฌ์ฆ˜์„ ์ด์•ผ๊ธฐ ํ•˜๊ธฐ ์ „์— ์ผ๋ฐ˜์ ์ธ ๋ฌธ์ž์—ด ๊ฒ€์ƒ‰์„ ์ด์•ผ๊ธฐ ํ•˜๊ณ  ์‹ถ์–ด์š”. ์ผ๋ฐ˜์ ์ธ ๋ฌธ์ž์—ด ๊ฒ€์ƒ‰์˜ ๋ฐฉ๋ฒ•์„ ๊ทธ๋ฆผ์œผ๋กœ ๋ณด๋ฉด ๋‹ค์Œ๊ณผ ๊ฐ™์•„์š”.

Naive string search code

// C++ program for Naive Pattern 
// Searching algorithm 
#include <bits/stdc++.h> 
using namespace std; 
  
void search(char* pat, char* txt) 
{ 
    int M = strlen(pat); 
    int N = strlen(txt); 
  
    /* A loop to slide pat[] one by one */
    for (int i = 0; i <= N - M; i++) { 
        int j; 
  
        /* For current index i, check for pattern match */
        for (j = 0; j < M; j++) 
            if (txt[i + j] != pat[j]) 
                break; 
  
        if (j == M) // if pat[0...M-1] = txt[i, i+1, ...i+M-1] 
            cout << "Pattern found at index "
                 << i << endl; 
    } 
} 
  
// Driver Code 
int main() 
{ 
    char txt[] = "AABAACAADAABAAABAA"; 
    char pat[] = "AABA"; 
    search(pat, txt); 
    return 0; 
} 
  
// This code is contributed 
// by Akanksha Rai 

Output Pattern found at index 0 Pattern found at index 9 Pattern found at index 13

์ด์ฒ˜๋Ÿผ Naive string search๋Š” ๊ฒฐ๊ณผ๊ฐ’์„ ์–ป๋Š” ๊ฐ€์žฅ ์ „ํ†ต์ ์ธ ๋ฐฉ์‹์ด์—์š”. ํ•˜์ง€๋งŒ ์ด๋Ÿฐ ๊ธ€์ž๊ฐ€ ์ˆ˜์‹ญ์–ต๊ฐœ๊ฐ€ ๋œ๋‹ค๋ฉด? ํ•˜๋‚˜ํ•˜๋‚˜ ์ผ์ผํžˆ ๋น„๊ตํ•˜๋ ค๋ฉด ๊ต‰์žฅํžˆ ๋งŽ์€ ์‹œ๊ฐ„์ด ๊ฑธ๋ฆด๊บผ์—์š”. ๊ทธ๋ž˜์„œ ์ €๋Š” BMH ์•Œ๊ณ ๋ฆฌ์ฆ˜์„ ์†Œ๊ฐœํ•ด๋ณด๋ ค๊ณ  ํ•ด์š”.

Boyer-Moore-Horspool search theory

BMH ์•Œ๊ณ ๋ฆฌ์ฆ˜์€ ๋‹ค์Œ๊ณผ ๊ฐ™์€ ๋ฌธ์ž์—ด ๊ณ„์‚ฐ ๊ณต์‹์„ ์ด์šฉํ•ด ํ‘œ๋ฅผ ๋งŒ๋“ค์–ด ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค. 1. ๊ฒ€์ƒ‰ ํ‚ค์›Œ๋“œ์˜ ์ด ๊ธธ์ด๋ฅผ ๊ตฌํ•œ๋‹ค. ****=> ์˜ˆ๋ฅผ ๋“ค์–ด TOOTH ๋ผ๋Š” ๋‹จ์–ด๊ฐ€ ์žˆ๋‹ค๋ฉด ์ด๊ธธ์ด๋Š” 5๊ฐ€ ๋ ๊บผ์—์š”. 2. ๊ฒ€์ƒ‰ํ•  ํ‚ค์›Œ๋“œ์—์„œ ์กด์žฌํ•˜๋Š” ๋ฌธ์ž์˜ index๋กœ ์ด๋ฃจ์–ด์ง„ ํ‘œ๋กœ ๋งŒ๋“ ๋‹ค.

TOOTH

0

1

2

3

4

3. ํ•ด๋‹น ํ‘œ์—์„œ index๋ฅผ (์ด๊ธธ์ด - index - 1) ๋กœ ๋ณ€๊ฒฝํ•œ๋‹ค.

TOOTH

4

3

2

1

0

4. ํ‘œ์—์„œ ๋งˆ์ง€๋ง‰ ๋ฌธ์ž์™€ ํ‚ค์›Œ๋“œ์— ์กด์žฌํ•˜์ง€ ์•Š๋Š” ๋‹ค๋ฅธ ๋ฌธ์ž๋Š” ์ด๊ธธ์ด๋กœ ๋ณ€๊ฒฝํ•œ๋‹ค.

TOOTH*

4

3

2

1

5

5

5. ๋งŒ์•ฝ ๊ฐ™์€ ๋ฌธ๊ฐ€ ์žˆ๋‹ค๋ฉด ํ•ด๋‹น ๋ฌธ์ž์˜๋ฅผ ๊ฐ€์žฅ ์ž‘์€ ์ˆซ์ž๋กœ ๊ธฐ์ค€์„ ์žก๋Š”๋‹ค.

TOH*

1

2

5

5

6. ์ด๋ฅผ ์ด์šฉํ•ด์„œ ์ˆœ์ฐจ์ ์œผ๋กœ ๋ฌธ์ž๊ฐ€ ์ผ์น˜ํ•˜๋Š”์ง€ ํ™•์ธํ•˜๊ณ  ๋งค์น˜๊ฐ€ ์•ˆ๋œ๋‹ค๋ฉด ํ‘œ์˜ value ๋งŒํผ ์ด๋™์‹œํ‚จ๋‹ค.

์ฆ‰ ์ด BMH ์•Œ๊ณ ๋ฆฌ์ฆ˜์€ ๋งค์น˜๊ฐ€ ๋˜์ง€ ์•Š์•˜์„๋•Œ ํ˜„์žฌ ๊ฒ€์ƒ‰ index๋ฅผ ๊ณ„์‚ฐ ๊ณต์‹๋งŒํผ ์ด๋™์‹œํ‚ค๋Š”๊ฒŒ ํ•ต์‹ฌ์ด์—์š”.

BMH algorithm visualize

BMH Algorithm code

์œ„์˜ ์ด๋ก ์„ ์ฝ”๋“œ๋กœ ๋‚˜ํƒ€๋‚ด๋ฉด ์•„๋ž˜์™€ ๊ฐ™์„๊บผ์—์š”.

//created by hyunjin.gil 
#include <bits/stdc++.h>

using namespace std;

string alphabets="ABCDEFGHIJKLMNOPQRSTUVWXYZ";

void initialize(map<char,unsigned int> & tables,const string & pattern)
{
    
    for(int i = 0; i < alphabets.size(); ++i)
    {
        const char ch = alphabets[i];
        tables[ch] = pattern.size();
    }
    
}

void makeTable(map<char,unsigned int> & tables, const string & pattern)
{
    //last index is pattern's length
    for(int i = pattern.size()-2; i >=0; --i)
    {
        const char ch = pattern[i];
        const int value = pattern.size() - i - 1;
        
        if(tables[ch] > value){
            tables[ch] = value;
        }
    }
}

vector<unsigned int> findString(map<char, unsigned int>& tables, const string& pattern, const string& str)
{
    unsigned int offset = 0;

    const auto patternLastIndex = pattern.size() - 1;

    vector<unsigned int> matches;

    while (offset <= str.size())
    {
        auto currentIndex = patternLastIndex;
        if (currentIndex + offset > str.size())
            break;

        while (pattern[currentIndex] == str[currentIndex + offset])
        {
            if (currentIndex == 0)
            {
                matches.push_back(offset);
                offset += 1;
                break;
            }
            currentIndex--;
        }

        const char rightMostChar = str[patternLastIndex + offset];
        if (tables.find(rightMostChar) != tables.end())
        {
            offset += tables[rightMostChar];
        }
        else
        {
            offset += pattern.size();
        }
    }

    return matches;
}

int main() {
    
    const string pattern = "TOOTH";
    const string str = "TRUSTHARDTOOTHBRUSHES";
    
    map<char,unsigned int> tables;
    
    initialize(tables, pattern);
    makeTable(tables, pattern);
    int position = findString(tables, pattern, str);
    
    cout << position << endl;
    
    return 0;
}

Reference.

Last updated