백준 9202 Boggle, Trie with 할당, 비할당 그리고 Hashing 비교

티스토리 뷰

알고리즘 문제/data structure

백준 9202 Boggle, Trie with 할당, 비할당 그리고 Hashing 비교

gaelim 2019. 4. 16. 20:51

1 : Hashing ( Open Addressing ), 2 : Trie without memory allocation, 3 : Trie with memory allocation

이 문제는 비교적 구현의 성격이 강한 문제이다.
해당 문제에 대해 포스팅을 남긴 이유는 삼성 B형과 성격이 비슷하기에 골랐다.

입력으로 들어온 단어들을 내 사전 (Trie) 로 저장해놓고, 4*4 배열을 규칙에 따라 순회하면서 생성된 단어를 Trie에서 탐색하는 것이 해당 문제의 풀이이다.

이 풀이 서술은 조금은 일반적인 서술로써, 구현에 대해 조금 구체적으로 이야기 해보려 한다.

1 트라이 with memory allocation

대부분이 알고있는 Trie 의 형태로써 Trie 노드가 자식 노드의 주소를 가지고 있는 것이다. 그리고 동적으로 관리하기 때문에 new, delete를 동반한다.

아래는 해당 문제에 대한 Trie 풀이 소스이다.
해당 문제 자체가 올림피아드 성격의 대회 문제이기에 구현성이 강하다. 따라서 이야기 할 내용에 비해 너무 긴 소스가 작성 되었다.

읽으시는 분들은 풀이의 구체적인 알고리즘 틀보다는, Trie 구조체 몸체와 al 변수가 자식 노드를 어떻게 관리하는지만 눈으로 흘겨보시면 된다. 특히, 자식 노드를 체크하는 go 함수 내부와 자식 노드를 생성하거나 자식 노드로 이동하는 Creating Trie 주석 아래 부분이다.

#include <iostream>
#include <string.h>
 
using namespace std;
struct Trie
{
  Trie* al[26];
  bool word;
  bool hit;
  Trie() { word = false; for (int i=0; i<26; i++) al[i] =0; hit=0;}
};
Trie* root;
 
char boggle[4][5]; // input 
bool v[4][4];
 
int score_table[10] = {0, 0, 0, 1, 1, 2, 3, 5, 11};
int Score;
char ansWord[9];
int mxLen;
char tempWord[9];
int hit;
 
void go (int x, int y , int depth, Trie* &cur)
{
  if ( depth == 9) return;
 
  tempWord[depth-1] = boggle[x][y];
  tempWord[depth] = 0;
 
  if (cur->word) // does exist in Trie
  {
    if ( !cur->hit )  // check if word duplicate in same test_case
    {
      cur->hit=true;
      Score += score_table[depth];
      hit++;
 
      int tmpLen = strlen(tempWord);
 
      if ( mxLen < tmpLen)
      {
        strcpy(ansWord, tempWord);
        mxLen = tmpLen;
      }
      else if ( mxLen == tmpLen )
      {
        if ( strcmp(ansWord, tempWord) >0) strcpy(ansWord, tempWord);
      }
    }
  }
 
  for (int i=-1; i<=1; i++)
  {
    for (int j=-1; j<=1; j++)
    {
 
      int nx = x +i, ny = y+j;
      if ( nx < 0 || ny <0 || nx >=4 || ny >= 4 ) continue;
      if( v[nx][ny] ) continue;
 
      Trie* next = cur->al[boggle[nx][ny]-'A'];
      if ( next )
      {
        v[nx][ny]=1;
        go(nx, ny, depth+1, next);
        v[nx][ny]=0;
      }
    }
  }
}
 
void init(Trie* cur)
{
  cur->hit=false;
  for (int i=0; i<26; i++)
  {
    if (cur->al[i]) init(cur->al[i]);
  }
}
 
int main()
{
 
  ios::sync_with_stdio(false);
  cin.tie(0); cout.tie(0);
  int n;
  cin >> n;
 
  char words[(int)3e5][9];
  for (int i=0; i<n; i++)
  {
    cin >> words[i];
  }
 
  /* Creating Trie */
  root = new Trie();
  for (int i=0; i<n; i++)
  {
    int len = strlen(words[i]);
 
    Trie* cur = root;
    for (int j=0; j<len; j++)
    {
      int idx = (words[i][j]-'A');
 
      if ( cur->al[idx] == 0 )
      {
        cur->al[idx] = new Trie();
      }
      cur= cur->al[idx];
    }
    cur->word = 1;
  }
 
  /* End of Creating */
 
  int T;
  cin >> T;
  for (int test_case=1; test_case<=T; test_case++)
  {
    init(root); // trie hit initializing
 
    memset(ansWord, 0, sizeof(ansWord));
    Score= hit = mxLen= 0;
    for (int i=0; i<4; i++)
    {
      cin >> boggle[i];
    }
 
    for (int i=0; i<4; i++)
    {
      for (int j=0; j<4; j++)
      {
        Trie* cur = root;
        Trie* next = cur->al[boggle[i][j]-'A'];
 
        if ( next ) 
        {
          v[i][j]=1;
          go(i,j,1, next);
          v[i][j]=0;
        }
      }
    }
    cout << Score << " " << ansWord << " " << hit << "\n";
  }
}
 
 
http://colorscripter.com/info#e" target="_blank" style="color:#4f4f4f; text-decoration:none">Colored by Color Scripter

2 트라이 without memory allocation

이제 조금은 흥미로운 이야기를 할까한다. 이 포스팅을 하게 된 이유이다. 트라이는 동적 할당이 없이 작성 할 수 있다.

소문자 알파벳만 가능하며 단어 길이가 최대 8자인 트라이를 상상할 때에 노드 수는 26^8 + 1개이다. 이는 2천억을 넘기는 수로 매우 큰 수이다.그러나 해당 문제는 단어가 30만개가 들어온다는 제약이 있다.

생각해보면 트라이에서 노드는 최대 30만 * 8 + 1 개가 존재할 수 있다. 최악의 경우, 30만개 단어에 속한 모든 알파벳이 공통된 sequence가 없이 내려간다고 하자(루트노드는 모두의 공통이므로 제외).
그러한 뻗어나감은 어쨋든 깊이 8을 넘길 수 없으며, 한 노드당 26개로 뻗어나갈 가능성이 있더라도 트리에 존재할 수 있는 알파벳 자체가 30만 * 8 개 이므로 노드 자체가 30만 + 1(루트노드) 밖에 존재할 수 없다.

이러한 점을 이용하여, 정적인 크기의 배열을 활용할 수 있다. 여기서 부모 노드는 자식 노드 저장을 전역으로 관리되는 trieIdx를 활용할 수 있다. 단, trieIdx = 0 은 루트 노드를 표현한다.

해당 접근법의 가장 큰 이점은, 동적 힙 메모리 관리에 따른 오버헤드가 없다는 점이다. 해당 문제는 동적 힙 메모리에 대한 delete 또는 free() 선언이 필요 없지만, 그런 것들이 반복되는 구조면 -특히, 여러 테스트케이스마다 새로운 단어들이 주어져서, 사전을 생성하는 성격의 문제 - 연산 자체의 오버헤드가 알고리즘 수행복잡도를 잡아 먹는 경우가 있다.

만약 수행시간을 기준으로 퍼센티지로 합격자/불합격자 컷팅을 하는 시험의 경우, 유용하게 사용될 수 있다. 삼성 B형이 그렇다고 들었다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

#include <iostream>
#include <string.h>
 
using namespace std;
struct Trie
{
  int al[26];
  bool word;
  Trie() { word = false; for (int i=0; i<26; i++) al[i]=-1;}
};
 
int trieIdx;
Trie trie[(int)3e5*8+1];
 
char boggle[4][5]; // input 
bool wordChk[(int)3e5*9];
bool v[4][4];
 
int score_table[10] = {0, 0, 0, 1, 1, 2, 3, 5, 11};
int Score;
char ansWord[9];
int mxLen;
char tempWord[9];
int hit;
 
void go (int x, int y , int depth, int idx)
{
  if ( depth == 9) return;
 
  tempWord[depth-1] = boggle[x][y];
  tempWord[depth] = 0;
 
  if (trie[idx].word) // does exist in Trie
  {
    if (wordChk[idx] ==false )
    {
      wordChk[idx]=true;
      Score+=score_table[depth];
      hit++;
 
      int tmpLen = strlen(tempWord);
      if ( mxLen < tmpLen)
      {
        strcpy(ansWord, tempWord);
        mxLen = tmpLen;
      }
      else if ( mxLen == tmpLen )
      {
        if ( strcmp(ansWord, tempWord) >0) strcpy(ansWord, tempWord);
      }
    }
  }
 
  for (int i=-1; i<=1; i++)
  {
    for (int j=-1; j<=1; j++)
    {
      int nx = x +i, ny = y+j;
      if ( nx < 0 || ny <0 || nx >=4 || ny >= 4 ) continue;
      if( v[nx][ny] ) continue;
 
      int nextIdx = trie[idx].al[boggle[nx][ny]-'A'];
      if (nextIdx == -1) continue;
 
      v[nx][ny]=1;
      go(nx, ny, depth+1, nextIdx);
      v[nx][ny]=0;
    }
  }
}
 
 
int main()
{
  int n;
  cin >> n;
 
  char words[(int)3e5][9];
  for (int i=0; i<n; i++)
  {
    cin >> words[i];
  }
 
  /* Creating Trie */
  for (int i=0; i<n; i++)
  {
    int len = strlen(words[i]);
 
    Trie* cur = &trie[0];
    for (int j=0; j<len; j++)
    {
      int idx = (words[i][j]-'A');
      if ( cur->al[idx] == -1 ) cur->al[idx] = ++trieIdx;
      cur = &trie[cur->al[idx]]; 
    }
    cur->word = 1;
  }
 
  /* End of Creating */
 
  int T;
  cin >> T;
  for (int test_case=1; test_case<=T; test_case++)
  {
    memset(wordChk, false, sizeof(wordChk));
    memset(ansWord, 0, sizeof(ansWord));
    Score= hit = mxLen= 0;
    for (int i=0; i<4; i++)
    {
      cin >> boggle[i];
    }
 
    for (int i=0; i<4; i++)
    {
      for (int j=0; j<4; j++)
      {
 
        int nextIdx = trie[0].al[boggle[i][j]-'A'];
        if (nextIdx == -1) continue;
 
        v[i][j]=1;
        go(i,j,1, nextIdx);
        v[i][j]=0;
      }
    }
    cout << Score << " " << ansWord << " " << hit << "\n";
  }
}
 
 

cs

동적할당이 없는 트라이에 대한 내용은 아래 블로그에서 큰 영향을 받았다. 감사합니다.

https://blog.naver.com/proability/221478881381

[C++] 휴대폰 자판

어제 정적트라이로 문제를 풀고 재밌어서 트라이 문제 한문제 더 포스팅을 해볼까 한다확실히 굳혀졌다. ...

blog.naver.com

3 Hashing with Open Addressing

생성된 문자열에 대해 Hashing을 수행하여 테이블을 관리하는 것 또한 문제를 풀 수 있다.

나는 근본적으로, 해싱에 대해 불안함을 가지고 있다. 해싱은 이상적인 형태가 생성되리라 믿음에 의존하는 알고리즘이라고 생각하기 때문이다.

그러나 테스트 케이스가 해싱을 저격할 만큼 예리하지 않고 또는 그러한 접근법으로 풀기를 바라는 출제자가 있거나, 랜덤 테스트케이스일 경우 해싱은 충분히 좋은 결과를 낼 수 있다.

위는 테이블 크기를 단어의 갯수 * 2 로 하였을 때 결과이다. 메모리도 준수하고, 실행 시간 또한 그리 나쁘지 않다.

아래는 단어의 갯수의 30배를 넘기는 크기(1e7, 1e7+7) 또는 그 이하의 크기 (1e6, 1e6+7 ) 등에 대한 결과이다.

그렇게 결과 자체가 나쁘지않다. 그리고 구현자체 또한 복잡하지 않아 좋다. 그러나 중간 중간 볼수 있듯 충분하게 큰 테이블이 아닌경우 시간초과를 낼 수도 있으므로 주의 해야한다.

아래는 해싱을 활용한 풀이 소스이다.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

#include <iostream>
#include <string.h>
 
#define MAX_TABLE (int)3e5*2
 
using namespace std;
struct Hash
{
  int idx;
  Hash() { idx=-1; }
};
char words[(int)3e5][9];
Hash hashes[(int)6e5];
 
char boggle[4][5]; // input 
bool v[4][4];
bool wordChk[(int)3e5*9];
 
int score_table[10] = {0, 0, 0, 1, 1, 2, 3, 5, 11};
int Score;
char ansWord[9];
int mxLen;
char tempWord[9];
int hit;
 
unsigned long hashFunc(const char *str)
{
  unsigned long hash = 5381;
  int c;
  while (c = *str++) hash = (((hash << 5) + hash) + c) % MAX_TABLE; 
  return hash % MAX_TABLE;
}
 
void go (int x, int y , int depth)
{
  if ( depth == 9) return;
 
  tempWord[depth-1] = boggle[x][y];
  tempWord[depth] = 0;
 
  int idx = hashFunc(tempWord);
  while ( hashes[idx].idx !=  -1 )
  {
    int iidx = hashes[idx].idx;
    if ( strcmp (words[iidx], tempWord) == 0 )
    {
      if ( wordChk[iidx] ) break;
      wordChk[iidx]=true;
      Score+=score_table[depth];
      hit++;
 
      int tmpLen = strlen(tempWord);
      if ( mxLen < tmpLen)
      {
        strcpy(ansWord, tempWord);
        mxLen = tmpLen;
      }
      else if ( mxLen == tmpLen )
      {
        if ( strcmp(ansWord, tempWord) >0) strcpy(ansWord, tempWord);
      }
    }
    idx++;
    idx%=MAX_TABLE;
  }
 
  for (int i=-1; i<=1; i++)
  {
    for (int j=-1; j<=1; j++)
    {
      int nx = x +i, ny = y+j;
      if ( nx < 0 || ny <0 || nx >=4 || ny >= 4 ) continue;
      if( v[nx][ny] ) continue;
 
      v[nx][ny]=1;
      go(nx, ny, depth+1);
      v[nx][ny]=0;
    }
  }
}
 
int main()
{
  int n;
  cin >> n;
 
  for (int i=0; i<n; i++)
  {
    cin >> words[i];
  }
 
  /* HASHING */
  for (int i=0; i<n; i++)
  {
    int idx = hashFunc(words[i]);
    while ( hashes[idx].idx != -1 )
    {
      idx++;
      idx%=MAX_TABLE;
    }
    hashes[idx].idx = i;
  }
 
  /* End of Creating */
 
  int T;
  cin >> T;
  for (int test_case=1; test_case<=T; test_case++)
  {
 
    memset(wordChk, false, sizeof(wordChk));
    memset(ansWord, 0, sizeof(ansWord));
    Score= hit = mxLen= 0;
    for (int i=0; i<4; i++) cin >> boggle[i];
    for (int i=0; i<4; i++)
    {
      for (int j=0; j<4; j++)
      {
        v[i][j]=1;
        go(i,j,1);
        v[i][j]=0;
      }
    }
    cout << Score << " " << ansWord << " " << hit << "\n";
  }
}
 
 

 

'알고리즘 문제 > data structure' 카테고리의 다른 글

백준 6549 히스토그램에서 가장 큰 직사각형 (0)	2019.02.16
백준1620 나는야 포켓몬 마스터 이다솜 (0)	2018.11.23
swexpert 3135 홍준이의 사전놀이 (0)	2018.03.10
911E Stack Sorting (0)	2017.12.31

공지사항

소스코드 중 링크가 존재하지 않다고 뜨는 것은⋯

최근에 올라온 글

최근에 달린 댓글

Total

Today

Yesterday

링크

TAG more

« 2024/04 »
일	월	화	수	목	금	토
	1	2	3	4	5	6
7	8	9	10	11	12	13
14	15	16	17	18	19	20
21	22	23	24	25	26	27
28	29	30

글 보관함

welcome!

티스토리 뷰