Ternary Search Tree Implementation in C++ - cook the code

Friday, 30 June 2017

Ternary Search Tree Implementation in C++

A Ternary Search Tree is a trie which leverages concepts of Binary Search Tree as well. A Ternary Search Tree is as memory efficient as Binary Search Trees and time efficient as a Trie.

It is an efficient data structure to store and search large number of strings.

A node in a Ternary Search Tree comprises of these fields :

  • Left pointer - Points to Ternary Search Tree containing all strings alphabetically lesser than current node's data
  • Right pointer - Points to Ternary Search Tree containing all strings alphabetically greater than current node's data
  • Equal pointer - Points to Ternary Search Tree containing all strings alphabetically equal to current node's data
  • End of string flag - Flag indicating the end of string
  • Data - Actual data in the form of single character
Ternary Search Tree Node
For example, consider adding these strings in the same order into a Ternary Search Tree :
  1. "Lead"
  2. "Leader"
  3. "Leads"
  4. "Late"
  5. "State"
Let's build a visualization of ternary search tree out of above data :
  1. "Lead"
  2. "Leader"

  3. "Leads"

  4. "Late"

  5. "State"

9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
//TST.h
#ifndef TST_H
#define TST_H
//#define DEBUG_PROGRAM_MEMORY
//Node of a Ternary Search Tree
typedef struct TSTNode{
 char data; //Actual data stored in form of character
 bool bEOS; //flag marking end of string
 struct TSTNode* left;   //All character data less than this node
 struct TSTNode* eq;  //All character data equal to this node
 struct TSTNode* right; //All character data greater than this node
}TSTNode;
//Inserts a string in the TST
TSTNode* Insert(TSTNode* root, char* str);
//Prints all strings in the TST
void PrintAllStringsInTST(TSTNode* root);
//Gets the length of maximum length string in TST
int MaxLenStringLen(TSTNode *root);
//Deleted the complete TST
void DeleteTST(TSTNode *root);
//Search a pattern in TST
bool SearchTST(TSTNode *root, char* pattern);
//Prints
#ifdef DEBUG_PROGRAM_MEMORY
#include <map>
static std::map<TSTNode*, char> mem_addrs;
void CheckTSTMem();
#endif
#endif
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//TST.cpp
#include <iostream>
#define DEBUG_PROGRAM_MEMORY
#include "TST.h"
#include <cstdlib>
#include <utility>
#define MAX_LEN 1024
#define MAX( a, b, c ) ((a)>(b) ? ((a)>(c) ? (a):(c)) : ( (b)>(c) ? (b):(c) ))
TSTNode* Insert(TSTNode* root, char* str)
{
 if(root == NULL)
 {
  root = (TSTNode*)malloc(sizeof(TSTNode));
  if(root == NULL)
  {
   std::cout<<"Memory allocation failed"<<std::endl;
   return NULL;
  }
  //Insert first character of string in the root node
  root->data = *str;
#ifdef DEBUG_PROGRAM_MEMORY
  mem_addrs.insert(std::make_pair(root, root->data));
#endif
  root->bEOS = false;
  root->left = root->eq = root->right = NULL;
 }
  
 if(*str  < root->data)
  root->left = Insert(root->left, str);
 else if (*str == root->data)
 {
  if(*(str + 1))
   root->eq = Insert(root->eq, str + 1);
  else
   root->bEOS = true;
 }
 else
  root->right = Insert(root->right, str);
  
 return root;
}
//Helper to print the strings in TST
static void PrintHelper(TSTNode* root, char* buffer, int depth)
{
 if (root)
 {
  // Traverse the left subtree
  PrintHelper(root->left, buffer, depth);
  buffer[depth] = root->data;
  //Once end of string flag is encountered, print the string
  if (root->bEOS)
  {
   buffer[depth + 1] = '\0';
   std::cout<< buffer << std::endl;
  }
  // Traverse the middle subtree
  PrintHelper(root->eq, buffer, depth + 1);
  // Traverse the right subtree
  PrintHelper(root->right, buffer, depth);
 }
}
// Function to print TST's strings
void PrintAllStringsInTST(TSTNode* root)
{
 char buffer[MAX_LEN];
 PrintHelper(root, buffer, 0);
}
bool SearchTST(TSTNode *root, char* pattern)
{
 while (root != NULL)
 {
  if (*pattern < root->data)
   root = root->left;
  else if (*pattern == root->data)
  {
   //If end of string flag is found and the pattern length is also exhausted,
   //we can safely say that the pattern is present in the TST
   if (root->bEOS && *(pattern + 1) == '\0')
    return true;
   pattern++;
   root = root->eq;
  }
  else
   root = root->right;
 }
 return false;
}
//Function to determine largest
int MaxLenStringLen(TSTNode *root)
{
 if (root == NULL)
  return 0;
 int leftLen = MaxLenStringLen(root->left);
 int middleLen = MaxLenStringLen(root->eq) + 1;
 int rightLen = MaxLenStringLen(root->right);
 return MAX( leftLen, middleLen, rightLen);
}
void DeleteTST(TSTNode *root)
{
 TSTNode *tmp = root;
 if (tmp)
 {
  DeleteTST(tmp->left);
  DeleteTST(tmp->eq);
  DeleteTST(tmp->right);
#ifdef DEBUG_PROGRAM_MEMORY
  mem_addrs.erase(tmp);
#endif
  delete tmp;
 }
}
#ifdef DEBUG_PROGRAM_MEMORY
void CheckTSTMem()
{
 std::map<TSTNode*, char>::iterator itr = mem_addrs.begin();
 if (mem_addrs.size() == 0)
 {
  std::cout << "No memory leaks";
  return;
 }
 while (itr != mem_addrs.end())
 {
  std::cout << "Memory address " << itr->first<< " for \"" << itr->second << "\" has not been deallocated" << std::endl;
  ++itr;
 }
}
#endif
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
//Main.cpp
#include <iostream>
#include "TST.h"
int main(int argc, char** argv) {
  
 TSTNode *root = NULL;
 root = Insert(root, "boats");
 root = Insert(root, "boat");
 root = Insert(root, "bat");
 root = Insert(root, "bats");
 root = Insert(root, "stages");
 PrintAllStringsInTST(root);
 std::cout << "Maximum length string in this TST is of size "<< MaxLenStringLen(root) << std::endl;
 char *str = "hello";
 char *str1 = "bat";
 if (SearchTST(root, str) == false)
  std::cout << "\""<<str<<"\" not found in TST" << std::endl;
 else
  std::cout << "\"" << str << "\" is present in TST" << std::endl;
 if (SearchTST(root, str1) == false)
  std::cout << "\"" << str << "\" not found in TST" << std::endl;
 else
  std::cout << "\"" << str1 << "\" is present in TST" << std::endl;
 DeleteTST(root);
#ifdef DEBUG_PROGRAM_MEMORY
 CheckTSTMem();
#endif
  
 return 0;
}

No comments:

Post a Comment