Trie Data Structure - cook the code

Saturday 24 June 2017

Trie Data Structure



Trie is an efficient information retrieval data structure. Using trie, search complexities can be brought to optimal limit (key length). If we store keys in binary search tree, a well balanced BST will need time proportional to M * log N, where M is maximum string length and N is number of keys in tree. Using trie, we can search the key in O(M) time. However the penalty is on trie storage requirements.
Every node of trie consists of multiple branches. Each branch represents a possible character of keys. We need to mark the last node of every key as leaf node. A trie node field value will be used to distinguish the node as leaf node (there are other uses of the value field). A simple structure to represent nodes of English alphabet can be as following,

// Trie node
struct TrieNode
{
     struct TrieNode *children[ALPHABET_SIZE];

     // isLeaf is true if the node represents
     // end of a word
     bool isLeaf;
};

Inserting a key into trie is simple approach. Every character of input key is inserted as an individual trie node. Note that the children is an array of pointers to next level trie nodes. The key character acts as an index into the array children. If the input key is new or an extension of existing key, we need to construct non-existing nodes of the key, and mark leaf node. If the input key is prefix of existing key in trie, we simply mark the last node of key as leaf. The key length determines trie depth.
Searching for a key is similar to insert operation, however we only compare the characters and move down. The search can terminate due to end of string or lack of key in trie. In the former case, if the value field of last node is non-zero then the key exists in trie. In the second case, the search terminates without examining all the characters of key, since the key is not present in trie.
The following picture explains construction of trie using keys given in the example below,


                       root
                    /   \    \
                    t   a     b
                    |   |     |
                    h   n     y
                    |   |  \  |
                    e   s  y  e
                 /  |   |
                 i  r   w
                 |  |   |
                 r  e   e
                        |
                        r
 
 
 
 In the picture, every character is of type trie_node_t. For example, 
 the root is of type trie_node_t, and it’s children a, b and t
 are filled, all other nodes of root will be NULL. Similarly, “a” at the
 next level is having only one child (“n”), all other children are NULL.
 The leaf nodes are in blue.

Insert and search costs O(key_length), however the memory requirements of trie is O(ALPHABET_SIZE * key_length * N) where N is number of keys in trie. There are efficient representation of trie nodes (e.g. compressed trie, ternary search tree, etc.) to minimize memory requirements of trie.


#include <bits/stdc++.h>
using namespace std;
struct node {
    int isword;
    struct node *child[26];
};
struct node *newnode()
{
    struct node *newnode = new struct node;
    newnode->isword = 0;
    for(int i = 0; i < 26; i++) {
        newnode->child[i] = NULL;
    }
    return newnode;
}
struct node *root = NULL;
void addword(struct node *root,string s)
{
    int i;
    int ch;
    struct node *temp = root;

    for(i = 0; i < s.size(); i++) {
        ch = s[i]-'a';
        if(temp->child[ch] == NULL) {
            temp->child[ch] = newnode();
        }
        temp = temp->child[ch];
    }
    temp->isword = 1;
}
int findword(struct node *root,string s)
{
    int i;
    int ch;
    struct node *temp = root;

    for(i = 0; i < s.size(); i++) {
        ch = s[i]-'a';
        if(temp->child[ch] == NULL) {
            return 0;
        }
        temp = temp->child[ch];
    }
    if(temp->isword == 1) {
        return 1;
    }
    return 0;
}
int isempty(struct node *p)
{
    int i;
    for(i = 0; i < 26; i++) {
        if(p->child[i] != NULL) {
            return 0;
        }
    }
    return 1;
}
void removeword(string s,struct node *current,int index,int n)
{
    if(current == NULL) {
        return;
    }
    if(index == n) {
        current->isword = 0;
        if(isempty(current)) {
            delete[] current;
        }
        return;
    }
    int c = s[index]-'a';
    removeword(s,current->child[c],index+1,n);
}
void sortedorder(struct node *current,string s)
{
    if(current == NULL) {
        return;
    }
    if(current->isword == 1) {
        cout<<s<<endl;
    }
    for(int i = 0; i < 26; i++) {
        if(current->child[i]) {
            sortedorder(current->child[i],s+(char)(i+'a'));
        }
    }
}
int main()
{
    struct node *root = newnode();
    string s;

    addword(root,"ram");
    addword(root,"shyam");
    addword(root,"shivam");
    addword(root,"rohit");
    cout<<findword(root,"rohit")<<endl;
    cout<<findword(root,"ram")<<endl;
    cout<<findword(root,"chawla")<<endl;
    removeword("ro",root,0,2);
    cout<<findword(root,"rohit")<<endl;
    sortedorder(root,s);

}




second programm :-

#include <iostream>

using namespace std;
struct trieNode
{
    trieNode *children[26];
    bool isLeaf;
};
trieNode *createNode()
{
    trieNode *newNode=new trieNode;
    if(newNode)
    {
        newNode->isLeaf=false;
        for(int i=0;i<26;i++)
            newNode->children[i]=NULL;
    }
    return newNode;

}
void insert(trieNode *root,string str)
{
    trieNode *ptr=root;
    for(auto it=str.begin();it!=str.end();it++)
    {
        if(ptr->children[*it-97])
            ptr=ptr->children[*it-97];
        else
        {
            trieNode *newNode=createNode();
            ptr->children[*it-97]=newNode;
            ptr=newNode;
        }
    }
    ptr->isLeaf=true;
}
bool search(trieNode *root,string str)
{
    for(auto it=str.begin();it!=str.end();it++)
    {
        if(!root->children[*it-97])
            return false;
        root=root->children[*it-97];
    }
    return root && root->isLeaf;
}
bool isFree(trieNode *root)
{
    for(int i=0;i<26;i++)
        if(root->children[i])
            return false;
    return true;
}
bool deleteStr(trieNode *root,string str,int level)
{
    if(root)
    {
        if(level==str.length())
        {
            root->isLeaf=false;
            if(isFree(root))
            {
                delete root;
                return true;
            }
            return false;
        }

        int key=str[level];
        if(deleteStr(root->children[key-97],str,level+1))
        {
            if(!root->isLeaf)
            {
                delete root;
                return true;
            }
            else
                return false;
        }

    }
    return false;

}
int main()
{
    trieNode *root=NULL;
    root=createNode();
    string str[]={"shubham","shubh","shubha"};
    int n=sizeof(str)/sizeof(str[0]);
    for(int i=0;i<n;i++)
    {
       insert(root,str[i]);
    }

    deleteStr(root,"shubham",0);
    if(search(root,"shubh"))
        cout<<"exists";
    else
        cout<<"doesn't exist";
    return 0;
}



No comments:

Post a Comment