Problem-11
python
Question
This problem was asked by Twitter. Implement an autocomplete system. That is, given a query string s and a set of all possible query strings, return all strings in the set that have s as a prefix. For example, given the query string 'de' and the set of strings [dog, deer, deal], return [deer, deal].
import requests
import random
import re
# get 25k words
word_site = "http://svnweb.freebsd.org/csrg/share/dict/words?view=co&content-type=text/plain"
letters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] # Other letters may be added for language support
response = requests.get(word_site)
WORDS = response.content.splitlines()
words = sorted(WORDS)
# Preprocess words to contain only alphanumerical characters and lowercase characters
for i,word in enumerate(words):
words[i] = str(word,"utf-8")
words[i] = re.sub(r'[^a-zA-Z0-9]','', words[i])
words[i] = words[i].lower()
class Node:
def __init__(self):
self.children = [None]*len(letters)
self.endChar = False
class Trie:
def __init__(self):
self.root = self.getNode()
def getNode(self): # Generate a new node
return Node()
def _getIndex(self,char):# Returns the index of the current letter
return ord(char.lower()) - ord('a')
def _getLetter(self,index): # Returns the letter belonging to the index
if(index<len(letters)):
return letters[index]
def insert(self,key):
traverse = self.root
length = len(key)
for i in range(0,length):
index = self._getIndex(key[i]) # returns the index of the i'th letter of the key
if (traverse.children[index] == None):
traverse.children[index] = self.getNode()
traverse = traverse.children[index]
traverse.endChar = True
def search(self,key):
traverse = self.root
length = len(key)
for i in range(0,length):
index = self._getIndex(key[i])
if(traverse.children[index] == None):
return False
traverse = traverse.children[index]
# found the prefix, now need to return all children
words = self.getChildrenWords(traverse)
for i in range(len(words)):
words[i] = key + words[i]
return words
def getChildrenWords(self,node): # Recursively find children words
words = []
for index,child in enumerate(node.children):
if(child == None):
continue
if(child.endChar == True):
words.append(self._getLetter(index))
else:
children_words = self.getChildrenWords(child)
if(children_words != None):
for children_word in children_words:
words.append(self._getLetter(index) + children_word)
return words
trie = Trie()
for word in words:
trie.insert(word)
import time
pres = ["he","hel", "de","do","derp","te","tes"]
for pre in pres:
print("For key:{}".format(pre))
start = time.time()
trie.search(pre.lower())
print("It took {} seconds to search in the dictionary".format(time.time()-start))