411.Minimum Unique Word Abbreviation

Tags: [tree], [trie], [prefix_tree], [abbreviation], [heap], [DFS], [backtracking], [wild_card]

Com: {g}

Link: https://leetcode.com/problems/minimum-unique-word-abbreviation/\#/description

A string such as"word"contains the following abbreviations:

["word", "1ord", "w1rd", "wo1d", "wor1", "2rd", "w2d", "wo2", "1o1d", "1or1", "w1r1", "1o2", "2r1", "3d", "w3", "4"]

Given a target string and a set of strings in a dictionary, find an abbreviation of this target string with the smallest possible length such that it does not conflict with abbreviations of the strings in the dictionary.

Each number or letter in the abbreviation is considered length = 1. For example, the abbreviation "a32bc" has length = 4.

Note:

  • In the case of multiple answers as shown in the second example below, you may return any one of them.
  • Assume length of target string = m, and dictionary size = n. You may assume that m ≤ 21, n ≤ 1000, and log(base=2)(n) + m ≤ 20.

Examples:

"apple", ["blade"] -> "a4" (because "5" or "4e" conflicts with "blade")
"apple", ["plain", "amber", "blade"] -> "1p3" (other valid answers include "ap3", "a3e", "2p2", "3le", "3l1").

Solution: Trie, Prefix Tree, DFS, Wild Card

class Solution(object):

    class Trie(object):
        class TrieNode(object):
            def __init__(self, val):
                self.val = val
                self.end_of_w = False
                self.children = []

        def __init__(self, words):
            self.root = self.TrieNode('')
            for w in words:
                self.__add_word(self.root, w, 0)

        def __add_word(self, root, w, index):
            c = w[index]
            target = None
            for child in root.children:
                if child.val == c:
                    target = child
                    break

            if not target:
                target = self.TrieNode(c)
                root.children.append(target)

            if index == len(w) - 1:
                target.end_of_w = True
            else:
                self.__add_word(target, w, index + 1)

        def search_word(self, w, wild_card=True):
            return self.__search_word_helper(self.root, w, 0, wild_card)

        def __search_word_helper(self, root, w, index, wild_card):
            c = w[index]
            if wild_card and c == '.':
                if index == len(w) - 1:
                    return True
                else:
                    for child in root.children:
                        if self.__search_word_helper(child, w, index + 1, wild_card):
                            return True
                    return False

            target = None
            for child in root.children:
                if child.val == c:
                    target = child
                    break

            if not target:
                return False
            else:
                if index == len(w) - 1:
                    return target.end_of_w
                else:
                    return self.__search_word_helper(target, w, index + 1, wild_card)


    def minAbbreviation(self, target, dictionary):
        """
        :type target: str
        :type dictionary: List[str]
        :rtype: str
        """
        if not target:
            return ''

        size = len(target)
        words = [x for x in dictionary if len(x) == size]
        trie = self.Trie(words)

        for abbr_len in xrange(1, size + 1):
            abbrs = self.get_abbr(target, abbr_len)
            for abbr in abbrs:
                wild_card_abbr = self.get_wild_card_abbr(abbr)
                if not trie.search_word(wild_card_abbr, wild_card=True):
                    return abbr

        return ''

    def get_abbr(self, s, abbr_len):
        result = []
        buff = []
        self.get_abbr_helper(s, abbr_len, 0, 0, buff, result)
        return result

    def get_abbr_helper(self, s, abbr_len, index, counter, buff, result):
        # base case
        if len(buff) > abbr_len:
            return
        if index >= len(s):
            if counter > 0:
                buff.append(str(counter))
            if len(buff) == abbr_len:
                result.append(''.join(buff))
            if counter > 0:
                buff.pop()
            return

        # choice 1: abbreviate the current char.
        self.get_abbr_helper(s, abbr_len, index + 1, counter + 1, buff, result)

        # choice 2: do not abbreviate the current char.
        if counter > 0:
            buff.append(str(counter))
        buff.append(s[index])
        self.get_abbr_helper(s, abbr_len, index + 1, 0, buff, result)
        buff.pop()
        if counter > 0:
            buff.pop()


    def get_wild_card_abbr(self, abbr):
        result = []
        start = 0
        end = 0
        size = len(abbr)
        while end < size:
            if '0' <= abbr[end] <= '9':
                end += 1
            else:
                if start == end:
                    result.append(abbr[start])
                    start += 1
                    end += 1
                else:
                    k = int(abbr[start:end])
                    result.append('.' * k)
                    start = end

        if start < end:
            k = int(abbr[start:end])
            result.append('.' * k)

        return ''.join(result)

Revelation:

  • 主要思路就是按长度来逐个计算出target的abbr,然后把abbr变成wild card形式,在dictionary的trie中进行搜索,如果在trie中找不到这个wild card abbr,那么个abbr就是结果.
  • 只把和target等长的dictionary word放入trie中,因为只有这些dictionary word才有可能和target生成的abbr匹配上.
  • 这里最重要的优化是: 不一次性把target的所有abbr都计算出来,而是按照长度逐个计算.

Note:

  • Time complexity = O(n + n*k + m * 2^m * (m + m)) = O(n*k + m^2 * 2^m), n is the length of dictionary, m is the length of target, k is the max length of the dictionary word.

Time Limited Exceeded: Heap

import heapq

class Solution(object):
    def minAbbreviation(self, target, dictionary):
        """
        :type target: str
        :type dictionary: List[str]
        :rtype: str
        """
        if not target:
            return ''

        size = len(target)
        new_dict = set([x for x in dictionary if len(x) == size])
        min_heap = []
        for abbr in self.get_abbrs(target):
            min_heap.append((self.get_abbr_len(abbr), abbr))

        heapq.heapify(min_heap)

        while min_heap:
            _, abbr = heapq.heappop(min_heap)
            find_conflict = False
            for w in new_dict:
                if self.is_valid_abbr(w, abbr):
                    find_conflict = True
                    break

            if not find_conflict:
                return abbr

        return ''

    def get_abbrs(self, s):
        if not s:
            return ''

        result = []
        buff = []
        self.get_abbrs_helper(s, 0, 0, buff, result)
        return result

    def get_abbrs_helper(self, s, index, counter, buff, result):
        # base case
        if index >= len(s):
            if counter > 0:
                buff.append(str(counter))
            result.append(''.join(buff))
            if counter > 0:
                buff.pop()
            return

        # choice 1: abbreviate the current char.
        self.get_abbrs_helper(s, index + 1, counter + 1, buff, result)

        # choice 2: do not abbreviate the current char.
        if counter > 0:
            buff.append(str(counter))
        buff.append(s[index])
        self.get_abbrs_helper(s, index + 1, 0, buff, result)
        buff.pop()
        if counter > 0:
            buff.pop()

    def get_abbr_len(self, abbr):
        if not abbr:
            return 0

        counter = 0
        start = 0
        end = 0
        while end < len(abbr):
            if '0' <= abbr[end] <= '9':
                end += 1
            else:
                if start == end:
                    counter += 1
                    start += 1
                    end += 1
                else:
                    counter += 1
                    start = end

        if start < end:
            counter += 1

        return counter

    def is_valid_abbr(self, w, abbr):
        w_i = 0
        a_i = 0
        while w_i < len(w) and a_i < len(abbr):
            if '0' <= abbr[a_i] <= '9':
                start = a_i
                end = start
                while end < len(abbr) and '0' <= abbr[end] <= '9':
                    end += 1

                k = int(abbr[start:end])
                w_i += k
                a_i = end
                continue
            elif w[w_i] != abbr[a_i]:
                return False

            w_i += 1
            a_i += 1

        return w_i == len(w) and a_i == len(abbr)

Time Limited Exceeded:

import heapq

class Solution(object):
    def minAbbreviation(self, target, dictionary):
        """
        :type target: str
        :type dictionary: List[str]
        :rtype: str
        """
        if not target:
            return ''

        dict_abbrs = set()
        for d in dictionary:
            for abbr in self.get_abbrs(d):
                dict_abbrs.add(abbr)

        min_heap = []
        for abbr in self.get_abbrs(target):
            min_heap.append((self.get_abbr_len(abbr), abbr))

        heapq.heapify(min_heap)
        while min_heap:
            _, abbr = heapq.heappop(min_heap)
            if abbr not in dict_abbrs:
                return abbr

        return ''

    def get_abbrs(self, s):
        if not s:
            return []

        result = []
        buff = []
        self.get_abbrs_helper(s, 0, 0, buff, result)
        return result

    def get_abbrs_helper(self, s, index, counter, buff, result):
        # base case
        if index >= len(s):
            if counter > 0:
                buff.append(str(counter))
            result.append(''.join(buff))
            if counter > 0:
                buff.pop()
            return

        # choice 1: abbreviate the current char
        self.get_abbrs_helper(s, index + 1, counter + 1, buff, result)

        # choice 2: do not abbreviate the current char
        if counter > 0:
            buff.append(str(counter))
        buff.append(s[index])
        self.get_abbrs_helper(s, index + 1, 0, buff, result)
        buff.pop()
        if counter > 0:
            buff.pop()

    def get_abbr_len(self, abbr):
        if not abbr:
            return 0

        counter = 0
        start = 0
        end = 0
        while end < len(abbr):
            if '0' <= abbr[end] <= '9':
                end += 1
            else:
                if start == end:
                    counter += 1
                    start += 1
                    end += 1
                else:
                    counter += 1
                    start = end

        if start < end:
            counter += 1

        return counter

results matching ""

    No results matching ""