30.Substring with Concatenation of All Words¶

30.Substring with Concatenation of All Words¶

Tags: Hard String Hash Table Two Pointers

Links: https://leetcode.com/problems/substring-with-concatenation-of-all-words/

You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.

Example 1:

Input:
  s = "barfoothefoobarman",
  words = ["foo","bar"]
Output: [0,9]
Explanation: Substrings starting at index 0 and 9 are "barfoo" and "foobar" respectively.
The output order does not matter, returning [9,0] is fine too.

Example 2:

Input:
  s = "wordgoodgoodgoodbestword",
  words = ["word","good","best","word"]
Output: []

class Solution {
public:
    vector<int> findSubstring(string s, vector<string>& words) {
        std::ios_base::sync_with_stdio(false);
        cin.tie(NULL);
        cout.tie(NULL);

        if (words.size() == 0) return {};

        int n = words.size();
        int len = words[0].size();

        if (s.size() < n * len) return {};

        unordered_map<string, int> m;
        for (int i = 0; i < n; ++i) ++m[words[i]];

        vector<int> res;
        for (int i = 0; i < len; ++i) {
            int start = i;
            int pos = start;
            int cnt = 0;
            unordered_map<string, int> store;
            while (start + n * len <= s.size()) {
                while (cnt < n && m.find(s.substr(pos, len)) != m.end()) {
                    ++cnt;
                    ++store[s.substr(pos, len)];
                    pos += len;
                }

                if (cnt == n) {
                    if (store == m) res.push_back(pos - n * len);
                    --cnt;
                    --store[s.substr(start, len)];
                    start += len;
                }
                else {
                    cnt = 0;
                    store.clear();
                    start = pos + len;
                    pos = start;
                }
            }
        }

        return res;
    }
};

其实思路是很容易想的，最初出错是因为题目的要求写的有点模糊（可能自己理解的有偏差），以为是如果words里面有重复的单词是不计算在内的，比如foo, bar, foo，只考虑foo,bar，会影响判断函数的规则。

另外一个点题目也说的不是很清楚，在s里的单词是否截成单个的单词后和words内的单词长度都相同？从测试用例来看好像是这样，但是保险起见还是都检查一遍。

因为words内单词的长度是一样的，所以每次以单词的长度为单位进行搜索。搜索的过程其实就是尺取法，很直接就不赘述了，进行单词长度len次搜索即可完全检查。