"use strict"; // Simulations show these probabilities for a single change // 93.1% that one group is invalidated // 4.8% that two groups are invalidated // 1.1% that 3 groups are invalidated // 0.1% that 4 or more groups are invalidated // // And these for removing/adding 10 lexically adjacent files // 64.5% that one group is invalidated // 24.8% that two groups are invalidated // 7.8% that 3 groups are invalidated // 2.7% that 4 or more groups are invalidated // // And these for removing/adding 3 random files // 0% that one group is invalidated // 3.7% that two groups are invalidated // 80.8% that 3 groups are invalidated // 12.3% that 4 groups are invalidated // 3.2% that 5 or more groups are invalidated /** * * @param {string} a key * @param {string} b key * @returns {number} the similarity as number */ const similarity = (a, b) => { const l = Math.min(a.length, b.length); let dist = 0; for (let i = 0; i < l; i++) { const ca = a.charCodeAt(i); const cb = b.charCodeAt(i); dist += Math.max(0, 10 - Math.abs(ca - cb)); } return dist; }; /** * @param {string} a key * @param {string} b key * @returns {string} the common part and a single char for the difference */ const getName = (a, b) => { const l = Math.min(a.length, b.length); let r = ""; for (let i = 0; i < l; i++) { const ca = a.charAt(i); const cb = b.charAt(i); r += ca; if (ca === cb) { continue; } return r; } return a; }; /** * @template T */ class Node { /** * @param {T} item item * @param {string} key key * @param {number} size size */ constructor(item, key, size) { this.item = item; this.key = key; this.size = size; } } /** * @template T */ class Group { /** * @param {Node<T>[]} nodes nodes * @param {number[]} similarities similarities between the nodes (length = nodes.length - 1) */ constructor(nodes, similarities) { this.nodes = nodes; this.similarities = similarities; this.size = nodes.reduce((size, node) => size + node.size, 0); /** @type {string} */ this.key = undefined; } } /** * @template T * @typedef {Object} GroupedItems<T> * @property {string} key * @property {T[]} items * @property {number} size */ /** * @template T * @typedef {Object} Options * @property {number} maxSize maximum size of a group * @property {number} minSize minimum size of a group (preferred over maximum size) * @property {Iterable<T>} items a list of items * @property {function(T): number} getSize function to get size of an item * @property {function(T): string} getKey function to get the key of an item */ /** * @template T * @param {Options<T>} options options object * @returns {GroupedItems<T>[]} grouped items */ module.exports = ({ maxSize, minSize, items, getSize, getKey }) => { /** @type {Group<T>[]} */ const result = []; const nodes = Array.from( items, item => new Node(item, getKey(item), getSize(item)) ); /** @type {Node<T>[]} */ const initialNodes = []; // lexically ordering of keys nodes.sort((a, b) => { if (a.key < b.key) return -1; if (a.key > b.key) return 1; return 0; }); // return nodes bigger than maxSize directly as group for (const node of nodes) { if (node.size >= maxSize) { result.push(new Group([node], [])); } else { initialNodes.push(node); } } if (initialNodes.length > 0) { // calculate similarities between lexically adjacent nodes /** @type {number[]} */ const similarities = []; for (let i = 1; i < initialNodes.length; i++) { const a = initialNodes[i - 1]; const b = initialNodes[i]; similarities.push(similarity(a.key, b.key)); } const initialGroup = new Group(initialNodes, similarities); if (initialGroup.size < minSize) { // We hit an edgecase where the working set is already smaller than minSize // We merge it with the smallest result node to keep minSize intact if (result.length > 0) { const smallestGroup = result.reduce((min, group) => min.size > group.size ? group : min ); for (const node of initialGroup.nodes) smallestGroup.nodes.push(node); smallestGroup.nodes.sort((a, b) => { if (a.key < b.key) return -1; if (a.key > b.key) return 1; return 0; }); } else { // There are no other nodes // We use all nodes and have to accept that it's smaller than minSize result.push(initialGroup); } } else { const queue = [initialGroup]; while (queue.length) { const group = queue.pop(); // only groups bigger than maxSize need to be splitted if (group.size < maxSize) { result.push(group); continue; } // find unsplittable area from left and right // going minSize from left and right // at least one node need to be included otherwise we get stuck let left = 0; let leftSize = 0; while (leftSize <= minSize) { leftSize += group.nodes[left].size; left++; } let right = group.nodes.length - 1; let rightSize = 0; while (rightSize <= minSize) { rightSize += group.nodes[right].size; right--; } if (left - 1 > right) { // can't split group while holding minSize // because minSize is preferred of maxSize we return // the group here even while it's too big // To avoid this make sure maxSize > minSize * 3 result.push(group); continue; } if (left <= right) { // when there is a area between left and right // we look for best split point // we split at the minimum similarity // here key space is separated the most let best = left - 1; let bestSimilarity = group.similarities[best]; for (let i = left; i <= right; i++) { const similarity = group.similarities[i]; if (similarity < bestSimilarity) { best = i; bestSimilarity = similarity; } } left = best + 1; right = best; } // create two new groups for left and right area // and queue them up const rightNodes = [group.nodes[right + 1]]; /** @type {number[]} */ const rightSimilaries = []; for (let i = right + 2; i < group.nodes.length; i++) { rightSimilaries.push(group.similarities[i - 1]); rightNodes.push(group.nodes[i]); } queue.push(new Group(rightNodes, rightSimilaries)); const leftNodes = [group.nodes[0]]; /** @type {number[]} */ const leftSimilaries = []; for (let i = 1; i < left; i++) { leftSimilaries.push(group.similarities[i - 1]); leftNodes.push(group.nodes[i]); } queue.push(new Group(leftNodes, leftSimilaries)); } } } // lexically ordering result.sort((a, b) => { if (a.nodes[0].key < b.nodes[0].key) return -1; if (a.nodes[0].key > b.nodes[0].key) return 1; return 0; }); // give every group a name for (let i = 0; i < result.length; i++) { const group = result[i]; const first = group.nodes[0]; const last = group.nodes[group.nodes.length - 1]; let name = getName(first.key, last.key); group.key = name; } // return the results return result.map(group => { /** @type {GroupedItems} */ return { key: group.key, items: group.nodes.map(node => node.item), size: group.size }; }); };