Newer
Older
alert / js / node_modules / webpack-bundle-analyzer / lib / parseUtils.js
@Réz István Réz István on 18 Nov 2021 9 KB first commit
"use strict";

const fs = require('fs');

const _ = require('lodash');

const acorn = require('acorn');

const walk = require('acorn-walk');

// Public API of this module: `parseBundle` is the only exported function,
// every other function in this file is an internal helper.
module.exports = {
  parseBundle
};

/**
 * Reads a bundle file, parses it and extracts the source code of the modules found in it.
 *
 * The AST is walked until the first modules list is recognized. Supported layouts are listed
 * next to the corresponding visitor below.
 *
 * Errors raised by `fs.readFileSync` or `acorn.parse` are not caught here.
 *
 * @param {string} bundlePath - Path of the bundle file; it is read synchronously as UTF-8.
 * @returns {{modules: Object<string, string>, src: string, runtimeSrc: string}}
 *   `modules` maps module ids to their source text (empty object when no modules list was found),
 *   `src` is the whole file content and `runtimeSrc` is the content with the modules' text removed.
 */
function parseBundle(bundlePath) {
  const content = fs.readFileSync(bundlePath, 'utf8');
  const ast = acorn.parse(content, {
    sourceType: 'script',
    // I believe in a bright future of ECMAScript!
    // Actually, it's set to `2050` to support the latest ECMAScript version that currently exists.
    // Seems like `acorn` supports such weird option value.
    ecmaVersion: 2050
  });
  const walkState = {
    // Map of module id -> {start, end}; stays `null` until a modules list is found
    locations: null,
    // Nesting level of `ExpressionStatement` nodes currently being visited
    expressionStatementDepth: 0
  };
  walk.recursive(ast, walkState, {
    ExpressionStatement(node, state, c) {
      if (state.locations) return;
      state.expressionStatementDepth++;

      if (
        // Webpack 5 stores modules in the top-level IIFE
        state.expressionStatementDepth === 1 &&
        ast.body.includes(node) &&
        isIIFE(node)
      ) {
        const fn = getIIFECallExpression(node);
        const { callee } = fn;
        // `isIIFE` accepts any call expression (e.g. `foo();` or `(() => x)()`), so make sure that
        // a function with a block body is really being called before reading `params` and `body.body`.
        const isBlockBodiedFunction =
          (callee.type === 'FunctionExpression' || callee.type === 'ArrowFunctionExpression') &&
          callee.body.type === 'BlockStatement';

        if (
          isBlockBodiedFunction &&
          // It should contain neither arguments
          fn.arguments.length === 0 &&
          // ...nor parameters
          callee.params.length === 0
        ) {
          // Modules are stored in the very first variable declaration as hash
          const firstVariableDeclaration = callee.body.body.find(
            statement => statement.type === 'VariableDeclaration'
          );

          if (firstVariableDeclaration) {
            for (const declaration of firstVariableDeclaration.declarations) {
              // Only a real modules list may be passed to `getModulesLocations`: for any other
              // initializer it either throws (e.g. `var e = document.createElement('div')`) or
              // returns an empty, yet truthy, object that would stop the search too early.
              if (declaration.init && isModulesList(declaration.init)) {
                state.locations = getModulesLocations(declaration.init);
                break;
              }
            }
          }
        }
      }

      if (!state.locations) {
        c(node.expression, state);
      }

      state.expressionStatementDepth--;
    },

    AssignmentExpression(node, state) {
      if (state.locations) return;

      // Modules are stored in exports.modules:
      // exports.modules = {};
      const { left, right } = node;

      if (
        left &&
        left.object &&
        left.object.name === 'exports' &&
        left.property &&
        left.property.name === 'modules' &&
        isModulesHash(right)
      ) {
        state.locations = getModulesLocations(right);
      }
    },

    CallExpression(node, state, c) {
      if (state.locations) return;
      const args = node.arguments;

      // Main chunk with webpack loader.
      // Modules are stored in first argument:
      // (function (...) {...})(<modules>)
      if (
        node.callee.type === 'FunctionExpression' &&
        !node.callee.id &&
        args.length === 1 &&
        isSimpleModulesList(args[0])
      ) {
        state.locations = getModulesLocations(args[0]);
        return;
      }

      // Async Webpack < v4 chunk without webpack loader.
      // webpackJsonp([<chunks>], <modules>, ...)
      // As function name may be changed with `output.jsonpFunction` option we can't rely on its default name.
      if (
        node.callee.type === 'Identifier' &&
        mayBeAsyncChunkArguments(args) &&
        isModulesList(args[1])
      ) {
        state.locations = getModulesLocations(args[1]);
        return;
      }

      // Async Webpack v4 chunk without webpack loader.
      // (window.webpackJsonp=window.webpackJsonp||[]).push([[<chunks>], <modules>, ...]);
      // As function name may be changed with `output.jsonpFunction` option we can't rely on its default name.
      if (isAsyncChunkPushExpression(node)) {
        state.locations = getModulesLocations(args[0].elements[1]);
        return;
      }

      // Webpack v4 WebWorkerChunkTemplatePlugin
      // globalObject.chunkCallbackName([<chunks>],<modules>, ...);
      // Both globalObject and chunkCallbackName can be changed through the config, so we can't check them.
      if (isAsyncWebWorkerChunkExpression(node)) {
        state.locations = getModulesLocations(args[1]);
        return;
      }

      // Walking into arguments because some of plugins (e.g. `DedupePlugin`) or some Webpack
      // features (e.g. `umd` library output) can wrap modules list into additional IIFE.
      args.forEach(arg => c(arg, state));
    }

  });
  let modules;

  if (walkState.locations) {
    // Cut the text of every module out of the bundle source
    modules = _.mapValues(walkState.locations, loc => content.slice(loc.start, loc.end));
  } else {
    modules = {};
  }

  return {
    modules,
    src: content,
    runtimeSrc: getBundleRuntime(content, walkState.locations)
  };
}
/**
 * Returns bundle source except modules.
 *
 * The given locations are ordered by their `start` offset and the text between
 * them (plus the text before the first and after the last one) is glued together.
 *
 * @param {string} content - Whole bundle source.
 * @param {?Object} modulesLocations - Map of module id to `{start, end}` offsets in `content`;
 *   a falsy value is treated as "no modules".
 * @returns {string} `content` without the text covered by the locations.
 */
function getBundleRuntime(content, modulesLocations) {
  const locations = Object.values(modulesLocations || {});
  locations.sort((first, second) => first.start - second.start);

  const pieces = [];
  let cursor = 0;

  locations.forEach(location => {
    // Keep the text that precedes the module and jump over the module itself
    pieces.push(content.slice(cursor, location.start));
    cursor = location.end;
  });

  // Whatever is left after the last module
  pieces.push(content.slice(cursor, content.length));

  return pieces.join('');
}

/**
 * Checks whether a statement looks like an immediately invoked function expression:
 * an expression statement that is either a call or a unary operator applied to a call
 * (e.g. `!function () {}()`).
 *
 * Note that the callee itself is not inspected here.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isIIFE(node) {
  if (node.type !== 'ExpressionStatement') {
    return false;
  }

  const { expression } = node;

  if (expression.type === 'CallExpression') {
    return true;
  }

  return expression.type === 'UnaryExpression' && expression.argument.type === 'CallExpression';
}

/**
 * Returns the call expression of an IIFE statement, unwrapping the unary operator
 * when the statement has the `!function () {}()` form.
 *
 * @param {Object} node - Expression statement node.
 * @returns {Object} The unary operator's argument, or the statement's expression itself.
 */
function getIIFECallExpression(node) {
  const { expression } = node;

  return expression.type === 'UnaryExpression' ? expression.argument : expression;
}

/**
 * Checks whether a node is a modules list in any of the recognized forms:
 * a simple list (hash or array) or the optimized
 * `Array([minimum ID]).concat([<module>, <module>, ...])` expression.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isModulesList(node) {
  const isSimple = isSimpleModulesList(node);

  if (isSimple) {
    return isSimple;
  }

  // Modules are contained in expression `Array([minimum ID]).concat([<module>, <module>, ...])`
  return isOptimizedModulesArray(node);
}

/**
 * Checks whether a node is a plain modules list: either a hash or an array of modules.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isSimpleModulesList(node) {
  // Modules are contained in hash. Keys are module ids.
  const isHash = isModulesHash(node);

  if (isHash) {
    return isHash;
  }

  // Modules are contained in array. Indexes are module ids.
  return isModulesArray(node);
}

/**
 * Checks whether a node is an object literal whose every property value is a module wrapper.
 * An empty object literal qualifies as well.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isModulesHash(node) {
  return (
    node.type === 'ObjectExpression' &&
    node.properties.every(
      // Object literals may contain spread elements (`{...other}`) which have no `value`,
      // so such an object can't be treated as a modules hash.
      property => property.type === 'Property' && isModuleWrapper(property.value)
    )
  );
}

/**
 * Checks whether a node is an array literal whose every present element is a module wrapper.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isModulesArray(node) {
  if (node.type !== 'ArrayExpression') {
    return false;
  }

  return node.elements.every(element => {
    // Some of array items may be skipped because there is no module with such id
    if (!element) {
      return true;
    }

    return isModuleWrapper(element);
  });
}

/**
 * Checks whether modules are contained in `Array(<minimum ID>).concat(...modules)` array:
 * https://github.com/webpack/webpack/blob/v1.14.0/lib/Template.js#L91
 * The `<minimum ID>` + array indexes are module ids.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isOptimizedModulesArray(node) {
  if (node.type !== 'CallExpression') {
    return false;
  }

  const { callee, arguments: concatArgs } = node;

  if (callee.type !== 'MemberExpression') {
    return false;
  }

  const { object: arrayCall, property: method } = callee;

  // Make sure the object called is `Array(<some number>)`
  if (arrayCall.type !== 'CallExpression') {
    return false;
  }

  if (arrayCall.callee.type !== 'Identifier' || arrayCall.callee.name !== 'Array') {
    return false;
  }

  if (arrayCall.arguments.length !== 1 || !isNumericId(arrayCall.arguments[0])) {
    return false;
  }

  // Make sure the property X called for `Array(<some number>).X` is `concat`
  if (method.type !== 'Identifier' || method.name !== 'concat') {
    return false;
  }

  // Make sure exactly one array is passed in to `concat`
  return concatArgs.length === 1 && isModulesArray(concatArgs[0]);
}

/**
 * Checks whether a node may represent a single module inside of a modules list.
 *
 * @param {Object} node - AST node.
 * @returns {boolean} `true` for an anonymous function (regular or arrow), a module id literal
 *   or an array of shape `[<module_id>, ...args]`.
 */
function isModuleWrapper(node) {
  // It's an anonymous function expression that wraps module
  if ((node.type === 'FunctionExpression' || node.type === 'ArrowFunctionExpression') && !node.id) {
    return true;
  }

  // ...or an array of shape [<module_id>, ...args]
  if (node.type === 'ArrayExpression') {
    const [firstElement] = node.elements;

    // A hole (`[, x]`) is a `null` element in the AST and can't be a module id
    return node.elements.length > 1 && firstElement != null && isModuleId(firstElement);
  }

  // If `DedupePlugin` is used it can be an ID of duplicated module
  return isModuleId(node);
}

/**
 * Checks whether a node is a literal usable as a module id:
 * a numeric id (see `isNumericId`) or any string.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isModuleId(node) {
  if (node.type !== 'Literal') {
    return false;
  }

  return isNumericId(node) || typeof node.value === 'string';
}

/**
 * Checks whether a node is a literal holding a non-negative integer.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isNumericId(node) {
  if (node.type !== 'Literal') {
    return false;
  }

  const { value } = node;

  return Number.isInteger(value) && value >= 0;
}

/**
 * Checks whether a node is an array of chunk ids.
 *
 * @param {Object} node - AST node.
 * @returns {boolean} `true` for an array literal whose every element is a module id literal.
 */
function isChunkIds(node) {
  // Array of numeric or string ids. Chunk IDs are strings when NamedChunksPlugin is used
  return (
    node.type === 'ArrayExpression' &&
    // A hole (`[, 1]`) is a `null` element in the AST: it's not an id and must not be dereferenced
    node.elements.every(element => element != null && isModuleId(element))
  );
}

/**
 * Checks whether a call expression has the shape
 * `(<assignment>).push([[<chunks>], <modules>, ...])`.
 *
 * @param {Object} node - Call expression node.
 * @returns {boolean}
 */
function isAsyncChunkPushExpression(node) {
  const { callee, arguments: callArgs } = node;

  if (callee.type !== 'MemberExpression') {
    return false;
  }

  if (callee.property.name !== 'push') {
    return false;
  }

  // `push` has to be called on the result of an assignment
  if (callee.object.type !== 'AssignmentExpression') {
    return false;
  }

  if (callArgs.length !== 1) {
    return false;
  }

  const [payload] = callArgs;

  if (payload.type !== 'ArrayExpression') {
    return false;
  }

  return mayBeAsyncChunkArguments(payload.elements) && isModulesList(payload.elements[1]);
}

/**
 * Checks whether a list of nodes may be the arguments of an async chunk call:
 * at least two items, the first one being an array of chunk ids.
 *
 * @param {Object[]} args - List of AST nodes.
 * @returns {boolean}
 */
function mayBeAsyncChunkArguments(args) {
  if (args.length < 2) {
    return false;
  }

  return isChunkIds(args[0]);
}

/**
 * Checks whether a node has the shape `<object>.<method>([<chunks>], <modules>)`:
 * a call of a member expression with exactly two arguments, chunk ids and a modules list.
 *
 * @param {Object} node - AST node.
 * @returns {boolean}
 */
function isAsyncWebWorkerChunkExpression(node) {
  if (node.type !== 'CallExpression') {
    return false;
  }

  if (node.callee.type !== 'MemberExpression') {
    return false;
  }

  const callArgs = node.arguments;

  if (callArgs.length !== 2) {
    return false;
  }

  const [chunkIds, modulesList] = callArgs;

  return isChunkIds(chunkIds) && isModulesList(modulesList);
}

/**
 * Collects the source locations of all modules of a modules list.
 *
 * Handles a modules hash (object literal), a modules array and, for a call expression,
 * the optimized `Array(<minimum ID>).concat([...])` form. Any call expression is assumed
 * to have that optimized shape; it is not verified here.
 *
 * @param {Object} node - AST node of the modules list.
 * @returns {Object} Map of module id to `{start, end}`; empty for any other kind of node.
 */
function getModulesLocations(node) {
  const locations = {};

  switch (node.type) {
    case 'ObjectExpression':
      // Modules hash: property keys are module ids
      node.properties.forEach(property => {
        const id = property.key.name || property.key.value;

        locations[id] = getModuleLocation(property.value);
      });
      break;

    case 'ArrayExpression':
    case 'CallExpression': {
      // Modules array or optimized array
      let minId = 0;
      let elements;

      if (node.type === 'CallExpression') {
        // Get the [minId] value from the Array() call first argument literal value
        minId = node.callee.object.arguments[0].value;
        // The modules reside in the `concat()` function call arguments
        elements = node.arguments[0].elements;
      } else {
        elements = node.elements;
      }

      elements.forEach((element, index) => {
        // Skipped array items have no module
        if (element) {
          locations[index + minId] = getModuleLocation(element);
        }
      });
      break;
    }

    default:
      break;
  }

  return locations;
}

/**
 * Extracts the source range of a node.
 *
 * @param {Object} node - AST node.
 * @returns {{start: *, end: *}} The node's `start` and `end` values.
 */
function getModuleLocation(node) {
  const { start, end } = node;

  return { start, end };
}