Newer
Older
framework / system / ThirdParty / Kint / Object / BlobObject.php
@Lonnie Ezell Lonnie Ezell on 24 Feb 2020 5 KB Release 4.0.0
<?php

/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2013 Jonathan Vollebregt (jnvsor@gmail.com), Rokas Šleinius (raveren@gmail.com)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

namespace Kint\Object;

class BlobObject extends BasicObject
{
    /**
     * @var array Character encodings to detect
     *
     * @see https://secure.php.net/function.mb-detect-order
     *
     * In practice, mb_detect_encoding can only successfully determine the
     * difference between the following common charsets at once without
     * breaking things for one of the other charsets:
     * - ASCII
     * - UTF-8
     * - SJIS
     * - EUC-JP
     *
     * The order of the charsets is significant. If you put UTF-8 before ASCII
     * it will never match ASCII, because UTF-8 is a superset of ASCII.
     * Similarly, SJIS and EUC-JP frequently match UTF-8 strings, so you should
     * check UTF-8 first. SJIS and EUC-JP seem to work either way, but SJIS is
     * more common so it should probably be first.
     *
     * While you're free to experiment with other charsets, remember to keep
     * this behavior in mind when setting up your char_encodings array.
     *
     * This depends on the mbstring extension
     */
    public static $char_encodings = array(
        'ASCII',
        'UTF-8',
    );

    /**
     * @var array Legacy character encodings to detect
     *
     * @see https://secure.php.net/function.iconv
     *
     * Assuming the other encoding checks fail, this will perform a
     * simple iconv conversion to check for invalid bytes. If any are
     * found it will not match.
     *
     * This can be useful for ambiguous single byte encodings like
     * windows-125x and iso-8859-x which have practically undetectable
     * differences because they use every single byte available.
     *
     * This is *NOT* reliable and should not be trusted implicitly. As
     * with char_encodings, the order of the charsets is significant.
     *
     * This depends on the iconv extension
     */
    public static $legacy_encodings = array();

    public $type = 'string';
    public $encoding = false;
    public $hints = array('string');

    public function getType()
    {
        if (false === $this->encoding) {
            return 'binary '.$this->type;
        }

        if ('ASCII' === $this->encoding) {
            return $this->type;
        }

        return $this->encoding.' '.$this->type;
    }

    public function getValueShort()
    {
        if ($rep = $this->value) {
            return '"'.$rep->contents.'"';
        }
    }

    public function transplant(BasicObject $old)
    {
        parent::transplant($old);

        if ($old instanceof self) {
            $this->encoding = $old->encoding;
        }
    }

    public static function strlen($string, $encoding = false)
    {
        if (\function_exists('mb_strlen')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_strlen($string, $encoding);
            }
        }

        return \strlen($string);
    }

    public static function substr($string, $start, $length = null, $encoding = false)
    {
        if (\function_exists('mb_substr')) {
            if (false === $encoding) {
                $encoding = self::detectEncoding($string);
            }

            if ($encoding && 'ASCII' !== $encoding) {
                return \mb_substr($string, $start, $length, $encoding);
            }
        }

        // Special case for substr/mb_substr discrepancy
        if ('' === $string) {
            return '';
        }

        return \substr($string, $start, isset($length) ? $length : PHP_INT_MAX);
    }

    public static function detectEncoding($string)
    {
        if (\function_exists('mb_detect_encoding')) {
            if ($ret = \mb_detect_encoding($string, self::$char_encodings, true)) {
                return $ret;
            }
        }

        // Pretty much every character encoding uses first 32 bytes as control
        // characters. If it's not a multi-byte format it's safe to say matching
        // any control character besides tab, nl, and cr means it's binary.
        if (\preg_match('/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/', $string)) {
            return false;
        }

        if (\function_exists('iconv')) {
            foreach (self::$legacy_encodings as $encoding) {
                if (@\iconv($encoding, $encoding, $string) === $string) {
                    return $encoding;
                }
            }
        } elseif (!\function_exists('mb_detect_encoding')) { // @codeCoverageIgnore
            // If a user has neither mb_detect_encoding, nor iconv, nor the
            // polyfills, there's not much we can do about it...
            // Pretend it's ASCII and pray the browser renders it properly.
            return 'ASCII'; // @codeCoverageIgnore
        }

        return false;
    }
}