NewUnicodeEscapeSequenceSniff.php 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. <?php
  2. /**
  3. * PHPCompatibility, an external standard for PHP_CodeSniffer.
  4. *
  5. * @package PHPCompatibility
  6. * @copyright 2012-2019 PHPCompatibility Contributors
  7. * @license https://opensource.org/licenses/LGPL-3.0 LGPL3
  8. * @link https://github.com/PHPCompatibility/PHPCompatibility
  9. */
  10. namespace PHPCompatibility\Sniffs\TextStrings;
  11. use PHPCompatibility\Sniff;
  12. use PHP_CodeSniffer_Exception as PHPCS_Exception;
  13. use PHP_CodeSniffer_File as File;
  14. /**
  15. * PHP 7.0 introduced a Unicode codepoint escape sequence.
  16. *
  17. * Strings containing a literal `\u{` followed by an invalid sequence will cause a
  18. * fatal error as of PHP 7.0.
  19. *
  20. * PHP version 7.0
  21. *
  22. * @link https://www.php.net/manual/en/migration70.new-features.php#migration70.new-features.unicode-codepoint-escape-syntax
  23. * @link https://www.php.net/manual/en/migration70.incompatible.php#migration70.incompatible.strings.unicode-escapes
  24. * @link https://wiki.php.net/rfc/unicode_escape
  25. * @link https://www.php.net/manual/en/language.types.string.php#language.types.string.syntax.double
  26. *
  27. * @since 9.3.0
  28. */
  29. class NewUnicodeEscapeSequenceSniff extends Sniff
  30. {
  31. /**
  32. * Returns an array of tokens this test wants to listen for.
  33. *
  34. * @since 9.3.0
  35. *
  36. * @return array
  37. */
  38. public function register()
  39. {
  40. return array(
  41. \T_CONSTANT_ENCAPSED_STRING,
  42. \T_DOUBLE_QUOTED_STRING,
  43. \T_HEREDOC,
  44. );
  45. }
  46. /**
  47. * Processes this test, when one of its tokens is encountered.
  48. *
  49. * @since 9.3.0
  50. *
  51. * @param \PHP_CodeSniffer_File $phpcsFile The file being scanned.
  52. * @param int $stackPtr The position of the current token in
  53. * the stack passed in $tokens.
  54. *
  55. * @return void
  56. */
  57. public function process(File $phpcsFile, $stackPtr)
  58. {
  59. $tokens = $phpcsFile->getTokens();
  60. // Check whether this is a single quoted or double quoted string.
  61. if ($tokens[$stackPtr]['code'] === \T_CONSTANT_ENCAPSED_STRING) {
  62. // Find the start of the - potentially multi-line - text string.
  63. $start = $stackPtr;
  64. for ($i = ($stackPtr - 1); $i >= 0; $i--) {
  65. if ($tokens[$i]['code'] === \T_WHITESPACE) {
  66. continue;
  67. }
  68. if ($tokens[$i]['code'] === \T_CONSTANT_ENCAPSED_STRING) {
  69. $start = $i;
  70. continue;
  71. }
  72. break;
  73. }
  74. try {
  75. $textString = $this->getCompleteTextString($phpcsFile, $start, false);
  76. } catch (PHPCS_Exception $e) {
  77. // Something went wrong determining the start of the text string.
  78. return;
  79. }
  80. $startQuote = $textString[0];
  81. $endQuote = substr($textString, -1);
  82. if (($startQuote === "'" && $endQuote === "'")
  83. || $startQuote !== $endQuote
  84. ) {
  85. // Single quoted string, not our concern.
  86. return;
  87. }
  88. }
  89. $content = $this->stripQuotes($tokens[$stackPtr]['content']);
  90. $count = preg_match_all('`(?<!\\\\)\\\\u\{([^}\n\r]*)(\})?`', $content, $matches, \PREG_SET_ORDER);
  91. if ($count === false || $count === 0) {
  92. return;
  93. }
  94. foreach ($matches as $match) {
  95. $valid = false; // If the close curly is missing, we have an incomplete escape sequence.
  96. if (isset($match[2])) {
  97. $valid = $this->isValidUnicodeEscapeSequence($match[1]);
  98. }
  99. if ($this->supportsBelow('5.6') === true && $valid === true) {
  100. $phpcsFile->addError(
  101. 'Unicode codepoint escape sequences are not supported in PHP 5.6 or earlier. Found: %s',
  102. $stackPtr,
  103. 'Found',
  104. array($match[0])
  105. );
  106. }
  107. if ($this->supportsAbove('7.0') === true && $valid === false) {
  108. $phpcsFile->addError(
  109. 'Strings containing a literal \u{ followed by an invalid unicode codepoint escape sequence will cause a fatal error in PHP 7.0 and above. Escape the leading backslash to prevent this. Found: %s',
  110. $stackPtr,
  111. 'Invalid',
  112. array($match[0])
  113. );
  114. }
  115. }
  116. }
  117. /**
  118. * Verify if the codepoint in a unicode escape sequence is valid.
  119. *
  120. * @since 9.3.0
  121. *
  122. * @param string $codepoint The codepoint as a string.
  123. *
  124. * @return bool
  125. */
  126. protected function isValidUnicodeEscapeSequence($codepoint)
  127. {
  128. if (trim($codepoint) === '') {
  129. return false;
  130. }
  131. // Check if it's a valid hex codepoint.
  132. if (preg_match('`^[0-9A-F]+$`iD', $codepoint, $match) !== 1) {
  133. return false;
  134. }
  135. if (hexdec($codepoint) > 1114111) {
  136. // Outside of the maximum permissable range.
  137. return false;
  138. }
  139. return true;
  140. }
  141. }