|
@@ -0,0 +1,763 @@
|
|
|
+<?php
|
|
|
+#
|
|
|
+# SmartyPants - Smart punctuation for web sites
|
|
|
+#
|
|
|
+# PHP SmartyPants
|
|
|
+# Copyright (c) 2004-2013 Michel Fortin
|
|
|
+# <http://michelf.ca/>
|
|
|
+#
|
|
|
+# Original SmartyPants
|
|
|
+# Copyright (c) 2003-2004 John Gruber
|
|
|
+# <http://daringfireball.net>
|
|
|
+#
|
|
|
+
|
|
|
+
|
|
|
+define( 'SMARTYPANTS_VERSION', "1.5.1f" ); # Unreleased
|
|
|
+
|
|
|
+
|
|
|
+#
|
|
|
+# Default configuration:
|
|
|
+#
|
|
|
+# 1 -> "--" for em-dashes; no en-dash support
|
|
|
+# 2 -> "---" for em-dashes; "--" for en-dashes
|
|
|
+# 3 -> "--" for em-dashes; "---" for en-dashes
|
|
|
+# See docs for more configuration options.
|
|
|
+#
|
|
|
+define( 'SMARTYPANTS_ATTR', 1 );
|
|
|
+
|
|
|
+
|
|
|
+# SmartyPants will not alter the content of these tags:
|
|
|
+define( 'SMARTYPANTS_TAGS_TO_SKIP', 'pre|code|kbd|script|style|math');
|
|
|
+
|
|
|
+
|
|
|
+### Standard Function Interface ###
|
|
|
+
|
|
|
+define( 'SMARTYPANTS_PARSER_CLASS', 'SmartyPants_Parser' );
|
|
|
+
|
|
|
+function SmartyPants($text, $attr = SMARTYPANTS_ATTR) {
|
|
|
+#
|
|
|
+# Initialize the parser and return the result of its transform method.
|
|
|
+#
|
|
|
+ # Setup static parser array.
|
|
|
+ static $parser = array();
|
|
|
+ if (!isset($parser[$attr])) {
|
|
|
+ $parser_class = SMARTYPANTS_PARSER_CLASS;
|
|
|
+ $parser[$attr] = new $parser_class($attr);
|
|
|
+ }
|
|
|
+
|
|
|
+ # Transform text using parser.
|
|
|
+ return $parser[$attr]->transform($text);
|
|
|
+}
|
|
|
+
|
|
|
+function SmartQuotes($text, $attr = null) {
|
|
|
+ switch ($attr) {
|
|
|
+ case 0: return $text;
|
|
|
+ case 2: $attr = 'qb'; break;
|
|
|
+ default: $attr = 'q'; break;
|
|
|
+ }
|
|
|
+ return SmartyPants($text, $attr);
|
|
|
+}
|
|
|
+
|
|
|
+function SmartDashes($text, $attr = null) {
|
|
|
+ switch ($attr) {
|
|
|
+ case 0: return $text;
|
|
|
+ case 2: $attr = 'D'; break;
|
|
|
+ case 3: $attr = 'i'; break;
|
|
|
+ default: $attr = 'd'; break;
|
|
|
+ }
|
|
|
+ return SmartyPants($text, $attr);
|
|
|
+}
|
|
|
+
|
|
|
+function SmartElipsis($text, $attr = null) {
|
|
|
+ switch ($attr) {
|
|
|
+ case 0: return $text;
|
|
|
+ default: $attr = 'e'; break;
|
|
|
+ }
|
|
|
+ return SmartyPants($text, $attr);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+### WordPress Plugin Interface ###
|
|
|
+
|
|
|
+/*
|
|
|
+Plugin Name: SmartyPants
|
|
|
+Plugin URI: http://michelf.ca/projects/php-smartypants/
|
|
|
+Description: SmartyPants is a web publishing utility that translates plain ASCII punctuation characters into “smart” typographic punctuation HTML entities. This plugin <strong>replace the default WordPress Texturize algorithm</strong> for the content and the title of your posts, the comments body and author name, and everywhere else Texturize normally apply. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>.
|
|
|
+Version: 1.5.1f
|
|
|
+Author: Michel Fortin
|
|
|
+Author URI: http://michelf.ca/
|
|
|
+*/
|
|
|
+
|
|
|
+if (isset($wp_version)) {
|
|
|
+ # Remove default Texturize filter that would conflict with SmartyPants.
|
|
|
+ remove_filter('category_description', 'wptexturize');
|
|
|
+ remove_filter('list_cats', 'wptexturize');
|
|
|
+ remove_filter('comment_author', 'wptexturize');
|
|
|
+ remove_filter('comment_text', 'wptexturize');
|
|
|
+ remove_filter('single_post_title', 'wptexturize');
|
|
|
+ remove_filter('the_title', 'wptexturize');
|
|
|
+ remove_filter('the_content', 'wptexturize');
|
|
|
+ remove_filter('the_excerpt', 'wptexturize');
|
|
|
+ remove_filter('the_tags', 'wptexturize');
|
|
|
+ # Add SmartyPants filter.
|
|
|
+ add_filter('category_description', 'SmartyPants', 11);
|
|
|
+ add_filter('list_cats', 'SmartyPants', 11);
|
|
|
+ add_filter('comment_author', 'SmartyPants', 11);
|
|
|
+ add_filter('comment_text', 'SmartyPants', 11);
|
|
|
+ add_filter('single_post_title', 'SmartyPants', 11);
|
|
|
+ add_filter('the_title', 'SmartyPants', 11);
|
|
|
+ add_filter('the_content', 'SmartyPants', 11);
|
|
|
+ add_filter('the_excerpt', 'SmartyPants', 11);
|
|
|
+ add_filter('the_tags', 'SmartyPants', 11);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+### Smarty Modifier Interface ###
|
|
|
+
|
|
|
+function smarty_modifier_smartypants($text, $attr = NULL) {
|
|
|
+ return SmartyPants($text, $attr);
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+#
|
|
|
+# SmartyPants Parser
|
|
|
+#
|
|
|
+
|
|
|
+class SmartyPants_Parser {
|
|
|
+
|
|
|
+ # Options to specify which transformations to make:
|
|
|
+ var $do_nothing = 0;
|
|
|
+ var $do_quotes = 0;
|
|
|
+ var $do_backticks = 0;
|
|
|
+ var $do_dashes = 0;
|
|
|
+ var $do_ellipses = 0;
|
|
|
+ var $do_stupefy = 0;
|
|
|
+ var $convert_quot = 0; # should we translate " entities into normal quotes?
|
|
|
+
|
|
|
+ function SmartyPants_Parser($attr = SMARTYPANTS_ATTR) {
|
|
|
+ #
|
|
|
+ # Initialize a SmartyPants_Parser with certain attributes.
|
|
|
+ #
|
|
|
+ # Parser attributes:
|
|
|
+ # 0 : do nothing
|
|
|
+ # 1 : set all
|
|
|
+ # 2 : set all, using old school en- and em- dash shortcuts
|
|
|
+ # 3 : set all, using inverted old school en and em- dash shortcuts
|
|
|
+ #
|
|
|
+ # q : quotes
|
|
|
+ # b : backtick quotes (``double'' only)
|
|
|
+ # B : backtick quotes (``double'' and `single')
|
|
|
+ # d : dashes
|
|
|
+ # D : old school dashes
|
|
|
+ # i : inverted old school dashes
|
|
|
+ # e : ellipses
|
|
|
+ # w : convert " entities to " for Dreamweaver users
|
|
|
+ #
|
|
|
+ if ($attr == "0") {
|
|
|
+ $this->do_nothing = 1;
|
|
|
+ }
|
|
|
+ else if ($attr == "1") {
|
|
|
+ # Do everything, turn all options on.
|
|
|
+ $this->do_quotes = 1;
|
|
|
+ $this->do_backticks = 1;
|
|
|
+ $this->do_dashes = 1;
|
|
|
+ $this->do_ellipses = 1;
|
|
|
+ }
|
|
|
+ else if ($attr == "2") {
|
|
|
+ # Do everything, turn all options on, use old school dash shorthand.
|
|
|
+ $this->do_quotes = 1;
|
|
|
+ $this->do_backticks = 1;
|
|
|
+ $this->do_dashes = 2;
|
|
|
+ $this->do_ellipses = 1;
|
|
|
+ }
|
|
|
+ else if ($attr == "3") {
|
|
|
+ # Do everything, turn all options on, use inverted old school dash shorthand.
|
|
|
+ $this->do_quotes = 1;
|
|
|
+ $this->do_backticks = 1;
|
|
|
+ $this->do_dashes = 3;
|
|
|
+ $this->do_ellipses = 1;
|
|
|
+ }
|
|
|
+ else if ($attr == "-1") {
|
|
|
+ # Special "stupefy" mode.
|
|
|
+ $this->do_stupefy = 1;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $chars = preg_split('//', $attr);
|
|
|
+ foreach ($chars as $c){
|
|
|
+ if ($c == "q") { $this->do_quotes = 1; }
|
|
|
+ else if ($c == "b") { $this->do_backticks = 1; }
|
|
|
+ else if ($c == "B") { $this->do_backticks = 2; }
|
|
|
+ else if ($c == "d") { $this->do_dashes = 1; }
|
|
|
+ else if ($c == "D") { $this->do_dashes = 2; }
|
|
|
+ else if ($c == "i") { $this->do_dashes = 3; }
|
|
|
+ else if ($c == "e") { $this->do_ellipses = 1; }
|
|
|
+ else if ($c == "w") { $this->convert_quot = 1; }
|
|
|
+ else {
|
|
|
+ # Unknown attribute option, ignore.
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ function transform($text) {
|
|
|
+
|
|
|
+ if ($this->do_nothing) {
|
|
|
+ return $text;
|
|
|
+ }
|
|
|
+
|
|
|
+ $tokens = $this->tokenizeHTML($text);
|
|
|
+ $result = '';
|
|
|
+ $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
|
|
|
+
|
|
|
+ $prev_token_last_char = ""; # This is a cheat, used to get some context
|
|
|
+ # for one-character tokens that consist of
|
|
|
+ # just a quote char. What we do is remember
|
|
|
+ # the last character of the previous text
|
|
|
+ # token, to use as context to curl single-
|
|
|
+ # character quote tokens correctly.
|
|
|
+
|
|
|
+ foreach ($tokens as $cur_token) {
|
|
|
+ if ($cur_token[0] == "tag") {
|
|
|
+ # Don't mess with quotes inside tags.
|
|
|
+ $result .= $cur_token[1];
|
|
|
+ if (preg_match('@<(/?)(?:'.SMARTYPANTS_TAGS_TO_SKIP.')[\s>]@', $cur_token[1], $matches)) {
|
|
|
+ $in_pre = isset($matches[1]) && $matches[1] == '/' ? 0 : 1;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ $t = $cur_token[1];
|
|
|
+ $last_char = substr($t, -1); # Remember last char of this token before processing.
|
|
|
+ if (! $in_pre) {
|
|
|
+ $t = $this->educate($t, $prev_token_last_char);
|
|
|
+ }
|
|
|
+ $prev_token_last_char = $last_char;
|
|
|
+ $result .= $t;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return $result;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educate($t, $prev_token_last_char) {
|
|
|
+ $t = $this->processEscapes($t);
|
|
|
+
|
|
|
+ if ($this->convert_quot) {
|
|
|
+ $t = preg_replace('/"/', '"', $t);
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($this->do_dashes) {
|
|
|
+ if ($this->do_dashes == 1) $t = $this->educateDashes($t);
|
|
|
+ if ($this->do_dashes == 2) $t = $this->educateDashesOldSchool($t);
|
|
|
+ if ($this->do_dashes == 3) $t = $this->educateDashesOldSchoolInverted($t);
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($this->do_ellipses) $t = $this->educateEllipses($t);
|
|
|
+
|
|
|
+ # Note: backticks need to be processed before quotes.
|
|
|
+ if ($this->do_backticks) {
|
|
|
+ $t = $this->educateBackticks($t);
|
|
|
+ if ($this->do_backticks == 2) $t = $this->educateSingleBackticks($t);
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($this->do_quotes) {
|
|
|
+ if ($t == "'") {
|
|
|
+ # Special case: single-character ' token
|
|
|
+ if (preg_match('/\S/', $prev_token_last_char)) {
|
|
|
+ $t = "’";
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $t = "‘";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else if ($t == '"') {
|
|
|
+ # Special case: single-character " token
|
|
|
+ if (preg_match('/\S/', $prev_token_last_char)) {
|
|
|
+ $t = "”";
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ $t = "“";
|
|
|
+ }
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ # Normal case:
|
|
|
+ $t = $this->educateQuotes($t);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if ($this->do_stupefy) $t = $this->stupefyEntities($t);
|
|
|
+
|
|
|
+ return $t;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateQuotes($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ #
|
|
|
+ # Returns: The string, with "educated" curly quote HTML entities.
|
|
|
+ #
|
|
|
+ # Example input: "Isn't this fun?"
|
|
|
+ # Example output: “Isn’t this fun?”
|
|
|
+ #
|
|
|
+ # Make our own "punctuation" character class, because the POSIX-style
|
|
|
+ # [:PUNCT:] is only available in Perl 5.6 or later:
|
|
|
+ $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
|
|
|
+
|
|
|
+ # Special case if the very first character is a quote
|
|
|
+ # followed by punctuation at a non-word-break. Close the quotes by brute force:
|
|
|
+ $_ = preg_replace(
|
|
|
+ array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
|
|
|
+ array('’', '”'), $_);
|
|
|
+
|
|
|
+
|
|
|
+ # Special case for double sets of quotes, e.g.:
|
|
|
+ # <p>He said, "'Quoted' words in a larger quote."</p>
|
|
|
+ $_ = preg_replace(
|
|
|
+ array("/\"'(?=\w)/", "/'\"(?=\w)/"),
|
|
|
+ array('“‘', '‘“'), $_);
|
|
|
+
|
|
|
+ # Special case for decade abbreviations (the '80s):
|
|
|
+ $_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
|
|
|
+
|
|
|
+ $close_class = '[^\ \t\r\n\[\{\(\-]';
|
|
|
+ $dec_dashes = '&\#8211;|&\#8212;';
|
|
|
+
|
|
|
+ # Get most opening single quotes:
|
|
|
+ $_ = preg_replace("{
|
|
|
+ (
|
|
|
+ \\s | # a whitespace char, or
|
|
|
+ | # a non-breaking space entity, or
|
|
|
+ -- | # dashes, or
|
|
|
+ &[mn]dash; | # named dash entities
|
|
|
+ $dec_dashes | # or decimal entities
|
|
|
+ &\\#x201[34]; # or hex
|
|
|
+ )
|
|
|
+ ' # the quote
|
|
|
+ (?=\\w) # followed by a word character
|
|
|
+ }x", '\1‘', $_);
|
|
|
+ # Single closing quotes:
|
|
|
+ $_ = preg_replace("{
|
|
|
+ ($close_class)?
|
|
|
+ '
|
|
|
+ (?(1)| # If $1 captured, then do nothing;
|
|
|
+ (?=\\s | s\\b) # otherwise, positive lookahead for a whitespace
|
|
|
+ ) # char or an 's' at a word ending position. This
|
|
|
+ # is a special case to handle something like:
|
|
|
+ # \"<i>Custer</i>'s Last Stand.\"
|
|
|
+ }xi", '\1’', $_);
|
|
|
+
|
|
|
+ # Any remaining single quotes should be opening ones:
|
|
|
+ $_ = str_replace("'", '‘', $_);
|
|
|
+
|
|
|
+
|
|
|
+ # Get most opening double quotes:
|
|
|
+ $_ = preg_replace("{
|
|
|
+ (
|
|
|
+ \\s | # a whitespace char, or
|
|
|
+ | # a non-breaking space entity, or
|
|
|
+ -- | # dashes, or
|
|
|
+ &[mn]dash; | # named dash entities
|
|
|
+ $dec_dashes | # or decimal entities
|
|
|
+ &\\#x201[34]; # or hex
|
|
|
+ )
|
|
|
+ \" # the quote
|
|
|
+ (?=\\w) # followed by a word character
|
|
|
+ }x", '\1“', $_);
|
|
|
+
|
|
|
+ # Double closing quotes:
|
|
|
+ $_ = preg_replace("{
|
|
|
+ ($close_class)?
|
|
|
+ \"
|
|
|
+ (?(1)|(?=\\s)) # If $1 captured, then do nothing;
|
|
|
+ # if not, then make sure the next char is whitespace.
|
|
|
+ }x", '\1”', $_);
|
|
|
+
|
|
|
+ # Any remaining quotes should be opening ones.
|
|
|
+ $_ = str_replace('"', '“', $_);
|
|
|
+
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateBackticks($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ # Returns: The string, with ``backticks'' -style double quotes
|
|
|
+ # translated into HTML curly quote entities.
|
|
|
+ #
|
|
|
+ # Example input: ``Isn't this fun?''
|
|
|
+ # Example output: “Isn't this fun?”
|
|
|
+ #
|
|
|
+
|
|
|
+ $_ = str_replace(array("``", "''",),
|
|
|
+ array('“', '”'), $_);
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateSingleBackticks($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ # Returns: The string, with `backticks' -style single quotes
|
|
|
+ # translated into HTML curly quote entities.
|
|
|
+ #
|
|
|
+ # Example input: `Isn't this fun?'
|
|
|
+ # Example output: ‘Isn’t this fun?’
|
|
|
+ #
|
|
|
+
|
|
|
+ $_ = str_replace(array("`", "'",),
|
|
|
+ array('‘', '’'), $_);
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateDashes($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ #
|
|
|
+ # Returns: The string, with each instance of "--" translated to
|
|
|
+ # an em-dash HTML entity.
|
|
|
+ #
|
|
|
+
|
|
|
+ $_ = str_replace('--', '—', $_);
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateDashesOldSchool($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ #
|
|
|
+ # Returns: The string, with each instance of "--" translated to
|
|
|
+ # an en-dash HTML entity, and each "---" translated to
|
|
|
+ # an em-dash HTML entity.
|
|
|
+ #
|
|
|
+
|
|
|
+ # em en
|
|
|
+ $_ = str_replace(array("---", "--",),
|
|
|
+ array('—', '–'), $_);
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateDashesOldSchoolInverted($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ #
|
|
|
+ # Returns: The string, with each instance of "--" translated to
|
|
|
+ # an em-dash HTML entity, and each "---" translated to
|
|
|
+ # an en-dash HTML entity. Two reasons why: First, unlike the
|
|
|
+ # en- and em-dash syntax supported by
|
|
|
+ # EducateDashesOldSchool(), it's compatible with existing
|
|
|
+ # entries written before SmartyPants 1.1, back when "--" was
|
|
|
+ # only used for em-dashes. Second, em-dashes are more
|
|
|
+ # common than en-dashes, and so it sort of makes sense that
|
|
|
+ # the shortcut should be shorter to type. (Thanks to Aaron
|
|
|
+ # Swartz for the idea.)
|
|
|
+ #
|
|
|
+
|
|
|
+ # en em
|
|
|
+ $_ = str_replace(array("---", "--",),
|
|
|
+ array('–', '—'), $_);
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function educateEllipses($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ # Returns: The string, with each instance of "..." translated to
|
|
|
+ # an ellipsis HTML entity. Also converts the case where
|
|
|
+ # there are spaces between the dots.
|
|
|
+ #
|
|
|
+ # Example input: Huh...?
|
|
|
+ # Example output: Huh…?
|
|
|
+ #
|
|
|
+
|
|
|
+ $_ = str_replace(array("...", ". . .",), '…', $_);
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function stupefyEntities($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ # Returns: The string, with each SmartyPants HTML entity translated to
|
|
|
+ # its ASCII counterpart.
|
|
|
+ #
|
|
|
+ # Example input: “Hello — world.”
|
|
|
+ # Example output: "Hello -- world."
|
|
|
+ #
|
|
|
+
|
|
|
+ # en-dash em-dash
|
|
|
+ $_ = str_replace(array('–', '—'),
|
|
|
+ array('-', '--'), $_);
|
|
|
+
|
|
|
+ # single quote open close
|
|
|
+ $_ = str_replace(array('‘', '’'), "'", $_);
|
|
|
+
|
|
|
+ # double quote open close
|
|
|
+ $_ = str_replace(array('“', '”'), '"', $_);
|
|
|
+
|
|
|
+ $_ = str_replace('…', '...', $_); # ellipsis
|
|
|
+
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function processEscapes($_) {
|
|
|
+ #
|
|
|
+ # Parameter: String.
|
|
|
+ # Returns: The string, with after processing the following backslash
|
|
|
+ # escape sequences. This is useful if you want to force a "dumb"
|
|
|
+ # quote or other character to appear.
|
|
|
+ #
|
|
|
+ # Escape Value
|
|
|
+ # ------ -----
|
|
|
+ # \\ \
|
|
|
+ # \" "
|
|
|
+ # \' '
|
|
|
+ # \. .
|
|
|
+ # \- -
|
|
|
+ # \` `
|
|
|
+ #
|
|
|
+ $_ = str_replace(
|
|
|
+ array('\\\\', '\"', "\'", '\.', '\-', '\`'),
|
|
|
+ array('\', '"', ''', '.', '-', '`'), $_);
|
|
|
+
|
|
|
+ return $_;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ function tokenizeHTML($str) {
|
|
|
+ #
|
|
|
+ # Parameter: String containing HTML markup.
|
|
|
+ # Returns: An array of the tokens comprising the input
|
|
|
+ # string. Each token is either a tag (possibly with nested,
|
|
|
+ # tags contained therein, such as <a href="<MTFoo>">, or a
|
|
|
+ # run of text between tags. Each element of the array is a
|
|
|
+ # two-element array; the first is either 'tag' or 'text';
|
|
|
+ # the second is the actual value.
|
|
|
+ #
|
|
|
+ #
|
|
|
+ # Regular expression derived from the _tokenize() subroutine in
|
|
|
+ # Brad Choate's MTRegex plugin.
|
|
|
+ # <http://www.bradchoate.com/past/mtregex.php>
|
|
|
+ #
|
|
|
+ $index = 0;
|
|
|
+ $tokens = array();
|
|
|
+
|
|
|
+ $match = '(?s:<!--.*?-->)|'. # comment
|
|
|
+ '(?s:<\?.*?\?>)|'. # processing instruction
|
|
|
+ # regular tags
|
|
|
+ '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
|
|
|
+
|
|
|
+ $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE);
|
|
|
+
|
|
|
+ foreach ($parts as $part) {
|
|
|
+ if (++$index % 2 && $part != '')
|
|
|
+ $tokens[] = array('text', $part);
|
|
|
+ else
|
|
|
+ $tokens[] = array('tag', $part);
|
|
|
+ }
|
|
|
+ return $tokens;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+/*
|
|
|
+
|
|
|
+PHP SmartyPants
|
|
|
+===============
|
|
|
+
|
|
|
+Description
|
|
|
+-----------
|
|
|
+
|
|
|
+This is a PHP translation of the original SmartyPants quote educator written in
|
|
|
+Perl by John Gruber.
|
|
|
+
|
|
|
+SmartyPants is a web publishing utility that translates plain ASCII
|
|
|
+punctuation characters into "smart" typographic punctuation HTML
|
|
|
+entities. SmartyPants can perform the following transformations:
|
|
|
+
|
|
|
+* Straight quotes (`"` and `'`) into "curly" quote HTML entities
|
|
|
+* Backticks-style quotes (` ``like this'' `) into "curly" quote HTML
|
|
|
+ entities
|
|
|
+* Dashes (`--` and `---`) into en- and em-dash entities
|
|
|
+* Three consecutive dots (`...`) into an ellipsis entity
|
|
|
+
|
|
|
+SmartyPants does not modify characters within `<pre>`, `<code>`, `<kbd>`,
|
|
|
+`<script>`, or `<math>` tag blocks. Typically, these tags are used to
|
|
|
+display text where smart quotes and other "smart punctuation" would not
|
|
|
+be appropriate, such as source code or example markup.
|
|
|
+
|
|
|
+
|
|
|
+### Backslash Escapes ###
|
|
|
+
|
|
|
+If you need to use literal straight quotes (or plain hyphens and
|
|
|
+periods), SmartyPants accepts the following backslash escape sequences
|
|
|
+to force non-smart punctuation. It does so by transforming the escape
|
|
|
+sequence into a decimal-encoded HTML entity:
|
|
|
+
|
|
|
+ Escape Value Character
|
|
|
+ ------ ----- ---------
|
|
|
+ \\ \ \
|
|
|
+ \" " "
|
|
|
+ \' ' '
|
|
|
+ \. . .
|
|
|
+ \- - -
|
|
|
+ \` ` `
|
|
|
+
|
|
|
+This is useful, for example, when you want to use straight quotes as
|
|
|
+foot and inch marks: 6'2" tall; a 17" iMac.
|
|
|
+
|
|
|
+
|
|
|
+Bugs
|
|
|
+----
|
|
|
+
|
|
|
+To file bug reports or feature requests (other than topics listed in the
|
|
|
+Caveats section above) please send email to:
|
|
|
+
|
|
|
+<michel.fortin@michelf.com>
|
|
|
+
|
|
|
+If the bug involves quotes being curled the wrong way, please send example
|
|
|
+text to illustrate.
|
|
|
+
|
|
|
+
|
|
|
+### Algorithmic Shortcomings ###
|
|
|
+
|
|
|
+One situation in which quotes will get curled the wrong way is when
|
|
|
+apostrophes are used at the start of leading contractions. For example:
|
|
|
+
|
|
|
+ 'Twas the night before Christmas.
|
|
|
+
|
|
|
+In the case above, SmartyPants will turn the apostrophe into an opening
|
|
|
+single-quote, when in fact it should be a closing one. I don't think
|
|
|
+this problem can be solved in the general case -- every word processor
|
|
|
+I've tried gets this wrong as well. In such cases, it's best to use the
|
|
|
+proper HTML entity for closing single-quotes (`’`) by hand.
|
|
|
+
|
|
|
+
|
|
|
+Version History
|
|
|
+---------------
|
|
|
+
|
|
|
+1.5.1f (23 Jan 2013)
|
|
|
+
|
|
|
+* Fixed handling of HTML comments to match latest HTML specs instead of
|
|
|
+ doing it the old SGML way.
|
|
|
+
|
|
|
+* Lowered WordPress filtering priority to avoid clashing with the
|
|
|
+ [caption] tag filter. Thanks to Mehdi Kabab for the fix.
|
|
|
+
|
|
|
+
|
|
|
+1.5.1oo (19 May 2006, unreleased)
|
|
|
+
|
|
|
+* Converted SmartyPants to a object-oriented design.
|
|
|
+
|
|
|
+
|
|
|
+1.5.1e (9 Dec 2005)
|
|
|
+
|
|
|
+* Corrected a bug that prevented special characters from being
|
|
|
+ escaped.
|
|
|
+
|
|
|
+
|
|
|
+1.5.1d (25 May 2005)
|
|
|
+
|
|
|
+* Corrected a small bug in `_TokenizeHTML` where a Doctype declaration
|
|
|
+ was not seen as HTML (smart quotes where applied inside).
|
|
|
+
|
|
|
+
|
|
|
+1.5.1c (13 Dec 2004)
|
|
|
+
|
|
|
+* Changed a regular expression in `_TokenizeHTML` that could lead to
|
|
|
+ a segmentation fault with PHP 4.3.8 on Linux.
|
|
|
+
|
|
|
+
|
|
|
+1.5.1b (6 Sep 2004)
|
|
|
+
|
|
|
+* Corrected a problem with quotes immediately following a dash
|
|
|
+ with no space between: `Text--"quoted text"--text.`
|
|
|
+
|
|
|
+* PHP SmartyPants can now be used as a modifier by the Smarty
|
|
|
+ template engine. Rename the file to "modifier.smartypants.php"
|
|
|
+ and put it in your smarty plugins folder.
|
|
|
+
|
|
|
+* Replaced a lot of space characters by tabs, saving about 4 KB.
|
|
|
+
|
|
|
+
|
|
|
+1.5.1a (30 Jun 2004)
|
|
|
+
|
|
|
+* PHP Markdown and PHP Smartypants now share the same `_TokenizeHTML`
|
|
|
+ function when loaded simultanously.
|
|
|
+
|
|
|
+* Changed the internals of `_TokenizeHTML` to lower the PHP version
|
|
|
+ requirement to PHP 4.0.5.
|
|
|
+
|
|
|
+
|
|
|
+1.5.1 (6 Jun 2004)
|
|
|
+
|
|
|
+* Initial release of PHP SmartyPants, based on version 1.5.1 of the
|
|
|
+ original SmartyPants written in Perl.
|
|
|
+
|
|
|
+
|
|
|
+Author
|
|
|
+------
|
|
|
+
|
|
|
+Original SmartyPants by John Gruber
|
|
|
+<http://daringfireball.net/>
|
|
|
+
|
|
|
+PHP Port by Michel Fortin
|
|
|
+<http://michelf.ca/>
|
|
|
+
|
|
|
+
|
|
|
+Additional Credits
|
|
|
+------------------
|
|
|
+
|
|
|
+Portions of this plug-in are based on Brad Choate's nifty MTRegex plug-in.
|
|
|
+Brad Choate also contributed a few bits of source code to this plug-in.
|
|
|
+Brad Choate is a fine hacker indeed. (<http://bradchoate.com/>)
|
|
|
+
|
|
|
+Jeremy Hedley (<http://antipixel.com/>) and Charles Wiltgen
|
|
|
+(<http://playbacktime.com/>) deserve mention for exemplary beta testing of
|
|
|
+the orignal SmartyPants.
|
|
|
+
|
|
|
+
|
|
|
+Copyright and License
|
|
|
+---------------------
|
|
|
+
|
|
|
+Copyright (c) 2003 John Gruber
|
|
|
+<http://daringfireball.net/>
|
|
|
+All rights reserved.
|
|
|
+
|
|
|
+Copyright (c) 2004-2013 Michel Fortin
|
|
|
+<http://michelf.ca>
|
|
|
+
|
|
|
+Redistribution and use in source and binary forms, with or without
|
|
|
+modification, are permitted provided that the following conditions are met:
|
|
|
+
|
|
|
+* Redistributions of source code must retain the above copyright
|
|
|
+ notice, this list of conditions and the following disclaimer.
|
|
|
+
|
|
|
+* Redistributions in binary form must reproduce the above copyright
|
|
|
+ notice, this list of conditions and the following disclaimer in the
|
|
|
+ documentation and/or other materials provided with the distribution.
|
|
|
+
|
|
|
+* Neither the name "SmartyPants" nor the names of its contributors may
|
|
|
+ be used to endorse or promote products derived from this software
|
|
|
+ without specific prior written permission.
|
|
|
+
|
|
|
+This software is provided by the copyright holders and contributors "as is"
|
|
|
+and any express or implied warranties, including, but not limited to, the
|
|
|
+implied warranties of merchantability and fitness for a particular purpose
|
|
|
+are disclaimed. In no event shall the copyright owner or contributors be
|
|
|
+liable for any direct, indirect, incidental, special, exemplary, or
|
|
|
+consequential damages (including, but not limited to, procurement of
|
|
|
+substitute goods or services; loss of use, data, or profits; or business
|
|
|
+interruption) however caused and on any theory of liability, whether in
|
|
|
+contract, strict liability, or tort (including negligence or otherwise)
|
|
|
+arising in any way out of the use of this software, even if advised of the
|
|
|
+possibility of such damage.
|
|
|
+
|
|
|
+*/
|
|
|
+?>
|