Diff
checker
텍스트
텍스트
이미지
문서
Excel
폴더
Legal
Enterprise
데스크톱
요금제
로그인
데스크톱 앱 다운로드
텍스트 비교
두 텍스트 파일의 차이점을 찾아보세요
도구
기록
실시간 편집
변경 없는 행 숨기기
줄바꿈 비활성화
레이아웃
나란히 보기
합쳐 보기
비교 단위
스마트
단어
글자
구문 강조
언어 선택
제외
텍스트 변환
첫 변경으로
수정
Diffchecker Desktop
가장 안전하게 Diffchecker를 사용하는 방법. 데스크톱 앱을 사용하면 비교 데이터가 외부로 전송되지 않습니다!
데스크톱 앱 받기
Untitled diff
생성일
10년 전
비교 결과 만료 없음
초기화
내보내기
공유
두 텍스트가 동일합니다
두 텍스트 간 차이점이 없습니다
0 삭제
행
총
삭제
글자
총
삭제
이 기능을 계속 사용하려면 업그레이드해 주세요
Diff
checker
Pro
요금제 보기
480 행
복사
0 추가
행
총
추가
글자
총
추가
이 기능을 계속 사용하려면 업그레이드해 주세요
Diff
checker
Pro
요금제 보기
480 행
복사
<?php
<?php
/**
/**
* PHPTAL templating engine
* PHPTAL templating engine
*
*
* PHP Version 5
* PHP Version 5
*
*
* @category HTML
* @category HTML
* @package PHPTAL
* @package PHPTAL
* @author Laurent Bedubourg <lbedubourg@motion-twin.com>
* @author Laurent Bedubourg <lbedubourg@motion-twin.com>
* @author Kornel Lesiński <kornel@aardvarkmedia.co.uk>
* @author Kornel Lesiński <kornel@aardvarkmedia.co.uk>
* @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License
* @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License
* @version SVN: $Id$
* @version SVN: $Id$
* @link http://phptal.org/
* @link http://phptal.org/
*/
*/
/**
/**
* Simple sax like xml parser for PHPTAL
* Simple sax like xml parser for PHPTAL
* ("Dom" in the class name comes from name of the directory, not mode of operation)
* ("Dom" in the class name comes from name of the directory, not mode of operation)
*
*
* At the time this parser was created, standard PHP libraries were not suitable
* At the time this parser was created, standard PHP libraries were not suitable
* (could not retrieve doctypes, xml declaration, problems with comments and CDATA).
* (could not retrieve doctypes, xml declaration, problems with comments and CDATA).
*
*
* There are still some problems: XML parsers don't care about exact format of enties
* There are still some problems: XML parsers don't care about exact format of enties
* or CDATA sections (PHPTAL tries to preserve them),
* or CDATA sections (PHPTAL tries to preserve them),
* <?php ?> blocks are not allowed in attributes.
* <?php ?> blocks are not allowed in attributes.
*
*
* This parser failed to enforce some XML well-formedness constraints,
* This parser failed to enforce some XML well-formedness constraints,
* and there are ill-formed templates "in the wild" because of this.
* and there are ill-formed templates "in the wild" because of this.
*
*
* @package PHPTAL
* @package PHPTAL
* @subpackage Dom
* @subpackage Dom
* @see PHPTAL_DOM_DocumentBuilder
* @see PHPTAL_DOM_DocumentBuilder
*/
*/
class PHPTAL_Dom_SaxXmlParser
class PHPTAL_Dom_SaxXmlParser
{
{
private $_file;
private $_file;
private $_line;
private $_line;
private $_source;
private $_source;
// available parser states
// available parser states
const ST_ROOT = 0;
const ST_ROOT = 0;
const ST_TEXT = 1;
const ST_TEXT = 1;
const ST_LT = 2;
const ST_LT = 2;
const ST_TAG_NAME = 3;
const ST_TAG_NAME = 3;
const ST_TAG_CLOSE = 4;
const ST_TAG_CLOSE = 4;
const ST_TAG_SINGLE = 5;
const ST_TAG_SINGLE = 5;
const ST_TAG_ATTRIBUTES = 6;
const ST_TAG_ATTRIBUTES = 6;
const ST_TAG_BETWEEN_ATTRIBUTE = 7;
const ST_TAG_BETWEEN_ATTRIBUTE = 7;
const ST_CDATA = 8;
const ST_CDATA = 8;
const ST_COMMENT = 9;
const ST_COMMENT = 9;
const ST_DOCTYPE = 10;
const ST_DOCTYPE = 10;
const ST_XMLDEC = 11;
const ST_XMLDEC = 11;
const ST_PREPROC = 12;
const ST_PREPROC = 12;
const ST_ATTR_KEY = 13;
const ST_ATTR_KEY = 13;
const ST_ATTR_EQ = 14;
const ST_ATTR_EQ = 14;
const ST_ATTR_QUOTE = 15;
const ST_ATTR_QUOTE = 15;
const ST_ATTR_VALUE = 16;
const ST_ATTR_VALUE = 16;
const BOM_STR = "\xef\xbb\xbf";
const BOM_STR = "\xef\xbb\xbf";
static $state_names = array(
static $state_names = array(
self::ST_ROOT => 'root node',
self::ST_ROOT => 'root node',
self::ST_TEXT => 'text',
self::ST_TEXT => 'text',
self::ST_LT => 'start of tag',
self::ST_LT => 'start of tag',
self::ST_TAG_NAME => 'tag name',
self::ST_TAG_NAME => 'tag name',
self::ST_TAG_CLOSE => 'closing tag',
self::ST_TAG_CLOSE => 'closing tag',
self::ST_TAG_SINGLE => 'self-closing tag',
self::ST_TAG_SINGLE => 'self-closing tag',
self::ST_TAG_ATTRIBUTES => 'tag',
self::ST_TAG_ATTRIBUTES => 'tag',
self::ST_TAG_BETWEEN_ATTRIBUTE => 'tag attributes',
self::ST_TAG_BETWEEN_ATTRIBUTE => 'tag attributes',
self::ST_CDATA => 'CDATA',
self::ST_CDATA => 'CDATA',
self::ST_COMMENT => 'comment',
self::ST_COMMENT => 'comment',
self::ST_DOCTYPE => 'doctype',
self::ST_DOCTYPE => 'doctype',
self::ST_XMLDEC => 'XML declaration',
self::ST_XMLDEC => 'XML declaration',
self::ST_PREPROC => 'preprocessor directive',
self::ST_PREPROC => 'preprocessor directive',
self::ST_ATTR_KEY => 'attribute name',
self::ST_ATTR_KEY => 'attribute name',
self::ST_ATTR_EQ => 'attribute value',
self::ST_ATTR_EQ => 'attribute value',
self::ST_ATTR_QUOTE => 'quoted attribute value',
self::ST_ATTR_QUOTE => 'quoted attribute value',
self::ST_ATTR_VALUE => 'unquoted attribute value',
self::ST_ATTR_VALUE => 'unquoted attribute value',
);
);
private $input_encoding;
private $input_encoding;
public function __construct($input_encoding)
public function __construct($input_encoding)
{
{
$this->input_encoding = $input_encoding;
$this->input_encoding = $input_encoding;
$this->_file = "<string>";
$this->_file = "<string>";
}
}
public function parseFile(PHPTAL_Dom_DocumentBuilder $builder, $src)
public function parseFile(PHPTAL_Dom_DocumentBuilder $builder, $src)
{
{
if (!file_exists($src)) {
if (!file_exists($src)) {
throw new PHPTAL_IOException("file $src not found");
throw new PHPTAL_IOException("file $src not found");
}
}
return $this->parseString($builder, file_get_contents($src), $src);
return $this->parseString($builder, file_get_contents($src), $src);
}
}
public function parseString(PHPTAL_Dom_DocumentBuilder $builder, $src, $filename = '<string>')
public function parseString(PHPTAL_Dom_DocumentBuilder $builder, $src, $filename = '<string>')
{
{
try
try
{
{
$builder->setEncoding($this->input_encoding);
$builder->setEncoding($this->input_encoding);
$this->_file = $filename;
$this->_file = $filename;
$this->_line = 1;
$this->_line = 1;
$state = self::ST_ROOT;
$state = self::ST_ROOT;
$mark = 0;
$mark = 0;
$len = strlen($src);
$len = strlen($src);
$quoteStyle = '"';
$quoteStyle = '"';
$tagname = "";
$tagname = "";
$attribute = "";
$attribute = "";
$attributes = array();
$attributes = array();
$customDoctype = false;
$customDoctype = false;
$builder->setSource($this->_file, $this->_line);
$builder->setSource($this->_file, $this->_line);
$builder->onDocumentStart();
$builder->onDocumentStart();
$i=0;
$i=0;
// remove BOM (UTF-8 byte order mark)...
// remove BOM (UTF-8 byte order mark)...
if (substr($src, 0, 3) === self::BOM_STR) {
if (substr($src, 0, 3) === self::BOM_STR) {
$i=3;
$i=3;
}
}
for (; $i<$len; $i++) {
for (; $i<$len; $i++) {
$c = $src[$i]; // Change to substr($src, $i, 1); if you want to use mb_string.func_overload
$c = $src[$i]; // Change to substr($src, $i, 1); if you want to use mb_string.func_overload
if ($c === "\n") $builder->setSource($this->_file, ++$this->_line);
if ($c === "\n") $builder->setSource($this->_file, ++$this->_line);
switch ($state) {
switch ($state) {
case self::ST_ROOT:
case self::ST_ROOT:
if ($c === '<') {
if ($c === '<') {
$mark = $i; // mark tag start
$mark = $i; // mark tag start
$state = self::ST_LT;
$state = self::ST_LT;
} elseif (!self::isWhiteChar($c)) {
} elseif (!self::isWhiteChar($c)) {
$this->raiseError("Characters found before beginning of the document! (wrap document in < tal:block > to avoid this error)");
$this->raiseError("Characters found before beginning of the document! (wrap document in < tal:block > to avoid this error)");
}
}
break;
break;
case self::ST_TEXT:
case self::ST_TEXT:
if ($c === '<') {
if ($c === '<') {
if ($mark != $i) {
if ($mark != $i) {
$builder->onElementData($this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark))));
$builder->onElementData($this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark))));
}
}
$mark = $i;
$mark = $i;
$state = self::ST_LT;
$state = self::ST_LT;
}
}
break;
break;
case self::ST_LT:
case self::ST_LT:
if ($c === '/') {
if ($c === '/') {
$mark = $i+1;
$mark = $i+1;
$state = self::ST_TAG_CLOSE;
$state = self::ST_TAG_CLOSE;
} elseif ($c === '?' and strtolower(substr($src, $i, 5)) === '?xml ') {
} elseif ($c === '?' and strtolower(substr($src, $i, 5)) === '?xml ') {
$state = self::ST_XMLDEC;
$state = self::ST_XMLDEC;
} elseif ($c === '?') {
} elseif ($c === '?') {
$state = self::ST_PREPROC;
$state = self::ST_PREPROC;
} elseif ($c === '!' and substr($src, $i, 3) === '!--') {
} elseif ($c === '!' and substr($src, $i, 3) === '!--') {
$state = self::ST_COMMENT;
$state = self::ST_COMMENT;
} elseif ($c === '!' and substr($src, $i, 8) === '![CDATA[') {
} elseif ($c === '!' and substr($src, $i, 8) === '![CDATA[') {
$state = self::ST_CDATA;
$state = self::ST_CDATA;
$mark = $i+8; // past opening tag
$mark = $i+8; // past opening tag
} elseif ($c === '!' and strtoupper(substr($src, $i, 8)) === '!DOCTYPE') {
} elseif ($c === '!' and strtoupper(substr($src, $i, 8)) === '!DOCTYPE') {
$state = self::ST_DOCTYPE;
$state = self::ST_DOCTYPE;
} elseif (self::isWhiteChar($c)) {
} elseif (self::isWhiteChar($c)) {
$state = self::ST_TEXT;
$state = self::ST_TEXT;
} else {
} else {
$mark = $i; // mark node name start
$mark = $i; // mark node name start
$attributes = array();
$attributes = array();
$attribute = "";
$attribute = "";
$state = self::ST_TAG_NAME;
$state = self::ST_TAG_NAME;
}
}
break;
break;
case self::ST_TAG_NAME:
case self::ST_TAG_NAME:
if (self::isWhiteChar($c) || $c === '/' || $c === '>') {
if (self::isWhiteChar($c) || $c === '/' || $c === '>') {
$tagname = substr($src, $mark, $i-$mark);
$tagname = substr($src, $mark, $i-$mark);
if (!$this->isValidQName($tagname)) $this->raiseError("Invalid tag name '$tagname'");
if (!$this->isValidQName($tagname)) $this->raiseError("Invalid tag name '$tagname'");
if ($c === '/') {
if ($c === '/') {
$state = self::ST_TAG_SINGLE;
$state = self::ST_TAG_SINGLE;
} elseif ($c === '>') {
} elseif ($c === '>') {
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
$builder->onElementStart($tagname, $attributes);
$builder->onElementStart($tagname, $attributes);
} else /* isWhiteChar */ {
} else /* isWhiteChar */ {
$state = self::ST_TAG_ATTRIBUTES;
$state = self::ST_TAG_ATTRIBUTES;
}
}
}
}
break;
break;
case self::ST_TAG_CLOSE:
case self::ST_TAG_CLOSE:
if ($c === '>') {
if ($c === '>') {
$tagname = rtrim(substr($src, $mark, $i-$mark));
$tagname = rtrim(substr($src, $mark, $i-$mark));
$builder->onElementClose($tagname);
$builder->onElementClose($tagname);
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
}
}
break;
break;
case self::ST_TAG_SINGLE:
case self::ST_TAG_SINGLE:
if ($c !== '>') {
if ($c !== '>') {
$this->raiseError("Expected '/>', but found '/$c' inside tag < $tagname >");
$this->raiseError("Expected '/>', but found '/$c' inside tag < $tagname >");
}
}
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
$builder->onElementStart($tagname, $attributes);
$builder->onElementStart($tagname, $attributes);
$builder->onElementClose($tagname);
$builder->onElementClose($tagname);
break;
break;
case self::ST_TAG_BETWEEN_ATTRIBUTE:
case self::ST_TAG_BETWEEN_ATTRIBUTE:
case self::ST_TAG_ATTRIBUTES:
case self::ST_TAG_ATTRIBUTES:
if ($c === '>') {
if ($c === '>') {
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
$builder->onElementStart($tagname, $attributes);
$builder->onElementStart($tagname, $attributes);
} elseif ($c === '/') {
} elseif ($c === '/') {
$state = self::ST_TAG_SINGLE;
$state = self::ST_TAG_SINGLE;
} elseif (self::isWhiteChar($c)) {
} elseif (self::isWhiteChar($c)) {
$state = self::ST_TAG_ATTRIBUTES;
$state = self::ST_TAG_ATTRIBUTES;
} elseif ($state === self::ST_TAG_ATTRIBUTES && $this->isValidQName($c)) {
} elseif ($state === self::ST_TAG_ATTRIBUTES && $this->isValidQName($c)) {
$mark = $i; // mark attribute key start
$mark = $i; // mark attribute key start
$state = self::ST_ATTR_KEY;
$state = self::ST_ATTR_KEY;
} else $this->raiseError("Unexpected character '$c' between attributes of < $tagname >");
} else $this->raiseError("Unexpected character '$c' between attributes of < $tagname >");
break;
break;
case self::ST_COMMENT:
case self::ST_COMMENT:
if ($c === '>' && $i > $mark+4 && substr($src, $i-2, 2) === '--') {
if ($c === '>' && $i > $mark+4 && substr($src, $i-2, 2) === '--') {
if (preg_match('/^-|--|-$/', substr($src, $mark +4, $i-$mark+1 -7))) {
if (preg_match('/^-|--|-$/', substr($src, $mark +4, $i-$mark+1 -7))) {
$this->raiseError("Ill-formed comment. XML comments are not allowed to contain '--' or start/end with '-': ".substr($src, $mark+4, $i-$mark+1-7));
$this->raiseError("Ill-formed comment. XML comments are not allowed to contain '--' or start/end with '-': ".substr($src, $mark+4, $i-$mark+1-7));
}
}
$builder->onComment($this->checkEncoding(substr($src, $mark+4, $i-$mark+1-7)));
$builder->onComment($this->checkEncoding(substr($src, $mark+4, $i-$mark+1-7)));
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
}
}
break;
break;
case self::ST_CDATA:
case self::ST_CDATA:
if ($c === '>' and substr($src, $i-2, 2) === ']]') {
if ($c === '>' and substr($src, $i-2, 2) === ']]') {
$builder->onCDATASection($this->checkEncoding(substr($src, $mark, $i-$mark-2)));
$builder->onCDATASection($this->checkEncoding(substr($src, $mark, $i-$mark-2)));
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
}
}
break;
break;
case self::ST_XMLDEC:
case self::ST_XMLDEC:
if ($c === '?' && substr($src, $i, 2) === '?>') {
if ($c === '?' && substr($src, $i, 2) === '?>') {
$builder->onXmlDecl($this->checkEncoding(substr($src, $mark, $i-$mark+2)));
$builder->onXmlDecl($this->checkEncoding(substr($src, $mark, $i-$mark+2)));
$i++; // skip '>'
$i++; // skip '>'
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
}
}
break;
break;
case self::ST_DOCTYPE:
case self::ST_DOCTYPE:
if ($c === '[') {
if ($c === '[') {
$customDoctype = true;
$customDoctype = true;
} elseif ($customDoctype && $c === '>' && substr($src, $i-1, 2) === ']>') {
} elseif ($customDoctype && $c === '>' && substr($src, $i-1, 2) === ']>') {
$customDoctype = false;
$customDoctype = false;
$builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1)));
$builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1)));
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
} elseif (!$customDoctype && $c === '>') {
} elseif (!$customDoctype && $c === '>') {
$customDoctype = false;
$customDoctype = false;
$builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1)));
$builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1)));
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
}
}
break;
break;
case self::ST_PREPROC:
case self::ST_PREPROC:
if ($c === '>' and substr($src, $i-1, 1) === '?') {
if ($c === '>' and substr($src, $i-1, 1) === '?') {
$builder->onProcessingInstruction($this->checkEncoding(substr($src, $mark, $i-$mark+1)));
$builder->onProcessingInstruction($this->checkEncoding(substr($src, $mark, $i-$mark+1)));
$mark = $i+1; // mark text start
$mark = $i+1; // mark text start
$state = self::ST_TEXT;
$state = self::ST_TEXT;
}
}
break;
break;
case self::ST_ATTR_KEY:
case self::ST_ATTR_KEY:
if ($c === '=' || self::isWhiteChar($c)) {
if ($c === '=' || self::isWhiteChar($c)) {
$attribute = substr($src, $mark, $i-$mark);
$attribute = substr($src, $mark, $i-$mark);
if (!$this->isValidQName($attribute)) {
if (!$this->isValidQName($attribute)) {
$this->raiseError("Invalid attribute name '$attribute' in < $tagname >");
$this->raiseError("Invalid attribute name '$attribute' in < $tagname >");
}
}
if (isset($attributes[$attribute])) {
if (isset($attributes[$attribute])) {
$this->raiseError("Attribute $attribute in < $tagname > is defined more than once");
$this->raiseError("Attribute $attribute in < $tagname > is defined more than once");
}
}
if ($c === '=') $state = self::ST_ATTR_VALUE;
if ($c === '=') $state = self::ST_ATTR_VALUE;
else /* white char */ $state = self::ST_ATTR_EQ;
else /* white char */ $state = self::ST_ATTR_EQ;
} elseif ($c === '/' || $c==='>') {
} elseif ($c === '/' || $c==='>') {
$attribute = substr($src, $mark, $i-$mark);
$attribute = substr($src, $mark, $i-$mark);
if (!$this->isValidQName($attribute)) {
if (!$this->isValidQName($attribute)) {
$this->raiseError("Invalid attribute name '$attribute'");
$this->raiseError("Invalid attribute name '$attribute'");
}
}
$this->raiseError("Attribute $attribute does not have value (found end of tag instead of '=')");
$this->raiseError("Attribute $attribute does not have value (found end of tag instead of '=')");
}
}
break;
break;
case self::ST_ATTR_EQ:
case self::ST_ATTR_EQ:
if ($c === '=') {
if ($c === '=') {
$state = self::ST_ATTR_VALUE;
$state = self::ST_ATTR_VALUE;
} elseif (!self::isWhiteChar($c)) {
} elseif (!self::isWhiteChar($c)) {
$this->raiseError("Attribute $attribute in < $tagname > does not have value (found character '$c' instead of '=')");
$this->raiseError("Attribute $attribute in < $tagname > does not have value (found character '$c' instead of '=')");
}
}
break;
break;
case self::ST_ATTR_VALUE:
case self::ST_ATTR_VALUE:
if (self::isWhiteChar($c)) {
if (self::isWhiteChar($c)) {
} elseif ($c === '"' or $c === '\'') {
} elseif ($c === '"' or $c === '\'') {
$quoteStyle = $c;
$quoteStyle = $c;
$state = self::ST_ATTR_QUOTE;
$state = self::ST_ATTR_QUOTE;
$mark = $i+1; // mark attribute real value start
$mark = $i+1; // mark attribute real value start
} else {
} else {
$this->raiseError("Value of attribute $attribute in < $tagname > is not in quotes (found character '$c' instead of quote)");
$this->raiseError("Value of attribute $attribute in < $tagname > is not in quotes (found character '$c' instead of quote)");
}
}
break;
break;
case self::ST_ATTR_QUOTE:
case self::ST_ATTR_QUOTE:
if ($c === $quoteStyle) {
if ($c === $quoteStyle) {
$attributes[$attribute] = $this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark)));
$attributes[$attribute] = $this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark)));
// PHPTAL's code generator assumes input is escaped for double-quoted strings. Single-quoted attributes need to be converted.
// PHPTAL's code generator assumes input is escaped for double-quoted strings. Single-quoted attributes need to be converted.
// FIXME: it should be escaped at later stage.
// FIXME: it should be escaped at later stage.
$attributes[$attribute] = str_replace('"',""", $attributes[$attribute]);
$attributes[$attribute] = str_replace('"',""", $attributes[$attribute]);
$state = self::ST_TAG_BETWEEN_ATTRIBUTE;
$state = self::ST_TAG_BETWEEN_ATTRIBUTE;
}
}
break;
break;
}
}
}
}
if ($state === self::ST_TEXT) // allows text past root node, which is in violation of XML spec
if ($state === self::ST_TEXT) // allows text past root node, which is in violation of XML spec
{
{
if ($i > $mark) {
if ($i > $mark) {
$text = substr($src, $mark, $i-$mark);
$text = substr($src, $mark, $i-$mark);
if (!ctype_space($text)) $this->raiseError("Characters found after end of the root element (wrap document in < tal:block > to avoid this error)");
if (!ctype_space($text)) $this->raiseError("Characters found after end of the root element (wrap document in < tal:block > to avoid this error)");
}
}
} else {
} else {
if ($state === self::ST_ROOT) {
if ($state === self::ST_ROOT) {
$msg = "Document does not have any tags";
$msg = "Document does not have any tags";
} else {
} else {
$msg = "Finished document in unexpected state: ".self::$state_names[$state]." is not finished";
$msg = "Finished document in unexpected state: ".self::$state_names[$state]." is not finished";
}
}
$this->raiseError($msg);
$this->raiseError($msg);
}
}
$builder->onDocumentEnd();
$builder->onDocumentEnd();
}
}
catch(PHPTAL_TemplateException $e)
catch(PHPTAL_TemplateException $e)
{
{
$e->hintSrcPosition($this->_file, $this->_line);
$e->hintSrcPosition($this->_file, $this->_line);
throw $e;
throw $e;
}
}
return $builder;
return $builder;
}
}
private function isValidQName($name)
private function isValidQName($name)
{
{
$name = $this->checkEncoding($name);
$name = $this->checkEncoding($name);
return preg_match('/^([a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*:)?[a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*$/i', $name);
return preg_match('/^([a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*:)?[a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*$/i', $name);
}
}
private function checkEncoding($str)
private function checkEncoding($str)
{
{
if ($str === '') return '';
if ($str === '') return '';
if ($this->input_encoding === 'UTF-8') {
if ($this->input_encoding === 'UTF-8') {
// $match expression below somehow triggers quite deep recurrency and stack overflow in preg
// $match expression below somehow triggers quite deep recurrency and stack overflow in preg
// to avoid this, check string bit by bit, omitting ASCII fragments.
// to avoid this, check string bit by bit, omitting ASCII fragments.
if (strlen($str) > 200) {
if (strlen($str) > 200) {
$chunks = preg_split('/(?>[\x09\x0A\x0D\x20-\x7F]+)/',$str,null,PREG_SPLIT_NO_EMPTY);
$chunks = preg_split('/(?>[\x09\x0A\x0D\x20-\x7F]+)/',$str,null,PREG_SPLIT_NO_EMPTY);
foreach ($chunks as $chunk) {
foreach ($chunks as $chunk) {
if (strlen($chunk) < 200) {
if (strlen($chunk) < 200) {
$this->checkEncoding($chunk);
$this->checkEncoding($chunk);
}
}
}
}
return $str;
return $str;
}
}
// http://www.w3.org/International/questions/qa-forms-utf-8
// http://www.w3.org/International/questions/qa-forms-utf-8
$match = '[\x09\x0A\x0D\x20-\x7F]' // ASCII
$match = '[\x09\x0A\x0D\x20-\x7F]' // ASCII
. '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte
. '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte
. '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs
. '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs
. '|[\xE1-\xEC\xEE\xEE][\x80-\xBF]{2}' // straight 3-byte (exclude FFFE and FFFF)
. '|[\xE1-\xEC\xEE\xEE][\x80-\xBF]{2}' // straight 3-byte (exclude FFFE and FFFF)
. '|\xEF[\x80-\xBE][\x80-\xBF]' // straight 3-byte
. '|\xEF[\x80-\xBE][\x80-\xBF]' // straight 3-byte
. '|\xEF\xBF[\x80-\xBD]' // straight 3-byte
. '|\xEF\xBF[\x80-\xBD]' // straight 3-byte
. '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates
. '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates
. '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3
. '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3
. '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15
. '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15
. '|\xF4[\x80-\x8F][\x80-\xBF]{2}'; // plane 16
. '|\xF4[\x80-\x8F][\x80-\xBF]{2}'; // plane 16
if (!preg_match('/^(?:(?>'.$match.'))+$/s',$str)) {
if (!preg_match('/^(?:(?>'.$match.'))+$/s',$str)) {
$res = preg_split('/((?>'.$match.')+)/s',$str,null,PREG_SPLIT_DELIM_CAPTURE);
$res = preg_split('/((?>'.$match.')+)/s',$str,null,PREG_SPLIT_DELIM_CAPTURE);
for($i=0; $i < count($res); $i+=2)
for($i=0; $i < count($res); $i+=2)
{
{
$res[$i] = self::convertBytesToEntities(array(1=>$res[$i]));
$res[$i] = self::convertBytesToEntities(array(1=>$res[$i]));
}
}
$this->raiseError("Invalid UTF-8 bytes: ".implode('', $res));
$this->raiseError("Invalid UTF-8 bytes: ".implode('', $res));
}
}
}
}
if ($this->input_encoding === 'ISO-8859-1') {
if ($this->input_encoding === 'ISO-8859-1') {
// http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-RestrictedChar
// http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-RestrictedChar
$forbid = '/((?>[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F]+))/s';
$forbid = '/((?>[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F]+))/s';
if (preg_match($forbid, $str)) {
if (preg_match($forbid, $str)) {
$str = preg_replace_callback($forbid, array('self', 'convertBytesToEntities'), $str);
$str = preg_replace_callback($forbid, array('self', 'convertBytesToEntities'), $str);
$this->raiseError("Invalid ISO-8859-1 characters: ".$str);
$this->raiseError("Invalid ISO-8859-1 characters: ".$str);
}
}
}
}
return $str;
return $str;
}
}
/**
/**
* preg callback
* preg callback
* Changes all bytes to hexadecimal XML entities
* Changes all bytes to hexadecimal XML entities
*
*
* @param array $m first array element is used for input
* @param array $m first array element is used for input
*
*
* @return string
* @return string
*/
*/
private static function convertBytesToEntities(array $m)
private static function convertBytesToEntities(array $m)
{
{
$m = $m[1]; $out = '';
$m = $m[1]; $out = '';
for($i=0; $i < strlen($m); $i++)
for($i=0; $i < strlen($m); $i++)
{
{
$out .= '&#X'.strtoupper(dechex(ord($m[$i]))).';';
$out .= '&#X'.strtoupper(dechex(ord($m[$i]))).';';
}
}
return $out;
return $out;
}
}
/**
/**
* This is where this parser violates XML and refuses to be an annoying bastard.
* This is where this parser violates XML and refuses to be an annoying bastard.
*/
*/
private function sanitizeEscapedText($str)
private function sanitizeEscapedText($str)
{
{
$str = str_replace(''', ''', $str); // PHP's html_entity_decode doesn't seem to support that!
$str = str_replace(''', ''', $str); // PHP's html_entity_decode doesn't seem to support that!
/* <?php ?> blocks can't reliably work in attributes (due to escaping impossible in XML)
/* <?php ?> blocks can't reliably work in attributes (due to escaping impossible in XML)
so they have to be converted into special TALES expression
so they have to be converted into special TALES expression
*/
*/
$types = version_compare(PHP_VERSION, '5.4.0') < 0 ? (ini_get('short_open_tag') ? 'php|=|' : 'php') : 'php|=';
$types = version_compare(PHP_VERSION, '5.4.0') < 0 ? (ini_get('short_open_tag') ? 'php|=|' : 'php') : 'php|=';
$str = preg_replace_callback("/<\?($types)(.*?)\?>/", array('self', 'convertPHPBlockToTALES'), $str);
$str = preg_replace_callback("/<\?($types)(.*?)\?>/", array('self', 'convertPHPBlockToTALES'), $str);
// corrects all non-entities and neutralizes potentially problematic CDATA end marker
// corrects all non-entities and neutralizes potentially problematic CDATA end marker
$str = strtr(preg_replace('/&(?!(?:#x?[a-f0-9]+|[a-z][a-z0-9]*);)/i', '&', $str), array('<'=>'<', ']]>'=>']]>'));
$str = strtr(preg_replace('/&(?!(?:#x?[a-f0-9]+|[a-z][a-z0-9]*);)/i', '&', $str), array('<'=>'<', ']]>'=>']]>'));
return $str;
return $str;
}
}
private static function convertPHPBlockToTALES($m)
private static function convertPHPBlockToTALES($m)
{
{
list(, $type, $code) = $m;
list(, $type, $code) = $m;
if ($type === '=') $code = 'echo '.$code;
if ($type === '=') $code = 'echo '.$code;
return '${structure phptal-internal-php-block:'.rawurlencode($code).'}';
return '${structure phptal-internal-php-block:'.rawurlencode($code).'}';
}
}
public function getSourceFile()
public function getSourceFile()
{
{
return $this->_file;
return $this->_file;
}
}
public function getLineNumber()
public function getLineNumber()
{
{
return $this->_line;
return $this->_line;
}
}
public static function isWhiteChar($c)
public static function isWhiteChar($c)
{
{
return strpos(" \t\n\r\0", $c) !== false;
return strpos(" \t\n\r\0", $c) !== false;
}
}
protected function raiseError($errStr)
protected function raiseError($errStr)
{
{
throw new PHPTAL_ParserException($errStr, $this->_file, $this->_line);
throw new PHPTAL_ParserException($errStr, $this->_file, $this->_line);
}
}
}
}
저장된 비교 결과
원본
파일 열기
<?php /** * PHPTAL templating engine * * PHP Version 5 * * @category HTML * @package PHPTAL * @author Laurent Bedubourg <lbedubourg@motion-twin.com> * @author Kornel Lesiński <kornel@aardvarkmedia.co.uk> * @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License * @version SVN: $Id$ * @link http://phptal.org/ */ /** * Simple sax like xml parser for PHPTAL * ("Dom" in the class name comes from name of the directory, not mode of operation) * * At the time this parser was created, standard PHP libraries were not suitable * (could not retrieve doctypes, xml declaration, problems with comments and CDATA). * * There are still some problems: XML parsers don't care about exact format of enties * or CDATA sections (PHPTAL tries to preserve them), * <?php ?> blocks are not allowed in attributes. * * This parser failed to enforce some XML well-formedness constraints, * and there are ill-formed templates "in the wild" because of this. * * @package PHPTAL * @subpackage Dom * @see PHPTAL_DOM_DocumentBuilder */ class PHPTAL_Dom_SaxXmlParser { private $_file; private $_line; private $_source; // available parser states const ST_ROOT = 0; const ST_TEXT = 1; const ST_LT = 2; const ST_TAG_NAME = 3; const ST_TAG_CLOSE = 4; const ST_TAG_SINGLE = 5; const ST_TAG_ATTRIBUTES = 6; const ST_TAG_BETWEEN_ATTRIBUTE = 7; const ST_CDATA = 8; const ST_COMMENT = 9; const ST_DOCTYPE = 10; const ST_XMLDEC = 11; const ST_PREPROC = 12; const ST_ATTR_KEY = 13; const ST_ATTR_EQ = 14; const ST_ATTR_QUOTE = 15; const ST_ATTR_VALUE = 16; const BOM_STR = "\xef\xbb\xbf"; static $state_names = array( self::ST_ROOT => 'root node', self::ST_TEXT => 'text', self::ST_LT => 'start of tag', self::ST_TAG_NAME => 'tag name', self::ST_TAG_CLOSE => 'closing tag', self::ST_TAG_SINGLE => 'self-closing tag', self::ST_TAG_ATTRIBUTES => 'tag', self::ST_TAG_BETWEEN_ATTRIBUTE => 'tag attributes', self::ST_CDATA => 'CDATA', self::ST_COMMENT => 'comment', self::ST_DOCTYPE => 'doctype', self::ST_XMLDEC => 'XML declaration', self::ST_PREPROC => 'preprocessor directive', self::ST_ATTR_KEY => 'attribute name', self::ST_ATTR_EQ => 'attribute value', self::ST_ATTR_QUOTE => 'quoted attribute value', self::ST_ATTR_VALUE => 'unquoted attribute value', ); private $input_encoding; public function __construct($input_encoding) { $this->input_encoding = $input_encoding; $this->_file = "<string>"; } public function parseFile(PHPTAL_Dom_DocumentBuilder $builder, $src) { if (!file_exists($src)) { throw new PHPTAL_IOException("file $src not found"); } return $this->parseString($builder, file_get_contents($src), $src); } public function parseString(PHPTAL_Dom_DocumentBuilder $builder, $src, $filename = '<string>') { try { $builder->setEncoding($this->input_encoding); $this->_file = $filename; $this->_line = 1; $state = self::ST_ROOT; $mark = 0; $len = strlen($src); $quoteStyle = '"'; $tagname = ""; $attribute = ""; $attributes = array(); $customDoctype = false; $builder->setSource($this->_file, $this->_line); $builder->onDocumentStart(); $i=0; // remove BOM (UTF-8 byte order mark)... if (substr($src, 0, 3) === self::BOM_STR) { $i=3; } for (; $i<$len; $i++) { $c = $src[$i]; // Change to substr($src, $i, 1); if you want to use mb_string.func_overload if ($c === "\n") $builder->setSource($this->_file, ++$this->_line); switch ($state) { case self::ST_ROOT: if ($c === '<') { $mark = $i; // mark tag start $state = self::ST_LT; } elseif (!self::isWhiteChar($c)) { $this->raiseError("Characters found before beginning of the document! (wrap document in < tal:block > to avoid this error)"); } break; case self::ST_TEXT: if ($c === '<') { if ($mark != $i) { $builder->onElementData($this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark)))); } $mark = $i; $state = self::ST_LT; } break; case self::ST_LT: if ($c === '/') { $mark = $i+1; $state = self::ST_TAG_CLOSE; } elseif ($c === '?' and strtolower(substr($src, $i, 5)) === '?xml ') { $state = self::ST_XMLDEC; } elseif ($c === '?') { $state = self::ST_PREPROC; } elseif ($c === '!' and substr($src, $i, 3) === '!--') { $state = self::ST_COMMENT; } elseif ($c === '!' and substr($src, $i, 8) === '![CDATA[') { $state = self::ST_CDATA; $mark = $i+8; // past opening tag } elseif ($c === '!' and strtoupper(substr($src, $i, 8)) === '!DOCTYPE') { $state = self::ST_DOCTYPE; } elseif (self::isWhiteChar($c)) { $state = self::ST_TEXT; } else { $mark = $i; // mark node name start $attributes = array(); $attribute = ""; $state = self::ST_TAG_NAME; } break; case self::ST_TAG_NAME: if (self::isWhiteChar($c) || $c === '/' || $c === '>') { $tagname = substr($src, $mark, $i-$mark); if (!$this->isValidQName($tagname)) $this->raiseError("Invalid tag name '$tagname'"); if ($c === '/') { $state = self::ST_TAG_SINGLE; } elseif ($c === '>') { $mark = $i+1; // mark text start $state = self::ST_TEXT; $builder->onElementStart($tagname, $attributes); } else /* isWhiteChar */ { $state = self::ST_TAG_ATTRIBUTES; } } break; case self::ST_TAG_CLOSE: if ($c === '>') { $tagname = rtrim(substr($src, $mark, $i-$mark)); $builder->onElementClose($tagname); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_TAG_SINGLE: if ($c !== '>') { $this->raiseError("Expected '/>', but found '/$c' inside tag < $tagname >"); } $mark = $i+1; // mark text start $state = self::ST_TEXT; $builder->onElementStart($tagname, $attributes); $builder->onElementClose($tagname); break; case self::ST_TAG_BETWEEN_ATTRIBUTE: case self::ST_TAG_ATTRIBUTES: if ($c === '>') { $mark = $i+1; // mark text start $state = self::ST_TEXT; $builder->onElementStart($tagname, $attributes); } elseif ($c === '/') { $state = self::ST_TAG_SINGLE; } elseif (self::isWhiteChar($c)) { $state = self::ST_TAG_ATTRIBUTES; } elseif ($state === self::ST_TAG_ATTRIBUTES && $this->isValidQName($c)) { $mark = $i; // mark attribute key start $state = self::ST_ATTR_KEY; } else $this->raiseError("Unexpected character '$c' between attributes of < $tagname >"); break; case self::ST_COMMENT: if ($c === '>' && $i > $mark+4 && substr($src, $i-2, 2) === '--') { if (preg_match('/^-|--|-$/', substr($src, $mark +4, $i-$mark+1 -7))) { $this->raiseError("Ill-formed comment. XML comments are not allowed to contain '--' or start/end with '-': ".substr($src, $mark+4, $i-$mark+1-7)); } $builder->onComment($this->checkEncoding(substr($src, $mark+4, $i-$mark+1-7))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_CDATA: if ($c === '>' and substr($src, $i-2, 2) === ']]') { $builder->onCDATASection($this->checkEncoding(substr($src, $mark, $i-$mark-2))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_XMLDEC: if ($c === '?' && substr($src, $i, 2) === '?>') { $builder->onXmlDecl($this->checkEncoding(substr($src, $mark, $i-$mark+2))); $i++; // skip '>' $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_DOCTYPE: if ($c === '[') { $customDoctype = true; } elseif ($customDoctype && $c === '>' && substr($src, $i-1, 2) === ']>') { $customDoctype = false; $builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } elseif (!$customDoctype && $c === '>') { $customDoctype = false; $builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_PREPROC: if ($c === '>' and substr($src, $i-1, 1) === '?') { $builder->onProcessingInstruction($this->checkEncoding(substr($src, $mark, $i-$mark+1))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_ATTR_KEY: if ($c === '=' || self::isWhiteChar($c)) { $attribute = substr($src, $mark, $i-$mark); if (!$this->isValidQName($attribute)) { $this->raiseError("Invalid attribute name '$attribute' in < $tagname >"); } if (isset($attributes[$attribute])) { $this->raiseError("Attribute $attribute in < $tagname > is defined more than once"); } if ($c === '=') $state = self::ST_ATTR_VALUE; else /* white char */ $state = self::ST_ATTR_EQ; } elseif ($c === '/' || $c==='>') { $attribute = substr($src, $mark, $i-$mark); if (!$this->isValidQName($attribute)) { $this->raiseError("Invalid attribute name '$attribute'"); } $this->raiseError("Attribute $attribute does not have value (found end of tag instead of '=')"); } break; case self::ST_ATTR_EQ: if ($c === '=') { $state = self::ST_ATTR_VALUE; } elseif (!self::isWhiteChar($c)) { $this->raiseError("Attribute $attribute in < $tagname > does not have value (found character '$c' instead of '=')"); } break; case self::ST_ATTR_VALUE: if (self::isWhiteChar($c)) { } elseif ($c === '"' or $c === '\'') { $quoteStyle = $c; $state = self::ST_ATTR_QUOTE; $mark = $i+1; // mark attribute real value start } else { $this->raiseError("Value of attribute $attribute in < $tagname > is not in quotes (found character '$c' instead of quote)"); } break; case self::ST_ATTR_QUOTE: if ($c === $quoteStyle) { $attributes[$attribute] = $this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark))); // PHPTAL's code generator assumes input is escaped for double-quoted strings. Single-quoted attributes need to be converted. // FIXME: it should be escaped at later stage. $attributes[$attribute] = str_replace('"',""", $attributes[$attribute]); $state = self::ST_TAG_BETWEEN_ATTRIBUTE; } break; } } if ($state === self::ST_TEXT) // allows text past root node, which is in violation of XML spec { if ($i > $mark) { $text = substr($src, $mark, $i-$mark); if (!ctype_space($text)) $this->raiseError("Characters found after end of the root element (wrap document in < tal:block > to avoid this error)"); } } else { if ($state === self::ST_ROOT) { $msg = "Document does not have any tags"; } else { $msg = "Finished document in unexpected state: ".self::$state_names[$state]." is not finished"; } $this->raiseError($msg); } $builder->onDocumentEnd(); } catch(PHPTAL_TemplateException $e) { $e->hintSrcPosition($this->_file, $this->_line); throw $e; } return $builder; } private function isValidQName($name) { $name = $this->checkEncoding($name); return preg_match('/^([a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*:)?[a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*$/i', $name); } private function checkEncoding($str) { if ($str === '') return ''; if ($this->input_encoding === 'UTF-8') { // $match expression below somehow triggers quite deep recurrency and stack overflow in preg // to avoid this, check string bit by bit, omitting ASCII fragments. if (strlen($str) > 200) { $chunks = preg_split('/(?>[\x09\x0A\x0D\x20-\x7F]+)/',$str,null,PREG_SPLIT_NO_EMPTY); foreach ($chunks as $chunk) { if (strlen($chunk) < 200) { $this->checkEncoding($chunk); } } return $str; } // http://www.w3.org/International/questions/qa-forms-utf-8 $match = '[\x09\x0A\x0D\x20-\x7F]' // ASCII . '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte . '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs . '|[\xE1-\xEC\xEE\xEE][\x80-\xBF]{2}' // straight 3-byte (exclude FFFE and FFFF) . '|\xEF[\x80-\xBE][\x80-\xBF]' // straight 3-byte . '|\xEF\xBF[\x80-\xBD]' // straight 3-byte . '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates . '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3 . '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15 . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'; // plane 16 if (!preg_match('/^(?:(?>'.$match.'))+$/s',$str)) { $res = preg_split('/((?>'.$match.')+)/s',$str,null,PREG_SPLIT_DELIM_CAPTURE); for($i=0; $i < count($res); $i+=2) { $res[$i] = self::convertBytesToEntities(array(1=>$res[$i])); } $this->raiseError("Invalid UTF-8 bytes: ".implode('', $res)); } } if ($this->input_encoding === 'ISO-8859-1') { // http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-RestrictedChar $forbid = '/((?>[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F]+))/s'; if (preg_match($forbid, $str)) { $str = preg_replace_callback($forbid, array('self', 'convertBytesToEntities'), $str); $this->raiseError("Invalid ISO-8859-1 characters: ".$str); } } return $str; } /** * preg callback * Changes all bytes to hexadecimal XML entities * * @param array $m first array element is used for input * * @return string */ private static function convertBytesToEntities(array $m) { $m = $m[1]; $out = ''; for($i=0; $i < strlen($m); $i++) { $out .= '&#X'.strtoupper(dechex(ord($m[$i]))).';'; } return $out; } /** * This is where this parser violates XML and refuses to be an annoying bastard. */ private function sanitizeEscapedText($str) { $str = str_replace(''', ''', $str); // PHP's html_entity_decode doesn't seem to support that! /* <?php ?> blocks can't reliably work in attributes (due to escaping impossible in XML) so they have to be converted into special TALES expression */ $types = version_compare(PHP_VERSION, '5.4.0') < 0 ? (ini_get('short_open_tag') ? 'php|=|' : 'php') : 'php|='; $str = preg_replace_callback("/<\?($types)(.*?)\?>/", array('self', 'convertPHPBlockToTALES'), $str); // corrects all non-entities and neutralizes potentially problematic CDATA end marker $str = strtr(preg_replace('/&(?!(?:#x?[a-f0-9]+|[a-z][a-z0-9]*);)/i', '&', $str), array('<'=>'<', ']]>'=>']]>')); return $str; } private static function convertPHPBlockToTALES($m) { list(, $type, $code) = $m; if ($type === '=') $code = 'echo '.$code; return '${structure phptal-internal-php-block:'.rawurlencode($code).'}'; } public function getSourceFile() { return $this->_file; } public function getLineNumber() { return $this->_line; } public static function isWhiteChar($c) { return strpos(" \t\n\r\0", $c) !== false; } protected function raiseError($errStr) { throw new PHPTAL_ParserException($errStr, $this->_file, $this->_line); } }
수정본
파일 열기
<?php /** * PHPTAL templating engine * * PHP Version 5 * * @category HTML * @package PHPTAL * @author Laurent Bedubourg <lbedubourg@motion-twin.com> * @author Kornel Lesiński <kornel@aardvarkmedia.co.uk> * @license http://www.gnu.org/licenses/lgpl.html GNU Lesser General Public License * @version SVN: $Id$ * @link http://phptal.org/ */ /** * Simple sax like xml parser for PHPTAL * ("Dom" in the class name comes from name of the directory, not mode of operation) * * At the time this parser was created, standard PHP libraries were not suitable * (could not retrieve doctypes, xml declaration, problems with comments and CDATA). * * There are still some problems: XML parsers don't care about exact format of enties * or CDATA sections (PHPTAL tries to preserve them), * <?php ?> blocks are not allowed in attributes. * * This parser failed to enforce some XML well-formedness constraints, * and there are ill-formed templates "in the wild" because of this. * * @package PHPTAL * @subpackage Dom * @see PHPTAL_DOM_DocumentBuilder */ class PHPTAL_Dom_SaxXmlParser { private $_file; private $_line; private $_source; // available parser states const ST_ROOT = 0; const ST_TEXT = 1; const ST_LT = 2; const ST_TAG_NAME = 3; const ST_TAG_CLOSE = 4; const ST_TAG_SINGLE = 5; const ST_TAG_ATTRIBUTES = 6; const ST_TAG_BETWEEN_ATTRIBUTE = 7; const ST_CDATA = 8; const ST_COMMENT = 9; const ST_DOCTYPE = 10; const ST_XMLDEC = 11; const ST_PREPROC = 12; const ST_ATTR_KEY = 13; const ST_ATTR_EQ = 14; const ST_ATTR_QUOTE = 15; const ST_ATTR_VALUE = 16; const BOM_STR = "\xef\xbb\xbf"; static $state_names = array( self::ST_ROOT => 'root node', self::ST_TEXT => 'text', self::ST_LT => 'start of tag', self::ST_TAG_NAME => 'tag name', self::ST_TAG_CLOSE => 'closing tag', self::ST_TAG_SINGLE => 'self-closing tag', self::ST_TAG_ATTRIBUTES => 'tag', self::ST_TAG_BETWEEN_ATTRIBUTE => 'tag attributes', self::ST_CDATA => 'CDATA', self::ST_COMMENT => 'comment', self::ST_DOCTYPE => 'doctype', self::ST_XMLDEC => 'XML declaration', self::ST_PREPROC => 'preprocessor directive', self::ST_ATTR_KEY => 'attribute name', self::ST_ATTR_EQ => 'attribute value', self::ST_ATTR_QUOTE => 'quoted attribute value', self::ST_ATTR_VALUE => 'unquoted attribute value', ); private $input_encoding; public function __construct($input_encoding) { $this->input_encoding = $input_encoding; $this->_file = "<string>"; } public function parseFile(PHPTAL_Dom_DocumentBuilder $builder, $src) { if (!file_exists($src)) { throw new PHPTAL_IOException("file $src not found"); } return $this->parseString($builder, file_get_contents($src), $src); } public function parseString(PHPTAL_Dom_DocumentBuilder $builder, $src, $filename = '<string>') { try { $builder->setEncoding($this->input_encoding); $this->_file = $filename; $this->_line = 1; $state = self::ST_ROOT; $mark = 0; $len = strlen($src); $quoteStyle = '"'; $tagname = ""; $attribute = ""; $attributes = array(); $customDoctype = false; $builder->setSource($this->_file, $this->_line); $builder->onDocumentStart(); $i=0; // remove BOM (UTF-8 byte order mark)... if (substr($src, 0, 3) === self::BOM_STR) { $i=3; } for (; $i<$len; $i++) { $c = $src[$i]; // Change to substr($src, $i, 1); if you want to use mb_string.func_overload if ($c === "\n") $builder->setSource($this->_file, ++$this->_line); switch ($state) { case self::ST_ROOT: if ($c === '<') { $mark = $i; // mark tag start $state = self::ST_LT; } elseif (!self::isWhiteChar($c)) { $this->raiseError("Characters found before beginning of the document! (wrap document in < tal:block > to avoid this error)"); } break; case self::ST_TEXT: if ($c === '<') { if ($mark != $i) { $builder->onElementData($this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark)))); } $mark = $i; $state = self::ST_LT; } break; case self::ST_LT: if ($c === '/') { $mark = $i+1; $state = self::ST_TAG_CLOSE; } elseif ($c === '?' and strtolower(substr($src, $i, 5)) === '?xml ') { $state = self::ST_XMLDEC; } elseif ($c === '?') { $state = self::ST_PREPROC; } elseif ($c === '!' and substr($src, $i, 3) === '!--') { $state = self::ST_COMMENT; } elseif ($c === '!' and substr($src, $i, 8) === '![CDATA[') { $state = self::ST_CDATA; $mark = $i+8; // past opening tag } elseif ($c === '!' and strtoupper(substr($src, $i, 8)) === '!DOCTYPE') { $state = self::ST_DOCTYPE; } elseif (self::isWhiteChar($c)) { $state = self::ST_TEXT; } else { $mark = $i; // mark node name start $attributes = array(); $attribute = ""; $state = self::ST_TAG_NAME; } break; case self::ST_TAG_NAME: if (self::isWhiteChar($c) || $c === '/' || $c === '>') { $tagname = substr($src, $mark, $i-$mark); if (!$this->isValidQName($tagname)) $this->raiseError("Invalid tag name '$tagname'"); if ($c === '/') { $state = self::ST_TAG_SINGLE; } elseif ($c === '>') { $mark = $i+1; // mark text start $state = self::ST_TEXT; $builder->onElementStart($tagname, $attributes); } else /* isWhiteChar */ { $state = self::ST_TAG_ATTRIBUTES; } } break; case self::ST_TAG_CLOSE: if ($c === '>') { $tagname = rtrim(substr($src, $mark, $i-$mark)); $builder->onElementClose($tagname); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_TAG_SINGLE: if ($c !== '>') { $this->raiseError("Expected '/>', but found '/$c' inside tag < $tagname >"); } $mark = $i+1; // mark text start $state = self::ST_TEXT; $builder->onElementStart($tagname, $attributes); $builder->onElementClose($tagname); break; case self::ST_TAG_BETWEEN_ATTRIBUTE: case self::ST_TAG_ATTRIBUTES: if ($c === '>') { $mark = $i+1; // mark text start $state = self::ST_TEXT; $builder->onElementStart($tagname, $attributes); } elseif ($c === '/') { $state = self::ST_TAG_SINGLE; } elseif (self::isWhiteChar($c)) { $state = self::ST_TAG_ATTRIBUTES; } elseif ($state === self::ST_TAG_ATTRIBUTES && $this->isValidQName($c)) { $mark = $i; // mark attribute key start $state = self::ST_ATTR_KEY; } else $this->raiseError("Unexpected character '$c' between attributes of < $tagname >"); break; case self::ST_COMMENT: if ($c === '>' && $i > $mark+4 && substr($src, $i-2, 2) === '--') { if (preg_match('/^-|--|-$/', substr($src, $mark +4, $i-$mark+1 -7))) { $this->raiseError("Ill-formed comment. XML comments are not allowed to contain '--' or start/end with '-': ".substr($src, $mark+4, $i-$mark+1-7)); } $builder->onComment($this->checkEncoding(substr($src, $mark+4, $i-$mark+1-7))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_CDATA: if ($c === '>' and substr($src, $i-2, 2) === ']]') { $builder->onCDATASection($this->checkEncoding(substr($src, $mark, $i-$mark-2))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_XMLDEC: if ($c === '?' && substr($src, $i, 2) === '?>') { $builder->onXmlDecl($this->checkEncoding(substr($src, $mark, $i-$mark+2))); $i++; // skip '>' $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_DOCTYPE: if ($c === '[') { $customDoctype = true; } elseif ($customDoctype && $c === '>' && substr($src, $i-1, 2) === ']>') { $customDoctype = false; $builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } elseif (!$customDoctype && $c === '>') { $customDoctype = false; $builder->onDocType($this->checkEncoding(substr($src, $mark, $i-$mark+1))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_PREPROC: if ($c === '>' and substr($src, $i-1, 1) === '?') { $builder->onProcessingInstruction($this->checkEncoding(substr($src, $mark, $i-$mark+1))); $mark = $i+1; // mark text start $state = self::ST_TEXT; } break; case self::ST_ATTR_KEY: if ($c === '=' || self::isWhiteChar($c)) { $attribute = substr($src, $mark, $i-$mark); if (!$this->isValidQName($attribute)) { $this->raiseError("Invalid attribute name '$attribute' in < $tagname >"); } if (isset($attributes[$attribute])) { $this->raiseError("Attribute $attribute in < $tagname > is defined more than once"); } if ($c === '=') $state = self::ST_ATTR_VALUE; else /* white char */ $state = self::ST_ATTR_EQ; } elseif ($c === '/' || $c==='>') { $attribute = substr($src, $mark, $i-$mark); if (!$this->isValidQName($attribute)) { $this->raiseError("Invalid attribute name '$attribute'"); } $this->raiseError("Attribute $attribute does not have value (found end of tag instead of '=')"); } break; case self::ST_ATTR_EQ: if ($c === '=') { $state = self::ST_ATTR_VALUE; } elseif (!self::isWhiteChar($c)) { $this->raiseError("Attribute $attribute in < $tagname > does not have value (found character '$c' instead of '=')"); } break; case self::ST_ATTR_VALUE: if (self::isWhiteChar($c)) { } elseif ($c === '"' or $c === '\'') { $quoteStyle = $c; $state = self::ST_ATTR_QUOTE; $mark = $i+1; // mark attribute real value start } else { $this->raiseError("Value of attribute $attribute in < $tagname > is not in quotes (found character '$c' instead of quote)"); } break; case self::ST_ATTR_QUOTE: if ($c === $quoteStyle) { $attributes[$attribute] = $this->sanitizeEscapedText($this->checkEncoding(substr($src, $mark, $i-$mark))); // PHPTAL's code generator assumes input is escaped for double-quoted strings. Single-quoted attributes need to be converted. // FIXME: it should be escaped at later stage. $attributes[$attribute] = str_replace('"',""", $attributes[$attribute]); $state = self::ST_TAG_BETWEEN_ATTRIBUTE; } break; } } if ($state === self::ST_TEXT) // allows text past root node, which is in violation of XML spec { if ($i > $mark) { $text = substr($src, $mark, $i-$mark); if (!ctype_space($text)) $this->raiseError("Characters found after end of the root element (wrap document in < tal:block > to avoid this error)"); } } else { if ($state === self::ST_ROOT) { $msg = "Document does not have any tags"; } else { $msg = "Finished document in unexpected state: ".self::$state_names[$state]." is not finished"; } $this->raiseError($msg); } $builder->onDocumentEnd(); } catch(PHPTAL_TemplateException $e) { $e->hintSrcPosition($this->_file, $this->_line); throw $e; } return $builder; } private function isValidQName($name) { $name = $this->checkEncoding($name); return preg_match('/^([a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*:)?[a-z_\x80-\xff]+[a-z0-9._\x80-\xff-]*$/i', $name); } private function checkEncoding($str) { if ($str === '') return ''; if ($this->input_encoding === 'UTF-8') { // $match expression below somehow triggers quite deep recurrency and stack overflow in preg // to avoid this, check string bit by bit, omitting ASCII fragments. if (strlen($str) > 200) { $chunks = preg_split('/(?>[\x09\x0A\x0D\x20-\x7F]+)/',$str,null,PREG_SPLIT_NO_EMPTY); foreach ($chunks as $chunk) { if (strlen($chunk) < 200) { $this->checkEncoding($chunk); } } return $str; } // http://www.w3.org/International/questions/qa-forms-utf-8 $match = '[\x09\x0A\x0D\x20-\x7F]' // ASCII . '|[\xC2-\xDF][\x80-\xBF]' // non-overlong 2-byte . '|\xE0[\xA0-\xBF][\x80-\xBF]' // excluding overlongs . '|[\xE1-\xEC\xEE\xEE][\x80-\xBF]{2}' // straight 3-byte (exclude FFFE and FFFF) . '|\xEF[\x80-\xBE][\x80-\xBF]' // straight 3-byte . '|\xEF\xBF[\x80-\xBD]' // straight 3-byte . '|\xED[\x80-\x9F][\x80-\xBF]' // excluding surrogates . '|\xF0[\x90-\xBF][\x80-\xBF]{2}' // planes 1-3 . '|[\xF1-\xF3][\x80-\xBF]{3}' // planes 4-15 . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'; // plane 16 if (!preg_match('/^(?:(?>'.$match.'))+$/s',$str)) { $res = preg_split('/((?>'.$match.')+)/s',$str,null,PREG_SPLIT_DELIM_CAPTURE); for($i=0; $i < count($res); $i+=2) { $res[$i] = self::convertBytesToEntities(array(1=>$res[$i])); } $this->raiseError("Invalid UTF-8 bytes: ".implode('', $res)); } } if ($this->input_encoding === 'ISO-8859-1') { // http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-RestrictedChar $forbid = '/((?>[\x00-\x08\x0B\x0C\x0E-\x1F\x7F-\x84\x86-\x9F]+))/s'; if (preg_match($forbid, $str)) { $str = preg_replace_callback($forbid, array('self', 'convertBytesToEntities'), $str); $this->raiseError("Invalid ISO-8859-1 characters: ".$str); } } return $str; } /** * preg callback * Changes all bytes to hexadecimal XML entities * * @param array $m first array element is used for input * * @return string */ private static function convertBytesToEntities(array $m) { $m = $m[1]; $out = ''; for($i=0; $i < strlen($m); $i++) { $out .= '&#X'.strtoupper(dechex(ord($m[$i]))).';'; } return $out; } /** * This is where this parser violates XML and refuses to be an annoying bastard. */ private function sanitizeEscapedText($str) { $str = str_replace(''', ''', $str); // PHP's html_entity_decode doesn't seem to support that! /* <?php ?> blocks can't reliably work in attributes (due to escaping impossible in XML) so they have to be converted into special TALES expression */ $types = version_compare(PHP_VERSION, '5.4.0') < 0 ? (ini_get('short_open_tag') ? 'php|=|' : 'php') : 'php|='; $str = preg_replace_callback("/<\?($types)(.*?)\?>/", array('self', 'convertPHPBlockToTALES'), $str); // corrects all non-entities and neutralizes potentially problematic CDATA end marker $str = strtr(preg_replace('/&(?!(?:#x?[a-f0-9]+|[a-z][a-z0-9]*);)/i', '&', $str), array('<'=>'<', ']]>'=>']]>')); return $str; } private static function convertPHPBlockToTALES($m) { list(, $type, $code) = $m; if ($type === '=') $code = 'echo '.$code; return '${structure phptal-internal-php-block:'.rawurlencode($code).'}'; } public function getSourceFile() { return $this->_file; } public function getLineNumber() { return $this->_line; } public static function isWhiteChar($c) { return strpos(" \t\n\r\0", $c) !== false; } protected function raiseError($errStr) { throw new PHPTAL_ParserException($errStr, $this->_file, $this->_line); } }
비교하기