[geeklog-hg] geeklog: Replaced kses with htmLawed (bug #0001084, bug #0001014)

geeklog-cvs at lists.geeklog.net geeklog-cvs at lists.geeklog.net
Thu Apr 4 09:00:16 EDT 2013


changeset 9035:2bc8e6300729
url:  http://project.geeklog.net/cgi-bin/hgwebdir.cgi/geeklog/rev/2bc8e6300729
user: Kenji ITO <mystralkk at gmail.com>
date: Thu Apr 04 21:56:26 2013 +0900
description:
Replaced kses with htmLawed (bug #0001084, bug #0001014)

diffstat:

 public_html/lib-common.php                  |    67 +-
 system/classes/htmlawed/htmLawed.php        |   723 +++++++++++
 system/classes/htmlawed/htmLawed_README.txt |  1730 +++++++++++++++++++++++++++
 system/classes/kses.class.php               |  1160 ------------------
 4 files changed, 2493 insertions(+), 1187 deletions(-)

diffs (truncated from 3728 to 300 lines):

diff -r 4e6d66840fd7 -r 2bc8e6300729 public_html/lib-common.php
--- a/public_html/lib-common.php	Wed Apr 03 15:19:34 2013 -0400
+++ b/public_html/lib-common.php	Thu Apr 04 21:56:26 2013 +0900
@@ -278,14 +278,6 @@
     $_USER['advanced_editor'] = $_CONF['advanced_editor'];
 }
 
-
-/**
-* Ulf Harnhammar's kses class
-*
-*/
-
-require_once( $_CONF['path_system'] . 'classes/kses.class.php' );
-
 /**
 * Multibyte functions
 *
@@ -3768,7 +3760,7 @@
     $str = str_replace('\\', '\', COM_stripslashes($str) );
 
     // Get rid of any newline characters
-    $str = preg_replace( "/\n/", '', $str );
+    $str = str_replace("\n", '', $str);
 
     // Replace any $ with $ (HTML equiv)
     $str = str_replace( '$', '$', $str );
@@ -3842,19 +3834,31 @@
         return $str;
     }
 
-    // strip_tags() gets confused by HTML comments ...
-    $str = preg_replace( '/<!--.+?-->/', '', $str );
-
-    $filter = new kses4;
-    if( isset( $_CONF['allowed_protocols'] ) && is_array( $_CONF['allowed_protocols'] ) && ( count( $_CONF['allowed_protocols'] ) > 0 ))
-    {
-        $filter->SetProtocols( $_CONF['allowed_protocols'] );
-    }
-    else
-    {
-        $filter->SetProtocols( array( 'http:', 'https:', 'ftp:' ));
-    }
-
+    require_once $_CONF['path_system'] . 'classes/htmlawed/htmLawed.php';
+
+    // Sets config options for htmLawed.  See http://www.bioinformatics.org/
+    // phplabware/internal_utilities/htmLawed/htmLawed_README.htm
+    $config = array(
+        'balance'        => 1, // Balance tags for well-formedness and proper nesting
+        'comment'        => 3, // Allow HTML comment
+        'css_expression' => 1, // Allow dynamic CSS expression in "style" attributes
+        'keep_bad'       => 1, // Neutralize both tags and element content
+        'tidy'           => 0, // Don't beautify or compact HTML code
+        'unique_ids'     => 1, // Remove duplicate and/or invalid ids
+        'valid_xhtml'    => 1, // Magic parameter to make input the most valid XHTML
+    );
+    
+    if (isset($_CONF['allowed_protocols']) &&
+            is_array($_CONF['allowed_protocols']) &&
+            (count($_CONF['allowed_protocols']) > 0)) {
+        $schemes = $_CONF['allowed_protocols'];
+    } else {
+        $schemes = array('http:', 'https:', 'ftp:');
+    }
+    
+    $schemes = str_replace(':', '', implode(', ', $schemes));
+    $config['schemes'] = 'href: ' . $schemes . '; *: ' . $schemes;
+    
     if( empty( $permissions) || !SEC_hasRights( $permissions ) ||
             empty( $_CONF['admin_html'] ))
     {
@@ -3872,14 +3876,23 @@
         }
     }
 
-    foreach( $html as $tag => $attr )
-    {
-        $filter->AddHTML( $tag, $attr );
-    }
+    foreach ($html as $tag => $attr) {
+        if (is_array($attr) && (count($attr) > 0)) {
+            $spec[] = $tag . '=' . implode(', ', array_keys($attr));
+        } else {
+            $spec[] = $tag . '=-*';
+        }
+
+        $elements[] = $tag;
+    }
+
+    $config['elements'] = implode(', ', $elements);
+    $spec = implode('; ', $spec);
+
     /* Replace [raw][/raw] with <!--raw--><!--/raw-->, note done "late" because
      * of the above noted // strip_tags() gets confused by HTML comments ...
      */
-    $str = $filter->Parse( $str );
+    $str = htmLawed($str, $config, $spec);
     $str = str_replace('[raw2]','<!--raw--><span class="raw">', $str);
     $str = str_replace('[/raw2]','</span><!--/raw-->', $str);
 
diff -r 4e6d66840fd7 -r 2bc8e6300729 system/classes/htmlawed/htmLawed.php
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/system/classes/htmlawed/htmLawed.php	Thu Apr 04 21:56:26 2013 +0900
@@ -0,0 +1,723 @@
+<?php
+
+/*
+htmLawed 1.1.14, 8 August 2012
+Copyright Santosh Patnaik
+Dual licensed with LGPL 3 and GPL 2+
+A PHP Labware internal utility; www.bioinformatics.org/phplabware/internal_utilities/htmLawed
+
+See htmLawed_README.txt/htm
+*/
+
+function htmLawed($t, $C=1, $S=array()){
+$C = is_array($C) ? $C : array();
+if(!empty($C['valid_xhtml'])){
+ $C['elements'] = empty($C['elements']) ? '*-center-dir-font-isindex-menu-s-strike-u' : $C['elements'];
+ $C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 2;
+ $C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 2;
+}
+// config eles
+$e = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'applet'=>1, 'area'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'blockquote'=>1, 'br'=>1, 'button'=>1, 'caption'=>1, 'center'=>1, 'cite'=>1, 'code'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'del'=>1, 'dfn'=>1, 'dir'=>1, 'div'=>1, 'dl'=>1, 'dt'=>1, 'em'=>1, 'embed'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'isindex'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'li'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'object'=>1, 'ol'=>1, 'optgroup'=>1, 'option'=>1, 'p'=>1, 'param'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'table'=>1, 'tbody'=>1, 'td'=>1, 'textarea'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1, 'tt'=>1, 'u'=>1, 'ul'=>1, 'var'=>1); // 86/deprecated+embed+ruby
+if(!empty($C['safe'])){
+ unset($e['applet'], $e['embed'], $e['iframe'], $e['object'], $e['script']);
+}
+$x = !empty($C['elements']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['elements']) : '*';
+if($x == '-*'){$e = array();}
+elseif(strpos($x, '*') === false){$e = array_flip(explode(',', $x));}
+else{
+ if(isset($x[1])){
+  preg_match_all('`(?:^|-|\+)[^\-+]+?(?=-|\+|$)`', $x, $m, PREG_SET_ORDER);
+  for($i=count($m); --$i>=0;){$m[$i] = $m[$i][0];}
+  foreach($m as $v){
+   if($v[0] == '+'){$e[substr($v, 1)] = 1;}
+   if($v[0] == '-' && isset($e[($v = substr($v, 1))]) && !in_array('+'. $v, $m)){unset($e[$v]);}
+  }
+ }
+}
+$C['elements'] =& $e;
+// config attrs
+$x = !empty($C['deny_attribute']) ? str_replace(array("\n", "\r", "\t", ' '), '', $C['deny_attribute']) : '';
+$x = array_flip((isset($x[0]) && $x[0] == '*') ? explode('-', $x) : explode(',', $x. (!empty($C['safe']) ? ',on*' : '')));
+if(isset($x['on*'])){
+ unset($x['on*']);
+ $x += array('onblur'=>1, 'onchange'=>1, 'onclick'=>1, 'ondblclick'=>1, 'onfocus'=>1, 'onkeydown'=>1, 'onkeypress'=>1, 'onkeyup'=>1, 'onmousedown'=>1, 'onmousemove'=>1, 'onmouseout'=>1, 'onmouseover'=>1, 'onmouseup'=>1, 'onreset'=>1, 'onselect'=>1, 'onsubmit'=>1);
+}
+$C['deny_attribute'] = $x;
+// config URL
+$x = (isset($C['schemes'][2]) && strpos($C['schemes'], ':')) ? strtolower($C['schemes']) : 'href: aim, feed, file, ftp, gopher, http, https, irc, mailto, news, nntp, sftp, ssh, telnet; *:file, http, https';
+$C['schemes'] = array();
+foreach(explode(';', str_replace(array(' ', "\t", "\r", "\n"), '', $x)) as $v){
+ $x = $x2 = null; list($x, $x2) = explode(':', $v, 2);
+ if($x2){$C['schemes'][$x] = array_flip(explode(',', $x2));}
+}
+if(!isset($C['schemes']['*'])){$C['schemes']['*'] = array('file'=>1, 'http'=>1, 'https'=>1,);}
+if(!empty($C['safe']) && empty($C['schemes']['style'])){$C['schemes']['style'] = array('!'=>1);}
+$C['abs_url'] = isset($C['abs_url']) ? $C['abs_url'] : 0;
+if(!isset($C['base_url']) or !preg_match('`^[a-zA-Z\d.+\-]+://[^/]+/(.+?/)?$`', $C['base_url'])){
+ $C['base_url'] = $C['abs_url'] = 0;
+}
+// config rest
+$C['and_mark'] = empty($C['and_mark']) ? 0 : 1;
+$C['anti_link_spam'] = (isset($C['anti_link_spam']) && is_array($C['anti_link_spam']) && count($C['anti_link_spam']) == 2 && (empty($C['anti_link_spam'][0]) or hl_regex($C['anti_link_spam'][0])) && (empty($C['anti_link_spam'][1]) or hl_regex($C['anti_link_spam'][1]))) ? $C['anti_link_spam'] : 0;
+$C['anti_mail_spam'] = isset($C['anti_mail_spam']) ? $C['anti_mail_spam'] : 0;
+$C['balance'] = isset($C['balance']) ? (bool)$C['balance'] : 1;
+$C['cdata'] = isset($C['cdata']) ? $C['cdata'] : (empty($C['safe']) ? 3 : 0);
+$C['clean_ms_char'] = empty($C['clean_ms_char']) ? 0 : $C['clean_ms_char'];
+$C['comment'] = isset($C['comment']) ? $C['comment'] : (empty($C['safe']) ? 3 : 0);
+$C['css_expression'] = empty($C['css_expression']) ? 0 : 1;
+$C['direct_list_nest'] = empty($C['direct_list_nest']) ? 0 : 1;
+$C['hexdec_entity'] = isset($C['hexdec_entity']) ? $C['hexdec_entity'] : 1;
+$C['hook'] = (!empty($C['hook']) && function_exists($C['hook'])) ? $C['hook'] : 0;
+$C['hook_tag'] = (!empty($C['hook_tag']) && function_exists($C['hook_tag'])) ? $C['hook_tag'] : 0;
+$C['keep_bad'] = isset($C['keep_bad']) ? $C['keep_bad'] : 6;
+$C['lc_std_val'] = isset($C['lc_std_val']) ? (bool)$C['lc_std_val'] : 1;
+$C['make_tag_strict'] = isset($C['make_tag_strict']) ? $C['make_tag_strict'] : 1;
+$C['named_entity'] = isset($C['named_entity']) ? (bool)$C['named_entity'] : 1;
+$C['no_deprecated_attr'] = isset($C['no_deprecated_attr']) ? $C['no_deprecated_attr'] : 1;
+$C['parent'] = isset($C['parent'][0]) ? strtolower($C['parent']) : 'body';
+$C['show_setting'] = !empty($C['show_setting']) ? $C['show_setting'] : 0;
+$C['style_pass'] = empty($C['style_pass']) ? 0 : 1;
+$C['tidy'] = empty($C['tidy']) ? 0 : $C['tidy'];
+$C['unique_ids'] = isset($C['unique_ids']) ? $C['unique_ids'] : 1;
+$C['xml:lang'] = isset($C['xml:lang']) ? $C['xml:lang'] : 0;
+
+if(isset($GLOBALS['C'])){$reC = $GLOBALS['C'];}
+$GLOBALS['C'] = $C;
+$S = is_array($S) ? $S : hl_spec($S);
+if(isset($GLOBALS['S'])){$reS = $GLOBALS['S'];}
+$GLOBALS['S'] = $S;
+
+$t = preg_replace('`[\x00-\x08\x0b-\x0c\x0e-\x1f]`', '', $t);
+if($C['clean_ms_char']){
+ $x = array("\x7f"=>'', "\x80"=>'€', "\x81"=>'', "\x83"=>'ƒ', "\x85"=>'…', "\x86"=>'†', "\x87"=>'‡', "\x88"=>'ˆ', "\x89"=>'‰', "\x8a"=>'Š', "\x8b"=>'‹', "\x8c"=>'Œ', "\x8d"=>'', "\x8e"=>'Ž', "\x8f"=>'', "\x90"=>'', "\x95"=>'•', "\x96"=>'–', "\x97"=>'—', "\x98"=>'˜', "\x99"=>'™', "\x9a"=>'š', "\x9b"=>'›', "\x9c"=>'œ', "\x9d"=>'', "\x9e"=>'ž', "\x9f"=>'Ÿ');
+ $x = $x + ($C['clean_ms_char'] == 1 ? array("\x82"=>'‚', "\x84"=>'„', "\x91"=>'‘', "\x92"=>'’', "\x93"=>'“', "\x94"=>'”') : array("\x82"=>'\'', "\x84"=>'"', "\x91"=>'\'', "\x92"=>'\'', "\x93"=>'"', "\x94"=>'"'));
+ $t = strtr($t, $x);
+}
+if($C['cdata'] or $C['comment']){$t = preg_replace_callback('`<!(?:(?:--.*?--)|(?:\[CDATA\[.*?\]\]))>`sm', 'hl_cmtcd', $t);}
+$t = preg_replace_callback('`&([A-Za-z][A-Za-z0-9]{1,30}|#(?:[0-9]{1,8}|[Xx][0-9A-Fa-f]{1,7}));`', 'hl_ent', str_replace('&', '&', $t));
+if($C['unique_ids'] && !isset($GLOBALS['hl_Ids'])){$GLOBALS['hl_Ids'] = array();}
+if($C['hook']){$t = $C['hook']($t, $C, $S);}
+if($C['show_setting'] && preg_match('`^[a-z][a-z0-9_]*$`i', $C['show_setting'])){
+ $GLOBALS[$C['show_setting']] = array('config'=>$C, 'spec'=>$S, 'time'=>microtime());
+}
+// main
+$t = preg_replace_callback('`<(?:(?:\s|$)|(?:[^>]*(?:>|$)))|>`m', 'hl_tag', $t);
+$t = $C['balance'] ? hl_bal($t, $C['keep_bad'], $C['parent']) : $t;
+$t = (($C['cdata'] or $C['comment']) && strpos($t, "\x01") !== false) ? str_replace(array("\x01", "\x02", "\x03", "\x04", "\x05"), array('', '', '&', '<', '>'), $t) : $t;
+$t = $C['tidy'] ? hl_tidy($t, $C['tidy'], $C['parent']) : $t;
+unset($C, $e);
+if(isset($reC)){$GLOBALS['C'] = $reC;}
+if(isset($reS)){$GLOBALS['S'] = $reS;}
+return $t;
+// eof
+}
+
+function hl_attrval($t, $p){
+// check attr val against $S
+$o = 1; $l = strlen($t);
+foreach($p as $k=>$v){
+ switch($k){
+  case 'maxlen':if($l > $v){$o = 0;}
+  break; case 'minlen': if($l < $v){$o = 0;}
+  break; case 'maxval': if((float)($t) > $v){$o = 0;}
+  break; case 'minval': if((float)($t) < $v){$o = 0;}
+  break; case 'match': if(!preg_match($v, $t)){$o = 0;}
+  break; case 'nomatch': if(preg_match($v, $t)){$o = 0;}
+  break; case 'oneof':
+   $m = 0;
+   foreach(explode('|', $v) as $n){if($t == $n){$m = 1; break;}}
+   $o = $m;
+  break; case 'noneof':
+   $m = 1;
+   foreach(explode('|', $v) as $n){if($t == $n){$m = 0; break;}}
+   $o = $m;
+  break; default:
+  break;
+ }
+ if(!$o){break;}
+}
+return ($o ? $t : (isset($p['default']) ? $p['default'] : 0));
+// eof
+}
+
+function hl_bal($t, $do=1, $in='div'){
+// balance tags
+// by content
+$cB = array('blockquote'=>1, 'form'=>1, 'map'=>1, 'noscript'=>1); // Block
+$cE = array('area'=>1, 'br'=>1, 'col'=>1, 'embed'=>1, 'hr'=>1, 'img'=>1, 'input'=>1, 'isindex'=>1, 'param'=>1); // Empty
+$cF = array('button'=>1, 'del'=>1, 'div'=>1, 'dd'=>1, 'fieldset'=>1, 'iframe'=>1, 'ins'=>1, 'li'=>1, 'noscript'=>1, 'object'=>1, 'td'=>1, 'th'=>1); // Flow; later context-wise dynamic move of ins & del to $cI
+$cI = array('a'=>1, 'abbr'=>1, 'acronym'=>1, 'address'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'caption'=>1, 'cite'=>1, 'code'=>1, 'dfn'=>1, 'dt'=>1, 'em'=>1, 'font'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'i'=>1, 'kbd'=>1, 'label'=>1, 'legend'=>1, 'p'=>1, 'pre'=>1, 'q'=>1, 'rb'=>1, 'rt'=>1, 's'=>1, 'samp'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'tt'=>1, 'u'=>1, 'var'=>1); // Inline
+$cN = array('a'=>array('a'=>1), 'button'=>array('a'=>1, 'button'=>1, 'fieldset'=>1, 'form'=>1, 'iframe'=>1, 'input'=>1, 'label'=>1, 'select'=>1, 'textarea'=>1), 'fieldset'=>array('fieldset'=>1), 'form'=>array('form'=>1), 'label'=>array('label'=>1), 'noscript'=>array('script'=>1), 'pre'=>array('big'=>1, 'font'=>1, 'img'=>1, 'object'=>1, 'script'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1), 'rb'=>array('ruby'=>1), 'rt'=>array('ruby'=>1)); // Illegal
+$cN2 = array_keys($cN);
+$cR = array('blockquote'=>1, 'dir'=>1, 'dl'=>1, 'form'=>1, 'map'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'optgroup'=>1, 'rbc'=>1, 'rtc'=>1, 'ruby'=>1, 'select'=>1, 'table'=>1, 'tbody'=>1, 'tfoot'=>1, 'thead'=>1, 'tr'=>1, 'ul'=>1);
+$cS = array('colgroup'=>array('col'=>1), 'dir'=>array('li'=>1), 'dl'=>array('dd'=>1, 'dt'=>1), 'menu'=>array('li'=>1), 'ol'=>array('li'=>1), 'optgroup'=>array('option'=>1), 'option'=>array('#pcdata'=>1), 'rbc'=>array('rb'=>1), 'rp'=>array('#pcdata'=>1), 'rtc'=>array('rt'=>1), 'ruby'=>array('rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1), 'select'=>array('optgroup'=>1, 'option'=>1), 'script'=>array('#pcdata'=>1), 'table'=>array('caption'=>1, 'col'=>1, 'colgroup'=>1, 'tfoot'=>1, 'tbody'=>1, 'tr'=>1, 'thead'=>1), 'tbody'=>array('tr'=>1), 'tfoot'=>array('tr'=>1), 'textarea'=>array('#pcdata'=>1), 'thead'=>array('tr'=>1), 'tr'=>array('td'=>1, 'th'=>1), 'ul'=>array('li'=>1)); // Specific - immediate parent-child
+if($GLOBALS['C']['direct_list_nest']){$cS['ol'] = $cS['ul'] += array('ol'=>1, 'ul'=>1);}
+$cO = array('address'=>array('p'=>1), 'applet'=>array('param'=>1), 'blockquote'=>array('script'=>1), 'fieldset'=>array('legend'=>1, '#pcdata'=>1), 'form'=>array('script'=>1), 'map'=>array('area'=>1), 'object'=>array('param'=>1, 'embed'=>1)); // Other
+$cT = array('colgroup'=>1, 'dd'=>1, 'dt'=>1, 'li'=>1, 'option'=>1, 'p'=>1, 'td'=>1, 'tfoot'=>1, 'th'=>1, 'thead'=>1, 'tr'=>1); // Omitable closing
+// block/inline type; ins & del both type; #pcdata: text
+$eB = array('address'=>1, 'blockquote'=>1, 'center'=>1, 'del'=>1, 'dir'=>1, 'dl'=>1, 'div'=>1, 'fieldset'=>1, 'form'=>1, 'ins'=>1, 'h1'=>1, 'h2'=>1, 'h3'=>1, 'h4'=>1, 'h5'=>1, 'h6'=>1, 'hr'=>1, 'isindex'=>1, 'menu'=>1, 'noscript'=>1, 'ol'=>1, 'p'=>1, 'pre'=>1, 'table'=>1, 'ul'=>1);
+$eI = array('#pcdata'=>1, 'a'=>1, 'abbr'=>1, 'acronym'=>1, 'applet'=>1, 'b'=>1, 'bdo'=>1, 'big'=>1, 'br'=>1, 'button'=>1, 'cite'=>1, 'code'=>1, 'del'=>1, 'dfn'=>1, 'em'=>1, 'embed'=>1, 'font'=>1, 'i'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'ins'=>1, 'kbd'=>1, 'label'=>1, 'map'=>1, 'object'=>1, 'q'=>1, 'ruby'=>1, 's'=>1, 'samp'=>1, 'select'=>1, 'script'=>1, 'small'=>1, 'span'=>1, 'strike'=>1, 'strong'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1, 'tt'=>1, 'u'=>1, 'var'=>1);
+$eN = array('a'=>1, 'big'=>1, 'button'=>1, 'fieldset'=>1, 'font'=>1, 'form'=>1, 'iframe'=>1, 'img'=>1, 'input'=>1, 'label'=>1, 'object'=>1, 'ruby'=>1, 'script'=>1, 'select'=>1, 'small'=>1, 'sub'=>1, 'sup'=>1, 'textarea'=>1); // Exclude from specific ele; $cN values
+$eO = array('area'=>1, 'caption'=>1, 'col'=>1, 'colgroup'=>1, 'dd'=>1, 'dt'=>1, 'legend'=>1, 'li'=>1, 'optgroup'=>1, 'option'=>1, 'param'=>1, 'rb'=>1, 'rbc'=>1, 'rp'=>1, 'rt'=>1, 'rtc'=>1, 'script'=>1, 'tbody'=>1, 'td'=>1, 'tfoot'=>1, 'thead'=>1, 'th'=>1, 'tr'=>1); // Missing in $eB & $eI
+$eF = $eB + $eI;
+
+// $in sets allowed child
+$in = ((isset($eF[$in]) && $in != '#pcdata') or isset($eO[$in])) ? $in : 'div';
+if(isset($cE[$in])){
+ return (!$do ? '' : str_replace(array('<', '>'), array('<', '>'), $t));
+}
+if(isset($cS[$in])){$inOk = $cS[$in];}
+elseif(isset($cI[$in])){$inOk = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
+elseif(isset($cF[$in])){$inOk = $eF; unset($cI['del'], $cI['ins']);}
+elseif(isset($cB[$in])){$inOk = $eB; unset($cI['del'], $cI['ins']);}
+if(isset($cO[$in])){$inOk = $inOk + $cO[$in];}
+if(isset($cN[$in])){$inOk = array_diff_assoc($inOk, $cN[$in]);}
+
+$t = explode('<', $t);
+$ok = $q = array(); // $q seq list of open non-empty ele
+ob_start();
+
+for($i=-1, $ci=count($t); ++$i<$ci;){
+ // allowed $ok in parent $p
+ if($ql = count($q)){
+  $p = array_pop($q);
+  $q[] = $p;
+  if(isset($cS[$p])){$ok = $cS[$p];}
+  elseif(isset($cI[$p])){$ok = $eI; $cI['del'] = 1; $cI['ins'] = 1;}
+  elseif(isset($cF[$p])){$ok = $eF; unset($cI['del'], $cI['ins']);}
+  elseif(isset($cB[$p])){$ok = $eB; unset($cI['del'], $cI['ins']);}
+  if(isset($cO[$p])){$ok = $ok + $cO[$p];}
+  if(isset($cN[$p])){$ok = array_diff_assoc($ok, $cN[$p]);}
+ }else{$ok = $inOk; unset($cI['del'], $cI['ins']);}
+ // bad tags, & ele content
+ if(isset($e) && ($do == 1 or (isset($ok['#pcdata']) && ($do == 3 or $do == 5)))){
+  echo '<', $s, $e, $a, '>';
+ }



More information about the geeklog-cvs mailing list