[geeklog-cvs] geeklog: New function COM_getTextContent turns HTML into continu...

geeklog-cvs at lists.geeklog.net geeklog-cvs at lists.geeklog.net
Sat Jul 25 15:49:06 EDT 2009


details:   http://project.geeklog.net/cgi-bin/hgweb.cgi/rev/dd235920fb85
changeset: 7186:dd235920fb85
user:      Dirk Haun <dirk at haun-online.de>
date:      Sat Jul 25 19:38:57 2009 +0200
description:
New function COM_getTextContent turns HTML into continuous text, e.g. for word counts and text excerpts

diffstat:

 public_html/lib-common.php      |  34 ++++++++++++++++++++++++++++++++++
 system/classes/search.class.php |   5 +----
 system/lib-story.php            |   2 +-
 3 files changed, 36 insertions(+), 5 deletions(-)

diffs (71 lines):

diff -r 85ab60faa47f -r dd235920fb85 public_html/lib-common.php
--- a/public_html/lib-common.php	Sat Jul 25 10:04:23 2009 +0200
+++ b/public_html/lib-common.php	Sat Jul 25 19:38:57 2009 +0200
@@ -6933,6 +6933,40 @@
 }
 
 /**
+* Turn a piece of HTML into continuous(!) plain text
+*
+* This function removes HTML tags, line breaks, etc. and returns one long
+* line of text. This is useful for word counts (do an explode() on the result)
+* and for text excerpts.
+*
+* @param    string  $text   original text, including HTML and line breaks
+* @return   string          continuous plain text
+* 
+*/
+function COM_getTextContent($text)
+{
+    // replace <br> with spaces so that Text<br>Text becomes two words
+    $text = preg_replace('/\<br(\s*)?\/?\>/i', ' ', $text);
+
+    // add extra space between tags, e.g. <p>Text</p><p>Text</p>
+    $text = str_replace('><', '> <', $text);
+
+    // only now remove all HTML tags
+    $text = strip_tags($text);
+
+    // replace all tabs, newlines, and carriage returns with spaces
+    $text = str_replace(array("\011", "\012", "\015"), ' ', $text);
+
+    // replace entities with plain spaces
+    $text = str_replace(array('&#20;', '&#160;', '&nbsp;'), ' ', $text);
+
+    // collapse whitespace
+    $text = preg_replace('/\s\s+/', ' ', $text);
+
+    return trim($text);
+}
+
+/**
 * Now include all plugin functions
 */
 foreach ($_PLUGINS as $pi_name) {
diff -r 85ab60faa47f -r dd235920fb85 system/classes/search.class.php
--- a/system/classes/search.class.php	Sat Jul 25 10:04:23 2009 +0200
+++ b/system/classes/search.class.php	Sat Jul 25 19:38:57 2009 +0200
@@ -795,10 +795,7 @@
     */
     function _shortenText($keyword, $text, $num_words = 7)
     {
-        $text = strip_tags($text);
-        $text = str_replace(array("\011", "\012", "\015"), ' ', trim($text));
-        $text = str_replace('&nbsp;', ' ', $text);
-        $text = preg_replace('/\s\s+/', ' ', $text);
+        $text = COM_getTextContent($text);
         $words = explode(' ', $text);
         $word_count = count($words);
         if ($word_count <= $num_words) {
diff -r 85ab60faa47f -r dd235920fb85 system/lib-story.php
--- a/system/lib-story.php	Sat Jul 25 10:04:23 2009 +0200
+++ b/system/lib-story.php	Sat Jul 25 19:38:57 2009 +0200
@@ -325,7 +325,7 @@
         {
             $article->set_var( 'lang_readmore', $LANG01[2] );
             $article->set_var( 'lang_readmore_words', $LANG01[62] );
-            $numwords = COM_numberFormat (sizeof( explode( ' ', strip_tags( $bodytext ))));
+            $numwords = COM_numberFormat(count(explode(' ', COM_getTextContent($bodytext))));
             $article->set_var( 'readmore_words', $numwords );
 
             $article->set_var( 'readmore_link',



More information about the geeklog-cvs mailing list