[geeklog-cvs] geeklog: New function COM_getTextContent turns HTML into continu...
geeklog-cvs at lists.geeklog.net
geeklog-cvs at lists.geeklog.net
Sat Jul 25 15:49:06 EDT 2009
details: http://project.geeklog.net/cgi-bin/hgweb.cgi/rev/dd235920fb85
changeset: 7186:dd235920fb85
user: Dirk Haun <dirk at haun-online.de>
date: Sat Jul 25 19:38:57 2009 +0200
description:
New function COM_getTextContent turns HTML into continuous text, e.g. for word counts and text excerpts
diffstat:
public_html/lib-common.php | 34 ++++++++++++++++++++++++++++++++++
system/classes/search.class.php | 5 +----
system/lib-story.php | 2 +-
3 files changed, 36 insertions(+), 5 deletions(-)
diffs (71 lines):
diff -r 85ab60faa47f -r dd235920fb85 public_html/lib-common.php
--- a/public_html/lib-common.php Sat Jul 25 10:04:23 2009 +0200
+++ b/public_html/lib-common.php Sat Jul 25 19:38:57 2009 +0200
@@ -6933,6 +6933,40 @@
}
/**
+* Turn a piece of HTML into continuous(!) plain text
+*
+* This function removes HTML tags, line breaks, etc. and returns one long
+* line of text. This is useful for word counts (do an explode() on the result)
+* and for text excerpts.
+*
+* @param string $text original text, including HTML and line breaks
+* @return string continuous plain text
+*
+*/
+function COM_getTextContent($text)
+{
+ // replace <br> with spaces so that Text<br>Text becomes two words
+ $text = preg_replace('/\<br(\s*)?\/?\>/i', ' ', $text);
+
+ // add extra space between tags, e.g. <p>Text</p><p>Text</p>
+ $text = str_replace('><', '> <', $text);
+
+ // only now remove all HTML tags
+ $text = strip_tags($text);
+
+ // replace all tabs, newlines, and carriage returns with spaces
+ $text = str_replace(array("\011", "\012", "\015"), ' ', $text);
+
+ // replace entities with plain spaces
+ $text = str_replace(array('&#20;', '&#160;', '&nbsp;'), ' ', $text);
+
+ // collapse whitespace
+ $text = preg_replace('/\s\s+/', ' ', $text);
+
+ return trim($text);
+}
+
+/**
* Now include all plugin functions
*/
foreach ($_PLUGINS as $pi_name) {
diff -r 85ab60faa47f -r dd235920fb85 system/classes/search.class.php
--- a/system/classes/search.class.php Sat Jul 25 10:04:23 2009 +0200
+++ b/system/classes/search.class.php Sat Jul 25 19:38:57 2009 +0200
@@ -795,10 +795,7 @@
*/
function _shortenText($keyword, $text, $num_words = 7)
{
- $text = strip_tags($text);
- $text = str_replace(array("\011", "\012", "\015"), ' ', trim($text));
- $text = str_replace('&nbsp;', ' ', $text);
- $text = preg_replace('/\s\s+/', ' ', $text);
+ $text = COM_getTextContent($text);
$words = explode(' ', $text);
$word_count = count($words);
if ($word_count <= $num_words) {
diff -r 85ab60faa47f -r dd235920fb85 system/lib-story.php
--- a/system/lib-story.php Sat Jul 25 10:04:23 2009 +0200
+++ b/system/lib-story.php Sat Jul 25 19:38:57 2009 +0200
@@ -325,7 +325,7 @@
{
$article->set_var( 'lang_readmore', $LANG01[2] );
$article->set_var( 'lang_readmore_words', $LANG01[62] );
- $numwords = COM_numberFormat (sizeof( explode( ' ', strip_tags( $bodytext ))));
+ $numwords = COM_numberFormat(count(explode(' ', COM_getTextContent($bodytext))));
$article->set_var( 'readmore_words', $numwords );
$article->set_var( 'readmore_link',
More information about the geeklog-cvs
mailing list