[geeklog-cvs] geeklog: Improved selection of text portion to display in search...

geeklog-cvs at lists.geeklog.net geeklog-cvs at lists.geeklog.net
Fri Jun 12 15:40:58 EDT 2009


details:   http://project.geeklog.net/cgi-bin/hgweb.cgi/rev/79731422caff
changeset: 7105:79731422caff
user:      Dirk Haun <dirk at haun-online.de>
date:      Fri Jun 12 11:55:12 2009 +0200
description:
Improved selection of text portion to display in search results

diffstat:

 system/classes/search.class.php |  61 +++++++++++++++++++++++++-----
 1 files changed, 51 insertions(+), 10 deletions(-)

diffs (105 lines):

diff -r 88ac73e3784a -r 79731422caff system/classes/search.class.php
--- a/system/classes/search.class.php	Thu Jun 11 22:27:08 2009 +0200
+++ b/system/classes/search.class.php	Fri Jun 12 11:55:12 2009 +0200
@@ -749,7 +749,7 @@
                 }
             }
 
-            $row['title'] = $this->_shortenText($this->_query, $row['title'], 6);
+            $row['title'] = $this->_shortenText($this->_query, $row['title'], 8);
             $row['title'] = stripslashes(str_replace('$', '&#36;', $row['title']));
             $row['title'] = COM_createLink($row['title'], $row['url']);
 
@@ -786,10 +786,11 @@
     * @return string A short version of the text
     *
     */
-    function _shortenText( $keyword, $text, $num_words = 7 )
+    function _shortenText($keyword, $text, $num_words = 7)
     {
         $text = strip_tags($text);
         $text = str_replace(array("\011", "\012", "\015"), ' ', trim($text));
+        $text = str_replace('&nbsp;', ' ', $text);
         $text = preg_replace('/\s\s+/', ' ', $text);
         $words = explode(' ', $text);
         $word_count = count($words);
@@ -813,24 +814,28 @@
             else
             {
                 $str = substr($text, $pos, $pos_space - $pos);
-                $key = array_search($str, $words);
                 $m = (int) (($num_words - 1) / 2);
-                if ($key <= $m)
-                {
+                $key = $this->_arraySearch($keyword, $words);
+                if ($key === false) {
+                    // Keyword(s) not found - show start of text
+                    $key = 0;
+                    $start = 0;
+                    $end = $num_words - 1;
+                } elseif ($key <= $m) {
                     // Keyword at the start of text
                     $start = 0 - $key;
                     $end = $num_words - 1;
-                    $end = (($key + $m <= $word_count - 1) ? $key : $word_count - $m - 1);
+                    $end = ($key + $m <= $word_count - 1)
+                         ? $key : $word_count - $m - 1;
                     $abs_length = abs($start) + abs($end) + 1;
                     if ($abs_length < $num_words) {
                         $end += ($num_words - $abs_length);
                     }
-                }
-                else
-                {
+                } else {
                     // Keyword in the middle of text
                     $start = 0 - $m;
-                    $end = (($key + $m <= $word_count - 1) ? $m : $word_count - $key - 1);
+                    $end = ($key + $m <= $word_count - 1)
+                         ? $m : $word_count - $key - 1;
                     $abs_length = abs($start) + abs($end) + 1;
                     if ($abs_length < $num_words) {
                         $start -= ($num_words - $abs_length);
@@ -857,6 +862,42 @@
     }
 
     /**
+    * Search array of words for keyword(s)
+    *
+    * @param   string  $needle    keyword(s), separated by spaces
+    * @param   array   $haystack  array of words to search through
+    * @return  mixed              index in $haystack or false when not found
+    * @access  private
+    *
+    */
+    function _arraySearch($needle, $haystack)
+    {
+        $keywords = explode(' ', $needle);
+        $num_keywords = count($keywords);
+
+        foreach ($haystack as $key => $value) {
+            if ($this->_stripos($value, $keywords[0]) !== false) {
+                if ($num_keywords == 1) {
+                    return $key;
+                } else {
+                    $matched_all = true;
+                    for ($i = 1; $i < $num_keywords; $i++) {
+                        if ($this->_stripos($haystack[$key + $i], $keywords[$i]) === false) {
+                            $matched_all = false;
+                            break;
+                        }
+                    }
+                    if ($matched_all) {
+                        return $key;
+                    }
+                }
+            }
+        }
+
+        return false;
+    }
+
+    /**
     * Finds the similarities between heading names
     *
     * Returns the index of a heading that matches a



More information about the geeklog-cvs mailing list