2, 'exact_word_bonus' => 3, 'abs_length_weight' => 0.0, 'rel_length_weight' => 1.0, 'debug' => true ), $options); // Null suffix defaults to same as prefix if (is_null($suffix)) { $suffix = $prefix; } // Not enough to work with? if (strlen($text) <= $length) { return $text; } // Just in case if (!is_array($words)) { $words = array($words); } // Build the event list // [also calculate maximum word length for relative weight bonus] $events = array(); $maxWordLength = 0; foreach ($words as $word) { if (strlen($word) > $maxWordLength) { $maxWordLength = strlen($word); } $i = -1; while ( ($i = stripos($text, $word, $i+1)) !== false ) { // Basic score for a match is always 1 $score = 1; // Apply modifiers if (substr($text, $i, strlen($word)) == $word) { // Case matches exactly $score += $options['exact_case_bonus']; } if ($options['abs_length_weight'] != 0.0) { // Absolute length weight (longer words count for more) $score += strlen($word) * $options['abs_length_weight']; } if ($options['rel_length_weight'] != 0.0) { // Relative length weight (longer words count for more) $score += strlen($word) / $maxWordLength * $options['rel_length_weight']; } if (preg_match('/\W/', substr($text, $i-1, 1))) { // The start of the word matches exactly $score += $options['exact_word_bonus']; } if (preg_match('/\W/', substr($text, $i+strlen($word), 1))) { // The end of the word matches exactly $score += $options['exact_word_bonus']; } // Push event occurs when the word comes into range $events[] = array( 'type' => 'push', 'word' => $word, 'pos' => max(0, $i + strlen($word) - $length), 'score' => $score ); // Pop event occurs when the word goes out of range $events[] = array( 'type' => 'pop', 'word' => $word, 'pos' => $i + 1, 'score' => $score ); // Bump event makes it more attractive for words to be in the // middle of the excerpt [@todo: this needs work] $events[] = array( 'type' => 'bump', 'word' => $word, 'pos' => max(0, $i + floor(strlen($word)/2) - floor($length/2)), 'score' => 0.5 ); } } // If nothing is found then just truncate from the beginning if (empty($events)) { return substr($text, 0, $length) . $suffix; } // We want to handle each event in the order it occurs in // [i.e. we want an event queue] $events = sortByKey($events, 'pos'); $scores = array(); $score = 0; $current_words = array(); // Process each event in turn foreach ($events as $idx => $event) { $thisPos = floor($event['pos']); $word = strtolower($event['word']); switch ($event['type']) { case 'push': if (empty($current_words[$word])) { // First occurence of a word gets full value $current_words[$word] = 1; $score += $event['score']; } else { // Subsequent occurrences mean less and less $current_words[$word]++; $score += $event['score'] / sizeof($current_words[$word]); } break; case 'pop': if (($current_words[$word])==1) { unset($current_words[$word]); $score -= ($event['score']); } else { $current_words[$word]--; $score -= $event['score'] / sizeof($current_words[$word]); } break; case 'bump': if (!empty($event['score'])) { $score += $event['score']; } break; default: } // Close enough for government work... $score = round($score, 2); // Store the position/score entry $scores[$thisPos] = $score; // For use with debugging $debugWords[$thisPos] = $current_words; // Remove score bump if ($event['type'] == 'bump') { $score -= $event['score']; } } // Calculate the best score // Yeah, could have done this in the main event loop // but it's better here $bestScore = 0; foreach ($scores as $pos => $score) { if ($score > $bestScore) { $bestScore = $score; } } if ($options['debug']) { // This is really quick, really tatty debug information // (but it works) echo ""; echo ""; echo ""; foreach ($events as $event) { echo ""; echo ""; echo ""; } echo "
Events
PosTypeWordScore
{$event['pos']}{$event['type']}{$event['word']}{$event['score']}
"; echo ""; echo ""; $idx = 0; foreach ($scores as $pos => $score) { $excerpt = substr($text, $pos, $length); $style = ($score == $bestScore) ? 'background: #ff7;' : ''; //$score = floor($score + 0.5); echo ""; echo ""; echo ""; echo ""; echo ""; echo ""; echo ""; $idx++; } echo "
Positions and their scores
" . $idx . "" . $pos . "
" . $score . "
" . str_repeat('*', $score) . "
"; foreach ($debugWords[$pos] as $word => $count) { echo ""; } echo "
$word$count
" . (preg_replace('/(' . implode('|', $words) . ')/i', '\1', htmlentities($excerpt))) . "
"; } // Find all positions that correspond to the best score $positions = array(); foreach ($scores as $pos => $score) { if ($score == $bestScore) { $positions[] = $pos; } } if (sizeof($positions) > 1) { // Scores are tied => do something clever to choose one // @todo: Actually do something clever here $pos = $positions[0]; } else { $pos = $positions[0]; } // Extract the excerpt from the position, (pre|ap)pend the (pre|suf)fix $excerpt = substr($text, $pos, $length); if ($pos > 0) { $excerpt = $prefix . $excerpt; } if ($pos + $length < strlen($text)) { $excerpt .= $suffix; } return $excerpt; } } ?>