2,
'exact_word_bonus' => 3,
'abs_length_weight' => 0.0,
'rel_length_weight' => 1.0,
'debug' => true
), $options);
// Null suffix defaults to same as prefix
if (is_null($suffix)) {
$suffix = $prefix;
}
// Not enough to work with?
if (strlen($text) <= $length) {
return $text;
}
// Just in case
if (!is_array($words)) {
$words = array($words);
}
// Build the event list
// [also calculate maximum word length for relative weight bonus]
$events = array();
$maxWordLength = 0;
foreach ($words as $word) {
if (strlen($word) > $maxWordLength) {
$maxWordLength = strlen($word);
}
$i = -1;
while ( ($i = stripos($text, $word, $i+1)) !== false ) {
// Basic score for a match is always 1
$score = 1;
// Apply modifiers
if (substr($text, $i, strlen($word)) == $word) {
// Case matches exactly
$score += $options['exact_case_bonus'];
}
if ($options['abs_length_weight'] != 0.0) {
// Absolute length weight (longer words count for more)
$score += strlen($word) * $options['abs_length_weight'];
}
if ($options['rel_length_weight'] != 0.0) {
// Relative length weight (longer words count for more)
$score += strlen($word) / $maxWordLength * $options['rel_length_weight'];
}
if (preg_match('/\W/', substr($text, $i-1, 1))) {
// The start of the word matches exactly
$score += $options['exact_word_bonus'];
}
if (preg_match('/\W/', substr($text, $i+strlen($word), 1))) {
// The end of the word matches exactly
$score += $options['exact_word_bonus'];
}
// Push event occurs when the word comes into range
$events[] = array(
'type' => 'push',
'word' => $word,
'pos' => max(0, $i + strlen($word) - $length),
'score' => $score
);
// Pop event occurs when the word goes out of range
$events[] = array(
'type' => 'pop',
'word' => $word,
'pos' => $i + 1,
'score' => $score
);
// Bump event makes it more attractive for words to be in the
// middle of the excerpt [@todo: this needs work]
$events[] = array(
'type' => 'bump',
'word' => $word,
'pos' => max(0, $i + floor(strlen($word)/2) - floor($length/2)),
'score' => 0.5
);
}
}
// If nothing is found then just truncate from the beginning
if (empty($events)) {
return substr($text, 0, $length) . $suffix;
}
// We want to handle each event in the order it occurs in
// [i.e. we want an event queue]
$events = sortByKey($events, 'pos');
$scores = array();
$score = 0;
$current_words = array();
// Process each event in turn
foreach ($events as $idx => $event) {
$thisPos = floor($event['pos']);
$word = strtolower($event['word']);
switch ($event['type']) {
case 'push':
if (empty($current_words[$word])) {
// First occurrence of a word gets full value
$current_words[$word] = 1;
$score += $event['score'];
}
else {
// Subsequent occurrences mean less and less
$current_words[$word]++;
$score += $event['score'] / sizeof($current_words[$word]);
}
break;
case 'pop':
if (($current_words[$word])==1) {
unset($current_words[$word]);
$score -= ($event['score']);
}
else {
$current_words[$word]--;
$score -= $event['score'] / sizeof($current_words[$word]);
}
break;
case 'bump':
if (!empty($event['score'])) {
$score += $event['score'];
}
break;
default:
}
// Close enough for government work...
$score = round($score, 2);
// Store the position/score entry
$scores[$thisPos] = $score;
// For use with debugging
$debugWords[$thisPos] = $current_words;
// Remove score bump
if ($event['type'] == 'bump') {
$score -= $event['score'];
}
}
// Calculate the best score
// Yeah, could have done this in the main event loop
// but it's better here
$bestScore = 0;
foreach ($scores as $pos => $score) {
if ($score > $bestScore) {
$bestScore = $score;
}
}
if ($options['debug']) {
// This is really quick, really tatty debug information
// (but it works)
echo "
";
echo "Events";
echo "| Pos | Type | Word | Score | ";
foreach ($events as $event) {
echo "
|---|
";
echo "| {$event['pos']} | {$event['type']} | {$event['word']} | {$event['score']} | ";
echo "
";
}
echo "
";
echo "";
echo "Positions and their scores";
$idx = 0;
foreach ($scores as $pos => $score) {
$excerpt = substr($text, $pos, $length);
$style = ($score == $bestScore) ? 'background: #ff7;' : '';
//$score = floor($score + 0.5);
echo "";
echo "| " . $idx . " | ";
echo "" . $pos . " | ";
echo "" . $score . " " . str_repeat('*', $score) . " | ";
echo "";
foreach ($debugWords[$pos] as $word => $count) {
echo "| $word | $count | ";
}
echo " | ";
echo "" . (preg_replace('/(' . implode('|', $words) . ')/i', '\1', htmlentities($excerpt))) . " | ";
echo "
";
$idx++;
}
echo "
";
}
// Find all positions that correspond to the best score
$positions = array();
foreach ($scores as $pos => $score) {
if ($score == $bestScore) {
$positions[] = $pos;
}
}
if (sizeof($positions) > 1) {
// Scores are tied => do something clever to choose one
// @todo: Actually do something clever here
$pos = $positions[0];
}
else {
$pos = $positions[0];
}
// Extract the excerpt from the position, (pre|ap)pend the (pre|suf)fix
$excerpt = substr($text, $pos, $length);
if ($pos > 0) {
$excerpt = $prefix . $excerpt;
}
if ($pos + $length < strlen($text)) {
$excerpt .= $suffix;
}
return $excerpt;
}
}
?>