Sphinx Behavior

by xumix
This behavior helps to use Sphinx search engine in your projects.
First, you need Sphinx (http://sphinxsearch.com/) installed and configured; I assume you have already set it up.
Now, get the sphinxapi.php from the sphinx distribution and place it in app/vendors.
Download the code and save it to app/models/behaviors/sphinx.php

Behavior code:


<?php
/**
 * Behavior for simple usage of Sphinx search engine
 * http://www.sphinxsearch.com
 *
 * @copyright 2008, Vilen Tambovtsev
 * @author  Vilen Tambovtsev
 * @license      http://www.opensource.org/licenses/mit-license.php The MIT License
 */


class SphinxBehavior extends ModelBehavior
{
    /**
     * Per-model runtime state, keyed by model alias.
     * The SphinxClient instance for a model is stored under the 'sphinx' key.
     *
     * @var array
     */
    var $runtime = array();

    /**
     * Default connection settings; values passed in the behavior config override these.
     *
     * @var array
     */
    var $_defaults = array('server' => 'localhost', 'port' => 3312);

    /**
     * Sphinx client object
     *
     * @var SphinxClient
     */
    var $sphinx = null;

    /**
     * Stores the merged settings for the model and creates its Sphinx client.
     *
     * @param object $model  Model the behavior is attached to
     * @param array  $config Optional 'server' and 'port' overrides
     * @return void
     * @access public
     */
    function setup(&$model, $config = array())
    {
        $settings = array_merge($this->_defaults, (array)$config);

        $this->settings[$model->alias] = $settings;

        App::import('Vendor', 'sphinxapi');
        $this->runtime[$model->alias]['sphinx'] = new SphinxClient();
        // The Sphinx client asserts an integer port, so a port configured as a string is cast here.
        $this->runtime[$model->alias]['sphinx']->SetServer($settings['server'], (int)$settings['port']);
    }

    /**
     * beforeFind Callback
     *
     * Runs only when the query carries both a 'sphinx' options array and a
     * non-empty 'search' string. Recognised 'sphinx' keys:
     *  - 'filter'           list of array(attribute, values[, exclude])
     *  - 'filterRange'      list of array(attribute, min, max[, exclude])
     *  - 'filterFloatRange' list of array(attribute, min, max[, exclude])
     *  - 'matchMode'        value passed to SetMatchMode()
     *  - 'sortMode'         array(mode => sort clause); only the first pair is used
     *  - 'index'            index name or list of index names (defaults to '*')
     *
     * The Sphinx query is executed here; the SQL query is then rewritten to
     * fetch the matched primary keys in Sphinx order (or, for 'count' finds,
     * to return the Sphinx total).
     *
     * @param object $model Model being queried
     * @param array  $query Find query
     * @return mixed Modified query, true when Sphinx is not requested, false when the search fails
     * @access public
     */
    function beforeFind(&$model, $query)
    {
        if (empty($query['sphinx']) || empty($query['search']))
            return true;

        $sphinx =& $this->runtime[$model->alias]['sphinx'];

        if ($model->findQueryType == 'count')
        {
            $model->recursive = -1;
            $query['limit'] = 1;
            $query['page'] = 1;
        }
        else if (empty($query['limit']))
        {
            $query['limit'] = 9999999;
            $query['page'] = 1;
        }

        // The client is reused for every find on this model (paginate() issues a
        // count and then a find), so filters left over from a previous call must
        // be dropped before this query's filters are applied.
        $sphinx->ResetFilters();

        foreach ($query['sphinx'] as $key => $setting)
        {
            switch ($key)
            {
                case 'filter':
                    foreach ($setting as $arg)
                    {
                        $exclude = empty($arg[2]) ? false : $arg[2];
                        $sphinx->SetFilter($arg[0], (array)$arg[1], $exclude);
                    }
                    break;
                case 'filterRange':
                case 'filterFloatRange':
                    // 'filterRange' -> SetFilterRange, 'filterFloatRange' -> SetFilterFloatRange
                    $method = 'Set' . ucfirst($key);
                    foreach ($setting as $arg)
                    {
                        $exclude = empty($arg[3]) ? false : $arg[3];
                        // min and max are scalar bounds; they must not be wrapped in an array
                        $sphinx->{$method}($arg[0], $arg[1], $arg[2], $exclude);
                    }
                    break;
                case 'matchMode':
                    $sphinx->SetMatchMode($setting);
                    break;
                case 'sortMode':
                    // Rewind first so the mode and its clause come from the same (first) pair.
                    $sortClause = reset($setting);
                    $sortMode = key($setting);
                    $sphinx->SetSortMode($sortMode, $sortClause);
                    break;
                default:
                    break;
            }
        }

        // SetLimits() asserts integers; page/limit may arrive as strings from the request.
        $limit = (int)$query['limit'];
        $offset = ((int)$query['page'] - 1) * $limit;
        $sphinx->SetLimits($offset, $limit);

        // Accept either a single index name or a list of names.
        $indexes = !empty($query['sphinx']['index']) ? implode(',', (array)$query['sphinx']['index']) : '*';

        $result = $sphinx->Query($query['search'], $indexes);

        if ($result === false)
        {
            trigger_error("Search query failed: " . $sphinx->GetLastError());
            return false;
        }
        else if (isset($result['matches']))
        {
            if ($sphinx->GetLastWarning())
            {
                trigger_error("Search query warning: " . $sphinx->GetLastWarning());
            }
        }

        // Sphinx already applied filtering, ordering and paging; the SQL query
        // only has to load the matched rows.
        unset($query['conditions']);
        unset($query['order']);
        unset($query['offset']);
        $query['page'] = 1;

        if ($model->findQueryType == 'count')
        {
            $result['total'] = !empty($result['total']) ? $result['total'] : 0;
            // Make the SQL query return the Sphinx total as its count column.
            $query['fields'] = 'ABS(' . (int)$result['total'] . ') AS count';
        }
        else
        {
            if (isset($result['matches']))
                $ids = array_keys($result['matches']);
            else
                $ids = array(0);

            $query['conditions'] = array($model->alias . '.' . $model->primaryKey => $ids);
            // Keep the rows in the order Sphinx returned them.
            $query['order'] = 'FIND_IN_SET(' . $model->alias . '.' . $model->primaryKey . ', \'' . implode(',', $ids) . '\')';
        }

        return $query;
    }
}
?>


Usage:

Model Class:

<?php 
class Film extends AppModel
{
    /**
     * Behaviors attached to this model; 'Sphinx' enables the
     * 'search' / 'sphinx' find options.
     *
     * @var array
     */
    var $actsAs = array('Sphinx');
}
?>


Controller Class:

<?php 
class FilmsController extends AppController
{
    /**
     * Plain find() example: full-text search sorted by relevance.
     *
     * @return void
     */
    function index()
    {
        $sphinx = array(
            'matchMode' => SPH_MATCH_ALL,
            'sortMode' => array(SPH_SORT_EXTENDED => '@relevance DESC'),
        );
        $results = $this->Film->find('all', array('search' => 'search string here', 'sphinx' => $sphinx));
    }

    /**
     * paginate() example: optional country filter plus a full-text search
     * taken from the named parameters 'country', 'search' and 'sort'.
     *
     * @return void
     */
    function paging()
    {
        $pagination = array('Film' => array(
            'contain' => array(
                'FilmType',
                'Genre',
                'FilmPicture' => array('conditions' => array('type' => 'smallposter')),
                'Country',
                // NOTE(review): the published listing lost its commas; this may originally
                // have been a list such as array(1, 3, 4) - confirm the intended ids.
                'Person' => array('conditions' => array('FilmsPerson.profession_id' => array(134))),
                'MediaRating',
            ),
            'order' => array('Film.modified' => 'desc'),
            'conditions' => array('Film.active' => 1),
            'limit' => 30,
        ));
        $pagination['Film']['fields'] = array(
            'Film.id', 'Film.imdb_rating', 'Film.title',
            'Film.year', 'MediaRating.rating',
        );

        // Only filter by country when one was requested; an absent parameter would
        // otherwise produce an undefined index and an empty filter value list.
        if (!empty($this->params['named']['country']))
        {
            $pagination['Film']['sphinx']['filter'][] = array('country_id', $this->params['named']['country']);
        }

        if (!empty($this->params['named']['search']))
        {
            $search = trim($this->params['named']['search']);

            $sort = ', modified DESC';
            if (!empty($this->params['named']['sort']))
            {
                // Accept both "Model.field" and a bare "field".
                $parts = explode('.', $this->params['named']['sort']);
                $field = isset($parts[1]) ? $parts[1] : $parts[0];
                // The value comes from the URL and is placed in the Sphinx sort
                // expression, so only plain attribute names are allowed through.
                if (preg_match('/^[A-Za-z0-9_]+$/', $field))
                {
                    $sort = ', ' . $field . ' DESC';
                }
            }

            $pagination['Film']['sphinx']['matchMode'] = SPH_MATCH_ALL;
            $pagination['Film']['sphinx']['sortMode'] = array(SPH_SORT_EXTENDED => '@relevance DESC' . $sort);

            $pagination['Film']['search'] = $search;
        }

        $this->paginate = $pagination;
        $films = $this->paginate();
    }
}
?>

Report

More on Behaviors

Tags

Advertising

Comments

  • 36degrees posted on 07/05/10 07:44:51 AM
    Hi,

    I've been using this for a while on a project I am working on. One thing we've noticed is that each page call actually triggers two calls to Sphinx - I believe this is because CakePHP's pagination makes separate calls to get the total count and the results.

    This isn't a huge issue when there's not that much content, but we're now using this to search on 3 million rows of data, and the two calls can quite easily make a 5 second page load take 10 seconds.

    Given that Sphinx will return the total matches as part of a result anyway, is there an easy way to reduce this so that only one call is made, without modifying the cake core?

    Thanks,

    Ollie
  • wrksx posted on 05/03/10 07:40:30 AM
    Hi all.

    I was really pleased to find such a good contribution, so I decided to submit this little piece of code. It has been designed to ensure better compatibility between the Sphinx behavior and the paginator helper in views.

    The problem was when using the paginator to display sort buttons like the following:


    <?php echo $paginator->sort('sort by price', 'total_price'); ?>

    The behavior was not taking care of the variables defining the sort key (field) and direction (asc / desc) of the original request.

    Now the code to insert in the behavior:

    <?php
    // Translate the find/paginate 'order' option into a Sphinx extended sort clause.
    // Insert this in beforeFind() before the loop over $query['sphinx'].
    $sortClause = '';

    if (!empty($query['order']) && is_array($query['order']))
    {
        // Reverse only after the is_array() check, so a missing or string
        // 'order' never reaches array_reverse().
        foreach (array_reverse($query['order']) as $order)
        {
            if (!empty($order) && is_array($order))
            {
                // Only the first field => direction pair of each entry is used.
                $key = key($order);
                $field = explode('.', $key);

                // Drop the model alias: "Model.field" -> "field"
                if (count($field) === 2)
                    $field = $field[1];
                else
                    $field = $field[0];

                $direction = $order[$key];

                $sortClause .= $field . ' ' . $direction . ', ';
            }
        }
    }

    // Leave a caller-supplied sortMode alone when there is nothing to translate,
    // instead of appending to an unset or already complete clause.
    if ($sortClause !== '' || empty($query['sphinx']['sortMode']))
    {
        $query['sphinx']['sortMode'] = array(SPH_SORT_EXTENDED => $sortClause . '@relevance DESC');
    }
    ?>

    I know this can be improved, so let me know if you have corrections to submit.

    P.S.: i've tested it on version 1.3, and everything seems to work. Don't know about 1.2.
    • xumix posted on 05/04/10 01:35:24 AM
      Hi all.

      I was really pleased to find such a good contribution, so I decided to submit this little piece of code. It has been designed to ensure better compatibility between the Sphinx behavior and the paginator helper in views.

      The problem was when using the paginator to display sort buttons like the following:


      <?php echo $paginator->sort('sort by price', 'total_price'); ?>

      The behavior was not taking care of the variables defining the sort key (field) and direction (asc / desc) of the original request.

      Now the code to insert in the behavior:

      <?php
      // Translate the find/paginate 'order' option into a Sphinx extended sort clause.
      // Insert this in beforeFind() before the loop over $query['sphinx'].
      $sortClause = '';

      if (!empty($query['order']) && is_array($query['order']))
      {
          // Reverse only after the is_array() check, so a missing or string
          // 'order' never reaches array_reverse().
          foreach (array_reverse($query['order']) as $order)
          {
              if (!empty($order) && is_array($order))
              {
                  // Only the first field => direction pair of each entry is used.
                  $key = key($order);
                  $field = explode('.', $key);

                  // Drop the model alias: "Model.field" -> "field"
                  if (count($field) === 2)
                      $field = $field[1];
                  else
                      $field = $field[0];

                  $direction = $order[$key];

                  $sortClause .= $field . ' ' . $direction . ', ';
              }
          }
      }

      // Leave a caller-supplied sortMode alone when there is nothing to translate,
      // instead of appending to an unset or already complete clause.
      if ($sortClause !== '' || empty($query['sphinx']['sortMode']))
      {
          $query['sphinx']['sortMode'] = array(SPH_SORT_EXTENDED => $sortClause . '@relevance DESC');
      }
      ?>

      I know this can be improved, so let me know if you have corrections to submit.

      P.S.: i've tested it on version 1.3, and everything seems to work. Don't know about 1.2.

      I've created a mercurial repo, post patches there if you want
      http://bitbucket.org/xumix/cakephp-sphinx-behavior
  • peterscampbell posted on 03/05/10 10:00:38 PM
    I just turned off email notifications, after the fourth or fifth spam delivered from the two comments that i've left here. Allowing spam to sit on your message boards is not reassuring. Isn't anyone paying attention?
    • predominant posted on 04/15/10 10:02:24 PM
      I just turned off email notifications, after the fourth or fifth spam delivered from the two comments that i've left here. Allowing spam to sit on your message boards is not reassuring. Isn't anyone paying attention?
      We are experiencing issues, and are working on this presently to resolve the amount of spam we are receiving. Apologies for the emails you would have received while we experience the spam increase.

      Please bear with us while we combat this issue.
    • xumix posted on 03/23/10 02:00:12 AM
      I just turned off email notifications, after the fourth or fifth spam delivered from the two comments that i've left here. Allowing spam to sit on your message boards is not reassuring. Isn't anyone paying attention? just deleted spam. i suppose captcha would be nice here
  • peterscampbell posted on 12/29/09 01:46:22 PM
    Never mind! I didn't have the attribute specified properly in sphinx.conf. It's working great now!
  • peterscampbell posted on 12/29/09 01:31:22 PM
    I set up the sphinx behaviors with sphinx API some months ago and have had no problems -- this is great, and I really appreciate the effort! The application I'm developing requires some flexible date searching, so I have date fields in my table and index, in unix timestamp format. My controller sets variables $min and $max and does this call (the date table is named 'dateopened'):

    Controller Class:

    <?php 
    // NOTE: 'filterRange' is given here as one flat array, exactly as posted;
    // this is the shape that produces the problem described below.
    $sphinx = array(
        'matchMode' => SPH_MATCH_EXTENDED,
        'sortMode' => array($sortmode => $sorttype),
        'filterRange' => array('1' => 'dateopened', '2' => $min, '3' => $max, FALSE),
    );

    $this->set('results', $this->Search->find('all', array('search' => $phrase.$openSearch, 'sphinx' => $sphinx)));
    ?>

    Using the original code in the behavior, this didn't work at all -- $arg[1] would be 'd', $arg[2] 'a', and so on, breaking the first parameter letter by letter. I'm having a hard time seeing how any of the foreach statements in there could work, since they seem to be doing a full array operation on individual elements. So I rewrote it like this:


    // Commenter's replacement for the 'filterRange' case: copies the entries of
    // $setting into $para (starting at index 1) and makes one SetfilterRange call.
    case 'filterRange':
    // case 'filterFloatRange':
        $method = 'Set' . $key;
        $i=1;
        foreach ($setting as $arg)
        {
        $para[$i] = $arg;
        echo $arg."!<br />"; //this works, shows me what I would expect
        $i++;
        }
        // The call is skipped when $para[3] is empty.
        if (!empty($para[3])) $this->runtime[$model->alias]['sphinx']->SetfilterRange($para[1], $para[2], $para[3], $para[4]);
        break;

    The result is that my date range is ignored in the results. I'm using Sphinx 9.8.1 on this server, so it could be that SetFilterRange just doesn't work with that release. Or I might need something in my sphinx.conf file that I don't have -- I'm a bit unclear on that. But I'm certain (having debugged it) that the routine in the behavior is running, and I'm fairly certain that it's outputting a proper API call to SetFilterRange. Any idea why that call wouldn't be honored?

    Thanks!
  • inciteco posted on 12/15/09 06:12:13 PM
    I am having trouble getting the search to work in Cake. Sphinx on the server works fine, but I don't understand how to use the above code with a search form. Sorry about this newbie question. It would be great if someone could post a view showing how to use the Sphinx behavior.

    thanks
  • stebu posted on 12/15/09 12:21:48 PM

    Insert

    // Passes the 'fieldWeights' option straight through to the Sphinx client.
    case 'fieldWeights':
       $this->runtime[$model->alias]['sphinx']->SetFieldWeights($setting);
       break; 
    between lines 93 and 94 of sphinx.php behavior.

    Usage within the controller (assuming your table owns fields 'title' and 'tags'):

    $pagination['Article']['sphinx']['matchMode'] = SPH_MATCH_EXTENDED;
    $pagination['Article']['sphinx']['sortMode'] = array (SPH_SORT_EXTENDED=>'@relevance DESC');
    $pagination['Article']['sphinx']['fieldWeights'] = array ('title'=>5, 'tags'=>50);

  • jdebernardo posted on 09/03/09 06:36:04 PM
    Thank you for the tutorial, it was very helpful, but I am having some problems with the filters.
    Sphinx is implemented in my project, but I can't use the filters as in the example at http://planetcakephp.org/aggregator/items/1319-cakephp-sphinx-applying-filters-to-your-search
    I debugged the behavior and the Sphinx API but didn't find anything. The array structure that I use is
    'filter' => array( array('price >','300'))
    I also tried:
    'filter' => array( array('price','300')) - without '>'
    If anybody can help me, I would be very grateful.

    Thank you

    -------------UPDATED-----------------
    I can now use Sphinx in my project. The problem was in the Sphinx configuration: I hadn't added the attribute to sphinx.conf.
    It's necessary to add "sql_attr_uint = " followed by the name of the integer attribute.

  • stebu posted on 07/13/09 11:43:31 AM
    Hi there,
    first of all:
    Thank you very much for this manual!!!!
    Since I'm using UUIDs (name:id) for my tables, I inserted an additional field "aiid" (auto-increment id) for sphinx search.
    Searching via console works perfectly.
    Searching via Cake does not work.
    Do I have to define that the Sphinx within Cake should use another table field than "id"?
    Best!
    stebu
    • stebu posted on 07/13/09 11:47:34 AM
      i just had to add
      var $primaryKey = 'aiid'; to my models.

      Hi there,
      first of all:
      Thank you very much for this manual!!!!
      Since I'm using UUIDs (name:id) for my tables, I inserted an additional field "aiid" (auto-increment id) for sphinx search.
      Searching via console works perfectly.
      Searching via Cake does not work.
      Do I have to define that the Sphinx within Cake should use another table field than "id"?
      Best!
      stebu
  • port23user posted on 07/10/09 06:01:27 AM
    I think I have this behavior and Sphinx installed correctly. However, when I first executed the code similar to what you had for the index action, I got two assertion errors (lines 390 and 392 of sphinxapi.php). I remedied this by adding a 'limit' parameter to my find(). Do you know if there's a way to use the find() method to retrieve ALL the results?

    Here's my code if it helps:

    Controller Class:

    <?php
    // SPH_* are constants defined by sphinxapi.php; use them unquoted so the
    // intended modes are passed instead of the constant names as strings.
    $sphinx = array('matchMode' => SPH_MATCH_ALL, 'sortMode' => array(SPH_SORT_EXTENDED => '@relevance DESC'));
    $results = $this->Resume->find('all', array('search' => 'of', 'limit' => 10, 'sphinx' => $sphinx));
    ?>
    • xumix posted on 07/10/09 06:33:34 AM
      Replace if ($model->findQueryType == 'count') .......  with

              // Count queries only need one row back.
              if ($model->findQueryType == 'count')
              {
                  $model->recursive = -1;
                  $query['limit'] = 1;
                  $query['page'] = 1;
              }
              // No limit given: substitute a very large limit on the first page.
              else if (empty($query['limit']))
              {
                  $query['limit'] = 9999999;
              $query['page'] = 1;
              }
    • xumix posted on 07/10/09 06:25:43 AM
      I think I have this behavior and Sphinx installed correctly. However, when I first executed the code similar to what you had for the index action, I got two assertion errors (lines 390 and 392 of sphinxapi.php). I remedied this by adding a 'limit' parameter to my find(). Do you know if there's a way to use the find() method to retrieve ALL the results?

      limit => 100000000 ;)
  • port23user posted on 07/09/09 07:51:25 PM
    Thanks for sharing this. It's exactly what I'm looking for. I'm excited to try it out.
  • xumix posted on 12/03/08 03:16:31 AM
    added more examples and updated code
  • r0mk1n posted on 11/28/08 01:42:54 PM
    привет,
    можно ли получить небольшую консультацию по использованию сфинкса? при использовании Sphinx Behavior при вызове model->find('all' ...
    получаю assertion в sphinxapi, при добавлении limit в параметр query или использовании пейджинатора получаю просто набор записей из таблицы без всякого поиска
  • nextri posted on 11/19/08 11:55:19 AM
    Thanks for this. Was just about to dive into sphinx, and this will certainly save me a lot of time.
login to post a comment.