array(
            'useMbstring' => false,
            'convert' => true,
            'convertTo' => 'UTF-8',
            'convertFrom' => array('ISO-8859-15','UTF-8')
        ),
        'read' => array(
            'useMbstring' => false,
            'convert' => true,
            'primaryOnly' => true,
            'convertTo' => 'ISO-8859-15',
            'convertFrom' => array('UTF-8')
        )
    );

    /**
     * Encoding names reported by mb_list_encodings().
     *
     * Filled lazily by setup() the first time a mode with 'useMbstring'
     * enabled has to be validated.
     *
     * @var array
     */
    var $validEncodings;

    /**
     * Effective settings per model, keyed by $model->name.
     *
     * Each entry holds one settings array per mode; beforeSave() reads the
     * 'save' entry and afterFind() reads the 'read' entry.
     *
     * @var array
     */
    var $settings = array();

    /**
     * Setup callback: merges the user configuration with the defaults and
     * validates the target encoding of every mode that uses mbstring.
     *
     * The merge is done per mode, so a partial override such as
     * array('read' => array('primaryOnly' => false)) keeps the remaining
     * default keys of that mode instead of dropping them.
     *
     * Raises E_USER_ERROR via trigger_error() when mbstring is requested but
     * not available, or when the requested target encoding is unknown.
     *
     * @param AppModel $model
     * @param array $config Per-mode overrides for $this->defaultSettings
     */
    function setup(&$model, $config = array() )
    {
        if( true === empty( $config ) ) {
            $config = array();
        } elseif( false === is_array( $config ) ) {
            // Tolerate a scalar configuration by wrapping it
            $config = array( $config );
        }

        // Merge user settings with default, one mode at a time
        $settings = $this->defaultSettings;
        foreach ( $config AS $modeName => $overrides ) {
            if( true === isset( $settings[ $modeName ] )
                && true === is_array( $settings[ $modeName ] )
                && true === is_array( $overrides ) ) {
                $settings[ $modeName ] = array_merge( $settings[ $modeName ], $overrides );
            } else {
                $settings[ $modeName ] = $overrides;
            }
        }

        foreach ( $settings AS $modeName => $mode ) {
            // Only modes that explicitly ask for mbstring need validation
            if( false === is_array( $mode )
                || false === isset( $mode['useMbstring'] )
                || true !== $mode['useMbstring'] ) {
                continue;
            }

            $target = isset( $mode['convertTo'] ) ? $mode['convertTo'] : null;
            if( false === $target ) {
                // convertTo === false means conversion is switched off
                continue;
            }

            if( false === function_exists('mb_convert_encoding') ) {
                trigger_error('Sorry, your PHP version does not support mbstring functions. Please read notes at http://php.net/mbstring',E_USER_ERROR);
                // If a custom error handler lets execution continue, do not
                // call mbstring functions that do not exist
                continue;
            }

            // Check if we have a list of all valid encodings supported by PHP
            if( true === empty( $this->validEncodings ) ) {
                $this->validEncodings = mb_list_encodings();
            }

            // Encoding names are compared case-insensitively
            $known = false;
            if( true === is_string( $target ) ) {
                foreach ( $this->validEncodings AS $encoding ) {
                    if( 0 === strcasecmp( $encoding, $target ) ) {
                        $known = true;
                        break;
                    }
                }
            }

            if( false === $known ) {
                $shown = ( true === is_string( $target ) ) ? $target : var_export( $target, true );
                trigger_error('Invalid target encoding for "'.$model->name.'::'.$modeName.'" - '. $shown .' is not valid!', E_USER_ERROR );
            }
        }

        $this->settings[ $model->name ] = $settings;
    }

    /**
     * Callback for when model is saving: converts $model->data in place
     * according to the 'save' settings.
     *
     * Conversion is skipped when either 'convert' or 'convertTo' is false,
     * matching the switches honoured by afterFind() and setup().
     *
     * @param AppModel $model
     * @return boolean Always true
     */
    function beforeSave(&$model)
    {
        $settings = $this->settings[ $model->name ]['save'];

        $disabled = ( true === isset( $settings['convert'] ) && false === $settings['convert'] )
            || ( true === isset( $settings['convertTo'] ) && false === $settings['convertTo'] );
        if( true === $disabled ) {
            return true;
        }

        // Should we encode using mbstring ?
        if( true === $settings['useMbstring'] ) {
            $model->data = $this->doMultibyte( $model->data, $settings );
        } else {
            $model->data = $this->doEncode( $model->data, $settings );
        }

        return true;
    }

    /**
     * Callback for when model is reading: converts the result set according
     * to the 'read' settings and returns it.
     *
     * @param AppModel $model
     * @param array $results
     * @param boolean $primary
     * @return array The converted results, or $results untouched when
     *               conversion is disabled or skipped for non-primary data
     */
    function afterFind(&$model, $results, $primary)
    {
        $settings = $this->settings[ $model->name ]['read'];

        $disabled = ( true === isset( $settings['convert'] ) && false === $settings['convert'] )
            || ( true === isset( $settings['convertTo'] ) && false === $settings['convertTo'] );
        if( true === $disabled ) {
            return $results;
        }

        // Check if we should only handle primary model data
        if( true === $settings['primaryOnly'] && true !== $primary ) {
            return $results;
        }

        // Should we decode using mbstring ?
        if( true === $settings['useMbstring'] ) {
            return $this->doMultibyte( $results, $settings );
        }

        // Plain utf8_decode() path
        return $this->doDecode( $results, $settings );
    }

    /**
     * Recursively convert data with mb_convert_encoding(), using
     * $settings['convertTo'] as target and $settings['convertFrom'] as the
     * source encoding(s).
     *
     * Arrays are walked recursively with this same method; only strings are
     * converted, so null, numbers and booleans keep their type.
     *
     * @param mixed $data
     * @param array $settings
     * @return mixed
     */
    function doMultibyte( $data, $settings )
    {
        if( true === is_array( $data ) ) {
            foreach ( $data AS $key => $value ) {
                // Recurse through doMultibyte, not doDecode, so nested values
                // get the same mbstring conversion as top-level ones
                $data[ $key ] = $this->doMultibyte( $value, $settings );
            }
            return $data;
        }

        if( false === is_string( $data ) ) {
            return $data;
        }

        return mb_convert_encoding( $data, $settings['convertTo'], $settings['convertFrom'] );
    }

    /**
     * Recursively decode UTF-8 strings with utf8_decode().
     *
     * Only strings are converted, so null, numbers and booleans keep their
     * type.
     *
     * NOTE(review): utf8_decode() always produces ISO-8859-1 and does not
     * look at $settings['convertTo'] (ISO-8859-15 by default), and it is
     * deprecated as of PHP 8.2. Use the mbstring path if that matters.
     *
     * @param mixed $data
     * @param array $settings Accepted for symmetry with doMultibyte(); unused
     * @return mixed
     */
    function doDecode( $data, $settings )
    {
        if( true === is_array( $data ) ) {
            foreach ( $data AS $key => $value ) {
                $data[ $key ] = $this->doDecode( $value, $settings );
            }
            return $data;
        }

        if( false === is_string( $data ) ) {
            return $data;
        }

        return utf8_decode($data);
    }

    /**
     * Recursively encode strings to UTF-8 with utf8_encode().
     *
     * Strings that already are valid UTF-8 are returned unchanged so they
     * are not encoded twice; non-string scalars are returned as they are.
     *
     * @param mixed $data
     * @param array $settings Accepted for symmetry with doMultibyte(); unused
     * @return mixed
     */
    function doEncode( $data, $settings )
    {
        if( true === is_array( $data ) ) {
            foreach ( $data AS $key => $value ) {
                $data[ $key ] = $this->doEncode( $value, $settings );
            }
            return $data;
        }

        if( false === is_string( $data ) || true === $this->isUTF8( $data ) ) {
            return $data;
        }

        return utf8_encode($data);
    }

    /**
     * Method to check if a string is valid UTF-8.
     *
     * Validation is delegated to PCRE: with the "u" modifier preg_match()
     * refuses a subject that is not valid UTF-8. An empty pattern is used so
     * no backtracking happens, which keeps the check reliable on long input.
     *
     * @param string $string
     * @return boolean
     */
    function isUTF8($string)
    {
        if( true === is_array( $string ) ) {
            return false;
        }

        return 1 === preg_match( '//u', (string) $string );
    }
}
?>