// Start-up checks for the script: confirm the CLI SAPI, resolve the system
// root, the old and new character sets, the root node and the report flag from
// the command line, then confirm that the iconv extension is available.
// NOTE(review): the statements that follow each error echo (presumably a usage
// print / exit and a closing brace) are not visible in this listing - confirm.
28 error_reporting(E_ALL);
// Only the 'cli' SAPI is accepted.
29 if ((php_sapi_name() !=
'cli')) {
30 trigger_error(
"You can only run this script from the command line\n", E_USER_ERROR);
// --system=<path>
33 $SYSTEM_ROOT = getCLIArg(
'system');
35 echo
"ERROR: You need to supply the path to the System Root\n";
// The path must be a directory that contains a readable core/include/init.inc.
40 if (!is_dir($SYSTEM_ROOT) || !is_readable($SYSTEM_ROOT.
'/core/include/init.inc')) {
41 echo
"ERROR: Path provided doesn't point to a Matrix installation's System Root. Please provide correct path and try again.\n";
// Lift the PHP memory limit unless it is already unlimited ('-1').
46 if (ini_get(
'memory_limit') !=
'-1') ini_set(
'memory_limit',
'-1');
// --old=<charset>: required, and must be a charset iconv accepts.
48 $SYS_OLD_ENCODING = getCLIArg(
'old');
49 if (!$SYS_OLD_ENCODING || !isValidCharset($SYS_OLD_ENCODING)) {
50 echo
"\nERROR: The charset you specified '$SYS_OLD_ENCODING', as system's old encoding is not valid charset type.\n\n";
54 define(
'SYS_OLD_ENCODING',$SYS_OLD_ENCODING);
// --new=<charset>: only validated here; an absent option is handled below.
56 $SYS_NEW_ENCODING = getCLIArg(
'new');
57 if (!isValidCharset($SYS_NEW_ENCODING)) {
58 echo
"\nERROR: The charset you specified '".$SYS_NEW_ENCODING.
"', as system's new encoding is not valid charset type.\n\n";
// A non-empty --new value becomes the SYS_NEW_ENCODING constant.
63 if (!empty($SYS_NEW_ENCODING)) {
64 define(
'SYS_NEW_ENCODING', $SYS_NEW_ENCODING);
// Otherwise (presumably the else branch - confirm) the charset is taken from
// the SQ_CONF_DEFAULT_CHARACTER_SET entry found in data/private/conf/main.inc.
66 $config_file = file_get_contents($SYSTEM_ROOT.
'/data/private/conf/main.inc');
67 preg_match(
"|SQ_CONF_DEFAULT_CHARACTER_SET',\s*'(.*?)'\);|", $config_file, $match);
68 if (empty($match[1])) {
69 echo
"\nERROR: The default charset is not specified in the main.inc. Pleas specify the new charset to convert the system to.\n\n";
73 define(
'SYS_NEW_ENCODING', $match[1]);
// --rootnode=<assetid>: falls back to asset 1 when not supplied.
76 $root_node_id = getCLIArg(
'rootnode');
77 $root_node_id = ($root_node_id) ? $root_node_id : 1;
// --report: when set, the script reports instead of regenerating content (see
// the $reportOnly check further down).
79 $reportOnly = getCLIArg(
'report');
// The conversion relies on iconv(); stop early when the function is missing.
82 if (function_exists(
'iconv') == FALSE) {
83 echo
"This script requires the php iconv module which isn't available.\n";
84 echo
"Install that module and try again.\n";
// Table map: for each database table, the column that plays each logical role
// ('assetid', 'contextid', 'value' and, where present, 'key'). fix_db() reads
// these roles through $fields[...].
// NOTE(review): the opening assignment (presumably "$tables = Array(") and the
// closing parentheses are not visible in this listing - confirm.
99 'sq_ast_attr_val' => Array(
100 'assetid' =>
'assetid',
101 'contextid' =>
'contextid',
102 'value' =>
'custom_val',
// NOTE(review): no 'value' entry is visible for sq_ast_mdata_val - confirm.
105 'sq_ast_mdata_val' => Array(
106 'assetid' =>
'assetid',
107 'contextid' =>
'contextid',
111 'sq_ast_mdata_dflt_val' => Array(
112 'assetid' =>
'assetid',
113 'contextid' =>
'contextid',
114 'value' =>
'default_val',
// The only table with a 'key' column (owning_attrid).
117 'sq_ast_attr_uniq_val' => Array(
118 'assetid' =>
'assetid',
119 'contextid' =>
'contextid',
120 'value' =>
'custom_val',
121 'key' =>
'owning_attrid',
// Main flow: sanity checks, user confirmation, database pass (fix_db), hand-off
// of the results through SYNC_FILE, optional regeneration of filesystem
// content, and a final summary.
// NOTE(review): closing braces, else branches and exit statements are not
// visible in this listing, so the exact nesting below needs confirming.
// Converting between identical encodings is rejected.
125 if (SYS_OLD_ENCODING == SYS_NEW_ENCODING) {
126 echo
"\nERROR: The old encoding ('" . SYS_OLD_ENCODING .
"') is the same as the current/new character set.\n\n";
// Root node 1 is the default chosen above when --rootnode is not given.
131 if ($root_node_id == 1) {
132 echo
"\nWARNING: You are running this script on the whole system.\nThis is fine, but it may take a long time\n";
// Log file written by log_error_msg(): <system root>/data/private/logs/<script name>.log
135 define(
'SCRIPT_LOG_FILE', $SYSTEM_ROOT.
'/data/private/logs/'.basename(__FILE__).
'.log');
// Interactive confirmation; any answer other than y/Y aborts.
138 echo
"\nIMPORTANT: This script will replace all the smart quote chars by their regular counterpart chars. And if value string is still\n";
139 echo
"invalid in the current system's charset then it performs charset conversion on string from older to current encoding\n";
140 echo
"YOU MUST BACKUP YOUR SYSTEM BEFORE RUNNING THIS SCRIPT\n";
141 echo
"Are you sure you want to proceed (Y/N)? \n";
143 $yes_no = rtrim(fgets(STDIN, 4094));
144 if (strtolower($yes_no) !=
'y') {
145 echo
"\nScript aborted. \n";
// File used to carry the serialised results from the database pass to the
// regeneration/summary step below.
151 define(
'SYNC_FILE', $SYSTEM_ROOT.
'/data/temp/system_integrity_fix_char_encoding.data');
// Batch size used by get_affected_assetids(); note it is defined as the string '100'.
153 define(
'BATCH_SIZE',
'100');
156 $start_time = microtime(TRUE);
// Bootstrap the system, then scan the tables below the root node.
164 require_once $SYSTEM_ROOT.
'/core/include/init.inc';
166 $summary = fix_db($root_node_id, $tables);
// Turn the per-table assetids into per-file-type batches.
170 $affected_assetids = get_affected_assetids($summary[
'affected_assetids']);
173 $contextids = array_keys($GLOBALS[
'SQ_SYSTEM']->getAllContexts());
// Persist everything the later step needs.
175 file_put_contents(SYNC_FILE, serialize(Array(
'affected_assetids' => $affected_assetids,
'db_summary' => $summary,
'contextids' => $contextids)));
// Without the sync file there is nothing to regenerate or summarise.
181 if (!is_file(SYNC_FILE)) {
182 echo
"Expected sync file containing the affected assetids not found. Only database was updated\n";
186 $summary = unserialize(file_get_contents(SYNC_FILE));
// Normal mode regenerates the filesystem content and reports what was fixed;
// the "need replacing" message presumably belongs to the --report branch - confirm.
189 if ($reportOnly == FALSE) {
190 regenerate_filesystem_content($summary[
'affected_assetids'], $summary[
'contextids']);
192 echo
"Number of db records replaced successfully: ".$summary[
'db_summary'][
'records_fixed_count'].
"\n";
193 echo
"Total errors recorded: ".$summary[
'db_summary'][
'error_count'].
"\n";
195 echo
"Number of db records that need replacing: ".$summary[
'db_summary'][
'records_fixed_count'].
"\n";
198 echo
"Total time taken to run the script: ".round(microtime(TRUE)-$start_time, 2).
" second(s)\n";
// Point the operator at the log file when any error was counted.
200 if ($summary[
'db_summary'][
'error_count'] > 0) {
201 echo
"\nPlease check ".SCRIPT_LOG_FILE.
" file for errors\n\n";
// fix_db($root_node, $tables)
// Scans the given tables for the root node and its children, checks each
// record's value against the old/new encodings, converts values with iconv
// where needed and updates the row inside a transaction.
//   $root_node : assetid whose subtree is processed
//   $tables    : table => Array(role => column) map (roles: assetid,
//                contextid, value, key)
// Returns (per the visible tail of the function) an array with 'error_count',
// 'records_fixed_count' and 'affected_assetids' (assetids grouped by table).
// NOTE(review): many lines of this function are not visible in this listing
// (braces, the try block, the UPDATE statement construction, the initialisers
// of $count, $errors and $execute) - confirm against the full file.
218 function fix_db($root_node, $tables)
// The root node itself is processed along with all of its children.
222 $target_assetids = array_keys($GLOBALS[
'SQ_SYSTEM']->am->getChildren($root_node));
223 array_unshift($target_assetids, $root_node);
225 if (empty($target_assetids)) {
226 echo
"\n\nAsset #${root_node} not found or no assets found underneath\n";
229 echo
"\n\nNumber of assets to look into : ".count($target_assetids).
" \n";
232 $records_fixed_count = 0;
233 $invalid_asset_records = Array();
// Collected as $affected_assetids[<table>][] = <assetid>.
236 $affected_assetids = Array();
// Switch to the 'db2' connection; it is restored after the loops.
238 $GLOBALS[
'SQ_SYSTEM']->changeDatabaseConnection(
'db2');
// Query 50 assetids at a time.
240 $chunks = array_chunk($target_assetids, 50);
244 foreach ($chunks as $assetids) {
245 foreach($tables as $table => $fields) {
246 $sql =
'SELECT '.trim(implode(
',',$fields),
',').
' FROM '.$table;
247 $sql .=
' WHERE assetid IN (\''.implode(
'\',\
'', $assetids).
'\')
';
249 $results = MatrixDAL::executeSqlAssoc($sql);
251 foreach($results as $record) {
// NOTE(review): $count is neither initialised nor incremented in the visible code.
253 if ($count % 100 == 0) {
// Pull the logical fields out of the row; a missing column yields NULL.
257 $value = isset($record[$fields['value
']]) ? $record[$fields['value
']] : NULL;
258 $assetid = isset($record[$fields['assetid
']]) ? $record[$fields['assetid
']] : NULL;
259 $key = isset($record[$fields['key
']]) ? $record[$fields['key
']] : NULL;
260 $contextid = isset($record[$fields['contextid
']]) ? $record[$fields['contextid
']] : NULL;
262 if (is_null($value) || is_null($assetid) || is_null($contextid) || ($fields['key
'] && is_null($key))) {
270 // If it's the same in the
new and old encodings, that
's good.
271 $checked = @iconv(SYS_OLD_ENCODING, SYS_NEW_ENCODING.'
273 if ($value === $checked) {
277 $update_required = FALSE;
278 $invalid_asset_records[] = array(
// Values that are not valid in the new encoding are converted from the old
// encoding with //IGNORE; the result is used only if it differs from the
// original and is itself valid.
285 if (!isValidValue($value)) {
288 $converted_value = @iconv(SYS_OLD_ENCODING, SYS_NEW_ENCODING.
'//IGNORE', $value);
292 if ($converted_value != $value && isValidValue($converted_value)) {
293 $value = $converted_value;
294 $update_required = TRUE;
// The update runs in its own transaction: an update reported as touching more
// than one row is logged with the placeholders substituted and rolled back,
// otherwise it is committed.
298 if ($update_required) {
300 $GLOBALS[
'SQ_SYSTEM']->doTransaction(
'BEGIN');
305 ".$fields[
'value'].
"=:value
307 ".$fields[
'assetid'].
"=:assetid".
308 " AND ".$fields[
'contextid'].
"=:contextid".
309 (!is_null($key) ?
" AND ".$fields[
'key'].
"=:key" :
"");
320 if (count($execute) > 1) {
321 $sql = str_replace(
':assetid', $assetid, $sql);
322 $sql = str_replace(
':contextid', $contextid, $sql);
323 $sql = str_replace(
':contextid', $contextid, $sql);
324 $sql = !is_null($key) ? str_replace(
':key', $key, $sql) : $sql;
331 $msg =
"Executing query \"$sql\" will affect ".count($execute).
" (more than 1) records! Ignoring this sql.";
334 $GLOBALS[
'SQ_SYSTEM']->doTransaction(
'ROLLBACK');
// Successful update: commit and remember the asset for regeneration.
339 $GLOBALS[
'SQ_SYSTEM']->doTransaction(
'COMMIT');
340 $records_fixed_count++;
341 $affected_assetids[$table][] = $assetid;
343 }
catch (Exception $e) {
348 $msg =
"Unexpected error occured while updating database: ".$e->getMessage();
351 $GLOBALS[
'SQ_SYSTEM']->doTransaction(
'ROLLBACK');
// NOTE(review): this second increment/record pair presumably belongs to a
// branch whose condition is not visible here - confirm.
354 $records_fixed_count++;
356 $affected_assetids[$table][] = $assetid;
// Message for a record whose invalid characters could not be repaired.
368 $msg =
"Asset with ".$fields[
'assetid'].
"=#$assetid, ".
369 (!is_null($key) ? $fields[
'key'].
"=#$key, and " :
"and ").
370 $fields[
'contextid'].
"=#$contextid in table $table ".
371 "contains invalid char(s), which were not replaced because ".
372 "either those invalid chars were not defined in the replacement array or the charset conversion was not successful".
373 "\nPotentially invalid characters include: ".listProblematicCharacters($value);
381 $GLOBALS[
'SQ_SYSTEM']->restoreDatabaseConnection();
383 unset($target_assetids);
// Report every record that was flagged as invalid.
387 $invalid_count =
sizeof(array_keys($invalid_asset_records));
388 echo
"Number of db records with invalid char(s): ".$invalid_count.
"\n";
389 if ($invalid_count > 0) {
390 foreach ($invalid_asset_records as $k => $details) {
391 echo
"\tAsset: ".$details[
'asset'].
" in table ".$details[
'table'];
392 echo
"\tPossibly problematic characters: ".listProblematicCharacters($details[
'value']).
"\n";
// Summary entries handed back to the caller.
398 'error_count' =>
sizeof(array_keys($errors)),
399 'records_fixed_count' => $records_fixed_count,
400 'affected_assetids' => $affected_assetids,
// get_affected_assetids($data)
// Converts the per-table assetid lists produced by fix_db() into lists grouped
// by the kind of file that has to be regenerated ('bodycopy_content_file',
// 'metadata_file', 'design_file'), de-duplicates them and splits each list
// into batches of BATCH_SIZE.
//   $data : Array(table name => Array(assetid, ...))
// Returns Array(type => Array(batch => Array(assetid, ...))).
// NOTE(review): break statements, closing braces and the initialisation of
// $start_index are not visible in this listing - confirm.
415 function get_affected_assetids($data)
418 $affected_assetids = Array(
419 'bodycopy_content_file' => Array(),
420 'metadata_file' => Array(),
421 'design_file' => Array(),
424 echo
"Getting the list of assetids that needs content regeneration ...";
425 foreach($data as $table_type => $assetids) {
426 switch($table_type) {
// Metadata values: the assets themselves need their metadata regenerated.
427 case 'sq_ast_mdata_val':
428 $affected_assetids[
'metadata_file'] = array_merge($affected_assetids[
'metadata_file'], $assetids);
// Metadata default values: the assetids here are metadata field assets.
// NOTE(review): $schemaid is computed but the visible getSchemaAssetids()
// call takes no argument - confirm.
432 case 'sq_ast_mdata_dflt_val':
433 $mm = $GLOBALS[
'SQ_SYSTEM']->getMetadataManager();
434 foreach($assetids as $mfield_assetid) {
436 $schemaid = array_keys($GLOBALS[
'SQ_SYSTEM']->am->getParents($mfield_assetid,
'metadata_schema'));
437 $affected_assetids[
'metadata_file'] = array_merge($affected_assetids[
'metadata_file'], $mm->getSchemaAssetids());
// Attribute values: keep the design / design_css assets, and map content_type
// assets to the major asset of their bodycopy_container link.
442 case 'sq_ast_attr_val':
444 $affected_assetids[
'design_file'] = array_keys($GLOBALS[
'SQ_SYSTEM']->am->getAssetInfo($assetids, Array(
'design',
'design_css'), TRUE));
448 $content_type_assetids = array_keys($GLOBALS[
'SQ_SYSTEM']->am->getAssetInfo($assetids, Array(
'content_type'), FALSE));
449 foreach($content_type_assetids as $assetid) {
450 $bodycopy_container_link = $GLOBALS[
'SQ_SYSTEM']->am->getLinks($assetid, SQ_LINK_TYPE_2, Array(
'bodycopy_container'), FALSE,
'minor');
451 if (isset($bodycopy_container_link[0][
'majorid'])) {
453 $affected_assetids[
'bodycopy_content_file'][] = $bodycopy_container_link[0][
'majorid'];
// Remove duplicates per type.
463 $affected_assetids[
'metadata_file'] = array_unique($affected_assetids[
'metadata_file']);
464 $affected_assetids[
'bodycopy_content_file'] = array_unique($affected_assetids[
'bodycopy_content_file']);
465 $affected_assetids[
'design_file'] = array_unique($affected_assetids[
'design_file']);
// Slice each type's list into consecutive chunks of BATCH_SIZE assetids.
468 $batched_assetids = Array();
469 foreach($affected_assetids as $type => $type_assetids) {
471 $asset_count = count($type_assetids);
472 $batched_assetids[$type] = Array();
473 while($start_index < $asset_count) {
474 $batched_assetids[$type][] = array_slice($type_assetids, $start_index, BATCH_SIZE);
475 $start_index += BATCH_SIZE;
479 unset($affected_assetids);
482 return $batched_assetids;
// regenerate_filesystem_content($assets_data, $contextids)
// Regenerates the files derived from the assets touched by fix_db(): bodycopy
// content files, metadata files and design files (including the design's
// customisations), once per context.
//   $assets_data : Array(type => Array(batch => Array(assetid, ...))) as built
//                  by get_affected_assetids()
//   $contextids  : context ids to iterate over
// NOTE(review): closing braces, the bodies of the empty/NULL checks and any
// per-batch process handling are not visible in this listing - confirm.
500 function regenerate_filesystem_content($assets_data, $contextids)
506 foreach($assets_data as $type => $assets_batch) {
507 if (empty($assets_batch)) {
511 echo
"Regenerating the ".str_replace(
'_',
' ', $type).
" ...";
512 foreach($assets_batch as $assetids) {
// NOTE(review): $SYSTEM_ROOT is used here but no "global" declaration for it
// is visible inside this function - confirm.
517 require_once $SYSTEM_ROOT.
'/core/include/init.inc';
// Work as the root user at the forced run level; both are restored at the end.
518 $root_user = $GLOBALS[
'SQ_SYSTEM']->am->getSystemAsset(
'root_user');
519 $GLOBALS[
'SQ_SYSTEM']->setCurrentUser($root_user);
521 $mm = $GLOBALS[
'SQ_SYSTEM']->getMetadataManager();
522 $GLOBALS[
'SQ_SYSTEM']->setRunLevel(SQ_RUN_LEVEL_FORCED);
// Each asset is regenerated within every context.
524 foreach($contextids as $contextid) {
525 $GLOBALS[
'SQ_SYSTEM']->changeContext($contextid);
527 foreach($assetids as $assetid) {
528 $asset = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($assetid);
529 if (is_null($asset)) {
// Bodycopy containers: rebuild the content file through the asset's edit functions.
532 if ($type ==
'bodycopy_content_file') {
534 $bodycopy_container_edit_fns = $asset->getEditFns();
535 $bodycopy_container_edit_fns->generateContentFile($asset);
536 }
else if ($type ==
'metadata_file') {
538 $mm->regenerateMetadata($assetid, NULL, FALSE);
// Remaining type (design files): only Design instances are processed.
541 if (!($asset instanceof
Design))
continue;
542 $design_edit_fns = $asset->getEditFns();
// The design file is regenerated only when parsing succeeds; errors are suppressed with @.
544 if (@$design_edit_fns->parseAndProcessFile($asset)) @$asset->generateDesignFile(
false);
// Propagate the change to the design's customisations.
// NOTE(review): $design is passed to updateFromParent() but is not assigned in
// the visible code - confirm.
546 $customisation_links = $GLOBALS[
'SQ_SYSTEM']->am->getLinks($assetid, SQ_LINK_TYPE_2,
'design_customisation',
true,
'major',
'customisation');
547 foreach($customisation_links as $link) {
548 $customisation = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($link[
'minorid'], $link[
'minor_type_code']);
549 if (is_null($customisation))
continue;
550 @$customisation->updateFromParent($design);
551 $GLOBALS[
'SQ_SYSTEM']->am->forgetAsset($customisation);
// Release the asset from the asset manager once it has been handled.
555 $asset = $GLOBALS[
'SQ_SYSTEM']->am->forgetAsset($asset);
560 $GLOBALS[
'SQ_SYSTEM']->restoreContext();
563 $GLOBALS[
'SQ_SYSTEM']->restoreRunLevel();
564 $GLOBALS[
'SQ_SYSTEM']->restoreCurrentUser();
// isValidValue($value, $charset=SYS_NEW_ENCODING)
// Checks whether $value is valid in $charset by converting it from $charset to
// itself with //IGNORE and comparing the outcome with the input using a loose
// (==) comparison; iconv warnings are suppressed with @.
//   $value   : string to check
//   $charset : charset name, defaults to the SYS_NEW_ENCODING constant
// NOTE(review): the statement that returns $result is not visible in this
// listing - presumably "return $result;", confirm.
585 function isValidValue($value, $charset=SYS_NEW_ENCODING)
587 $result = ($value == @iconv($charset, $charset.
"//IGNORE", $value));
/**
 * Tell whether iconv accepts the given charset name.
 *
 * The literal string 'test' is converted from the charset to itself; the
 * charset is considered valid when the conversion hands the same string back.
 * Warnings raised by iconv for unknown charsets are suppressed.
 *
 * @param string $charset charset name to probe
 *
 * @return boolean
 */
function isValidCharset($charset)
{
    $probe      = 'test';
    $round_trip = @iconv($charset, $charset, $probe);

    return $probe == $round_trip;
}
/**
 * Append a timestamped message to the script's log file.
 *
 * The line is written as "<day>-<month>-<year> <hour>-<minute>-<second>: <msg>"
 * to the file named by the SCRIPT_LOG_FILE constant. The hour uses the 24-hour
 * clock so that morning and afternoon entries cannot be confused (the 12-hour
 * form carries no AM/PM marker in this format).
 *
 * @param string $msg text to log
 *
 * @return void
 */
function log_error_msg($msg)
{
    $msg = date('j-m-y H-i-s').": ".$msg."\n";
    file_put_contents(SCRIPT_LOG_FILE, $msg, FILE_APPEND);
}
// Fork handling: pcntl_fork() is called and its result is dispatched with a
// switch; one branch reports "Forking failed!" and another waits (via
// pcntl_waitpid on any child, -1) for a child process to finish.
// NOTE(review): the enclosing function header, the case labels and the return
// statements are not visible in this listing - confirm against the full file.
625 $child_pid = pcntl_fork();
627 switch ($child_pid) {
629 trigger_error(
"Forking failed!");
637 pcntl_waitpid(-1, $status);
/**
 * Look up a long option ("--name" or "--name=value") on the command line.
 *
 * The first element of $_SERVER['argv'] that names the option wins. Matching
 * is case-insensitive, as before, but is now anchored to the whole argument
 * and the option name is regex-quoted, so "--new" no longer matches
 * "--newer=x" and a value may itself contain "=" (for example a path such as
 * "/opt/a=b") without being truncated.
 *
 * @param string $arg option name without the leading dashes
 *
 * @return mixed the value string for "--name=value" (possibly empty),
 *               TRUE for a bare "--name", FALSE when the option is absent
 */
function getCLIArg($arg)
{
    $argv = (isset($_SERVER['argv']) && is_array($_SERVER['argv'])) ? $_SERVER['argv'] : Array();

    // Group 1 only takes part in the match when an "=" follows the name.
    $pattern = '/^--'.preg_quote($arg, '/').'(?:=(.*))?$/is';

    foreach ($argv as $cli_arg) {
        if (!is_string($cli_arg) || !preg_match($pattern, $cli_arg, $reg)) {
            continue;
        }

        // preg_match() drops a trailing group that did not participate, which
        // separates "--name" (flag) from "--name=" (empty value).
        return array_key_exists(1, $reg) ? $reg[1] : true;
    }

    return false;
}
/**
 * Print the command-line help for this script to standard output.
 *
 * Describes what the script does, the accepted options (--system, --old,
 * --new, --rootnode, --report) and ends with a backup warning.
 *
 * @return void
 */
function print_usage()
{
    echo "\nThis script replaces all the non-utf8 smart quotes chars by their respective regular counterpart chars.";
    echo "\nIf string is still invalid in current charset encoding after the replacement then script will perform charset";
    echo "\nconversion on string from previous charset to the current one.\n\n";

    echo "Usage: php ".basename(__FILE__)." --system=<SYSTEM_ROOT> --old=<OLD_CHARSET> [--new=<NEW_CHARSET>] [--rootnode=<ROOT_NODE>] [--report]\n\n";
    echo "\t<SYSTEM_ROOT> : The root directory of Matrix system.\n";
    echo "\t<OLD_CHARSET> : Previous charset of the system. (eg. UTF-8, Windows-1252, etc)\n";
    echo "\t<NEW_CHARSET> : New charset of the system. (eg. UTF-8, Windows-1252, etc)\n";
    echo "\t<ROOT_NODE> : Assetid of the rootnode (all children of the rootnode will be processed by the script).\n";
    echo "\t<--report> : Issue a report only instead of also trying to convert the assets.\n";

    echo "\nWARNING: IT IS STRONGLY RECOMMENDED THAT YOU BACKUP YOUR SYSTEM BEFORE RUNNING THIS SCRIPT\n\n";
}
// htmlallentities($str)
// Walks $str byte by byte and appends numeric HTML entities ("&#N;") to $res.
// Four forms are visible: the byte value itself, and values combined from the
// current byte plus one, two or three following bytes (each following byte
// contributes its low 6 bits and advances $i).
//   $str : input string, read as raw bytes
// NOTE(review): the conditions selecting each form, the initialisation of $res
// and the return statement are not visible in this listing - confirm.
691 function htmlallentities($str)
694 $strlen = strlen($str);
695 for ($i = 0; $i < $strlen; $i++) {
696 $byte = ord($str[$i]);
700 $res .= '&
#'.ord($str[$i]).';';
702 $res .= '&
#'.((63&$byte)*64 + (63&ord($str[++$i]))).';';
704 $res .= '&
#'.((15&$byte)*4096 + (63&ord($str[++$i]))*64 + (63&ord($str[++$i]))).';';
706 $res .= '&
#'.((15&$byte)*262144 + (63&ord($str[++$i]))*4096 + (63&ord($str[++$i]))*64 + (63&ord($str[++$i]))).';';
// listProblematicCharacters($value)
// Builds a human-readable list of the characters found in $value: the value is
// turned into numeric entities with htmlallentities(), the distinct codes are
// collected, and each one is rendered as "<character> (<code>), ". The trailing
// comma and whitespace are stripped from the returned string.
//   $value : string to inspect
// NOTE(review): the initialisation of $probChars is not visible in this
// listing - confirm.
721 function listProblematicCharacters($value)
723 $entified = htmlallentities($value);
724 preg_match_all(
'/&#([0-9]+);/', $entified, $matches);
// Each code is reported once, however often it occurs.
725 $codes = array_unique($matches[1]);
727 foreach ($codes as $code) {
728 $probChars .= html_entity_decode(
'&#'.$code.
';', ENT_COMPAT,
'utf-8').
' ('.$code.
'), ';
731 return preg_replace(
'/,\s*$/',
'', $probChars);