/**
 * Prints the command-line usage summary for this script.
 *
 * Every line of help text is passed to printStdErr(), so the usage text goes
 * to STDERR and does not mix with the XML that the script echoes to standard
 * output.
 *
 * NOTE(review): the function's braces are not visible in this extract.
 */
102 function printUsage()
// Title line
104 printStdErr(
"CSV and metadata mapping to XML converter\n");
// Synopsis: five positional arguments followed by two optional flags
105 printStdErr(
'Usage: csv_to_xml_actions [system root] [csv file] [mapping file] [parent id] [asset type] (-unique) (-ignore_blank)');
// One description line per positional argument
106 printStdErr(
'system root : The Matrix System root directory');
107 printStdErr(
'csv file : A comma separated values file that represents the site structure');
108 printStdErr(
'mapping file : An XML file containing column name to metadata mapping data');
109 printStdErr(
'parent id : The parent asset ID');
110 printStdErr(
'asset type : Asset type to create for each entry (eg; data_record)');
// Optional flags
111 printStdErr(
'-unique : The "-unique" option will instruct the exporter to exclude duplicate records');
112 printStdErr(
'-ignore_blank: The "-ignore_blank" option will not attempt to import fields which are blank*');
// Footnote explaining what counts as "blank" for -ignore_blank
113 printStdErr(
" (* = empty fields or those composed entirely of spaces or dashes)\n");
/**
 * Echoes the child elements of a "create asset" action to standard output.
 *
 * Five elements are written, one per line: <action_id>, <action_type>,
 * <type_code>, <link_type> and <parentid>.
 *
 * The values are concatenated into the markup as-is; no XML escaping is
 * applied in the visible code.
 *
 * NOTE(review): any enclosing element written around these children is not
 * visible in this extract.
 *
 * @param string $action_id   value written into <action_id>
 * @param string $action_type value written into <action_type>
 * @param string $type_code   value written into <type_code>
 * @param string $link_type   value written into <link_type>
 * @param string $parent_id   value written into <parentid>
 */
130 function printCreateAssetAction($action_id, $action_type, $type_code, $link_type, $parent_id)
133 echo
' <action_id>'.$action_id.
"</action_id>\n";
134 echo
' <action_type>'.$action_type.
"</action_type>\n";
135 echo
' <type_code>'.$type_code.
"</type_code>\n";
136 echo
' <link_type>'.$link_type.
"</link_type>\n";
137 echo
' <parentid>'.$parent_id.
"</parentid>\n";
/**
 * Echoes the child elements of an action that operates on an existing asset.
 *
 * Writes <action_id> and <action_type>, then <asset> with the $asset value
 * wrapped in "[[" and "]]", and finally one element per entry of $settings,
 * using the array key as the tag name and the array value as the content.
 *
 * Neither the values nor the setting names are escaped or validated in the
 * visible code, so a key that is not a legal XML name produces invalid output.
 *
 * NOTE(review): any enclosing element written around these children is not
 * visible in this extract.
 *
 * @param string $action_id   value written into <action_id>
 * @param string $action_type value written into <action_type>
 * @param string $asset       asset reference placed between [[ and ]]
 * @param array  $settings    tag name => content pairs (defaults to an empty array)
 */
154 function printCreateTriggerAction($action_id, $action_type, $asset, $settings=Array())
157 echo
' <action_id>'.$action_id.
"</action_id>\n";
158 echo
' <action_type>'.$action_type.
"</action_type>\n";
159 echo
' <asset>[['.$asset.
"]]</asset>\n";
// One element per setting: <key>value</key>
161 foreach ($settings as $setting => $value) {
162 echo
' <'.$setting.
'>'.$value.
'</'.$setting.
">\n";
/**
 * Returns the result of compactCharacters() called with a single space as the
 * character to compact.
 *
 * @param string $string text to process
 */
178 function compactSpaces($string)
180 return compactCharacters($string,
' ');
/**
 * Repeatedly replaces each doubled occurrence of $char in $string with a
 * single $char.
 *
 * NOTE(review): the initial assignment of $orig_string and the function's
 * return statement are not visible in this extract; compactSpaces() returns
 * this function's result, so presumably the compacted string is returned.
 *
 * @param string $string text to process
 * @param string $char   character (or substring) whose repeats are replaced
 */
194 function compactCharacters($string, $char)
// Keep going until a pass leaves the string unchanged: one str_replace() pass
// only halves a run, so longer runs need several passes.
197 while ($orig_string != $string) {
198 $orig_string = $string;
199 $string = str_replace($char.$char, $char, $string);
/**
 * Reads the XML mapping file and builds the metadata mapping array used by
 * the rest of the script.
 *
 * Keys written by the visible code:
 *   metadata_schema_id           (int)    "id" attribute of <schema>
 *   metadata_schema_group_field  (int)    "group_by_field_id" attribute, 0 if absent
 *   metadata_schema_asset_field  (string) "group_by_asset_field" attribute, '' if absent
 *   metadata_schema_name_field            "name_field" attribute, 0 if absent
 *   metadata_fields              field alias => field id
 *   metadata_ignore_fields       field alias => 1
 *   metadata_required_fields     initialised empty here
 *   metadata_field_types         initialised empty here
 *
 * Problems are reported with printStdErr().
 *
 * NOTE(review): what happens after each error message (presumably the script
 * terminates) is not visible in this extract.
 *
 * @param string $mapping_filename path of the XML mapping file
 *
 * @return array the mapping array described above
 */
215 function getMetadataMapping($mapping_filename)
// The XML converter is loaded from the Matrix installation given on the command line
217 include_once $GLOBALS[
'SYSTEM_ROOT'].
'/fudge/general/xml_converter.inc';
219 $metadata_mapping = Array(
220 'metadata_schema_id' => 0,
221 'metadata_fields' => Array(),
222 'metadata_ignore_fields' => Array(),
223 'metadata_required_fields' => Array(),
224 'metadata_field_types' => Array(),
// NOTE(review): the construction of $xml is not visible in this extract.
229 $xml_array = $xml->getArrayFromFile($mapping_filename);
// Number of top-level entries in the parsed document
232 $num_schemata = count($xml_array);
235 if ($num_schemata != 1) {
236 printStdErr(
"* A single metadata schema must be defined in the mapping file <schema id=\"[id]\" group_by_field=\"[field_id]\">...</schema>\n");
// NOTE(review): 'schema' is read without an isset() check, and the condition
// below repeats the $num_schemata test from above although its message is
// about the root element's name - it looks like it was meant to test $schema.
240 $schema = $xml_array[
'schema'];
241 if ($num_schemata != 1) {
242 printStdErr(
"* The mapping file must contain one root element named 'schema'\n");
// Schema asset id; the (int) cast yields 0 for a non-numeric attribute value
246 $metadata_mapping[
'metadata_schema_id'] = (int)(trim($schema[0][
'@id']));
247 if ($metadata_mapping[
'metadata_schema_id'] == 0) {
248 printStdErr(
"* A metadata schema asset id must be specified in the 'id' attribute of the <schema> tag\n");
// Optional: id of the metadata field to group by (0 = not specified)
252 $metadata_mapping[
'metadata_schema_group_field'] = 0;
253 if (isset($schema[0][
'@group_by_field_id'])) {
254 $metadata_mapping[
'metadata_schema_group_field'] = (int)(trim($schema[0][
'@group_by_field_id']));
// Optional: "group_by_asset_field" attribute ('' = not specified)
257 $metadata_mapping[
'metadata_schema_asset_field'] =
'';
258 if (isset($schema[0][
'@group_by_asset_field'])) {
259 $metadata_mapping[
'metadata_schema_asset_field'] = trim($schema[0][
'@group_by_asset_field']);
// "name_field" attribute.
// NOTE(review): the default here is integer 0, but it is later compared with ''.
// Since PHP 8.0, (0 != '') is TRUE, so a missing name_field attribute would
// count as "specified"; before 8.0 the comparison was FALSE. Defaulting to ''
// would behave the same on both.
262 $metadata_mapping[
'metadata_schema_name_field'] = 0;
263 if (isset($schema[0][
'@name_field'])) {
264 $metadata_mapping[
'metadata_schema_name_field'] = trim($schema[0][
'@name_field']);
// $schema holds the list of <schema> elements; exactly one is expected
268 $num_levels = count($schema);
269 if ($num_levels != 1) {
270 printStdErr(
"* A single level of field definitions should be defined in the <schema> section in the form <field id=\"[id]\" alias=\"[alias]\" />...\n");
// From here on $schema is the single <schema> element itself
275 $schema = $schema[0];
278 $fields = $schema[
'field'];
280 $num_fields = count($fields);
283 if ($num_fields == 0) {
284 printStdErr(
"* Metadata fields must be defined in the <schema> section in the form <field id=\"[id]\" alias=\"[alias]\" />...\n");
// Grouping by metadata field and grouping by parent asset are mutually exclusive
288 $group_by_field_specified = ($metadata_mapping[
'metadata_schema_group_field'] > 0);
289 $group_by_field_found = FALSE;
291 $group_by_asset_specified = ($metadata_mapping[
'metadata_schema_asset_field'] !=
'');
292 if ($group_by_field_specified && $group_by_asset_specified) {
293 printStdErr(
"* Group by field and group by parent asset must be used separately\n");
297 $name_field_specified = ($metadata_mapping[
'metadata_schema_name_field'] !=
'');
// NOTE(review): no visible line ever sets $name_field_found to TRUE.
298 $name_field_found = FALSE;
// Walk the <field> definitions
301 foreach ($fields as $field) {
302 $field_id = (int)trim($field[
'@id']);
303 $field_alias = trim($field[
'@alias']);
// TRUE when ignore="1" is present on the <field>; 0 when the attribute is absent
307 $ignore_field = ((isset($field[
'@ignore'])) ? ((
int)trim($field[
'@ignore']) == 1) : 0);
309 if ($field_id == 0) {
310 printStdErr(
"* A field id must be specified in the 'id' attribute of the <field /> tag\n");
// Record whether the group-by field id is among the defined fields
315 if ($group_by_field_specified) {
316 if ($field_id == $metadata_mapping[
'metadata_schema_group_field']) {
317 $group_by_field_found = TRUE;
// alias => field id
321 $metadata_mapping[
'metadata_fields'][$field_alias] = $field_id;
// Ignore list is keyed by alias.
// NOTE(review): the condition guarding this assignment (presumably
// $ignore_field) is not visible in this extract.
324 $metadata_mapping[
'metadata_ignore_fields'][$field_alias] = 1;
330 if ($group_by_field_specified && !$group_by_field_found) {
331 printStdErr(
"* The specified group_by_field ID was not defined as a <field> in the mapping file\n");
336 if (!$name_field_specified) {
337 printStdErr(
"* A name_field must be specified in the <schema> tag\n");
341 return $metadata_mapping;
/**
 * Writes $string followed by a newline to the STDERR stream.
 *
 * @param string $string text to write
 */
354 function printStdErr($string)
356 fwrite(STDERR,
"$string\n");
/**
 * Checks the mapping produced by getMetadataMapping() against the Matrix
 * system and adds system information to it.
 *
 * The visible code:
 *  - loads the asset whose id is 'metadata_schema_id' and checks that its
 *    type() is 'metadata_schema';
 *  - reports every field id returned by the metadata manager for that schema
 *    that does not appear among the mapping's 'metadata_fields' values;
 *  - stores the metadata manager's result in 'metadata_field_types';
 *  - loads each field asset into 'metadata_field_objects' (keyed by field id)
 *    and sets 'metadata_required_fields'[field id] = 1.
 *
 * NOTE(review): what follows each error message (presumably the script
 * terminates) is not visible in this extract.
 *
 * @param array &$metadata_mapping mapping array, modified in place
 */
369 function validateMetadataMapping(&$metadata_mapping)
372 printStdErr(
'- Initialising Matrix...');
// The schema asset must exist ...
375 $schema_asset = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($metadata_mapping[
'metadata_schema_id']);
376 if (!$schema_asset) {
377 printStdErr(
'* The supplied schema (ID: '.$metadata_mapping[
'metadata_schema_id'].
") could not be found in the system\n");
// ... and must be a metadata schema
380 if ($schema_asset->type() !=
'metadata_schema') {
381 printStdErr(
'* The supplied schema (ID: '.$metadata_mapping[
'metadata_schema_id'].
") is not a valid Metadata Schema in the system\n");
// Fields the system knows for this schema; the array keys are used as field ids
386 $mm = $GLOBALS[
'SQ_SYSTEM']->getMetadataManager();
387 $metadata_fields = $mm->getMetadataFields(Array($metadata_mapping[
'metadata_schema_id']));
390 $metadata_system_field_ids = array_keys($metadata_fields);
391 $metadata_mapping_field_ids = array_values($metadata_mapping[
'metadata_fields']);
// System field ids that the mapping file does not mention
393 $metadata_differences = array_diff($metadata_system_field_ids, $metadata_mapping_field_ids);
395 if (count($metadata_differences) > 0) {
396 printStdErr(
'* One or more system metadata fields were not specified in the mapping file');
397 printStdErr(
' The following metadata field IDs are required:');
398 foreach ($metadata_differences as $metadata_field_id) {
399 printStdErr(
' '.$metadata_field_id);
406 $metadata_mapping[
'metadata_field_types'] = $metadata_fields;
408 $am = $GLOBALS[
'SQ_SYSTEM']->am;
// Load every field asset so that values can be validated later
410 foreach ($metadata_fields as $metadata_field_id => $metadata_field_type) {
// The type code is read from the first entry of each field's record
411 $metadata_field_type = $metadata_field_type[0][
'type_code'];
412 $field = $am->getAsset($metadata_field_id, $metadata_field_type);
413 $metadata_mapping[
'metadata_field_objects'][$metadata_field_id] = $field;
415 if ($field->attr(
'required')) {
// NOTE(review): this reads 'metadata_ignored_fields' indexed by field ID, but
// getMetadataMapping() populates 'metadata_ignore_fields' (no "d") indexed by
// field ALIAS. As written this isset() looks like it can never be TRUE, so an
// ignored mandatory field would not be reported - confirm and fix the lookup.
416 if (isset($metadata_mapping[
'metadata_ignored_fields'][$metadata_field_id])) {
417 printStdErr(
'* The metadata field (ID '.$metadata_field_id.
') cannot be ignored upon import as it is mandatory');
// Remember the field as mandatory
421 $metadata_mapping[
'metadata_required_fields'][$metadata_field_id] = 1;
/**
 * Decides whether $value is acceptable for the given metadata field object,
 * branching on the object's class name.
 *
 * NOTE(review): the body of the Metadata_Field_Text branch and the function's
 * return statement are not visible in this extract; the caller uses the result
 * as a boolean, so presumably $valid_value is returned.
 *
 * @param object &$metadata_field_object metadata field asset
 * @param string $value                  value taken from the CSV record
 */
437 function validateMetadataField(&$metadata_field_object, $value)
// Classes not handled by any branch below leave this as FALSE
439 $valid_value = FALSE;
441 $metadata_field_type = get_class($metadata_field_object);
442 if ($metadata_field_type ==
'Metadata_Field_Text') {
444 }
else if ($metadata_field_type ==
'Metadata_Field_Select') {
// Select fields: the selection attribute performs the validation
446 $selection = $metadata_field_object->getSelectionAttribute();
447 $valid_value = $selection->validateValue($value);
448 }
else if ($metadata_field_type ==
'Metadata_Field_Thesaurus') {
// Thesaurus fields: the value must be a positive integer asset id
450 $asset_id = (int)($value);
451 if (($asset_id == $value) && ($asset_id > 0)) {
// NOTE(review): ->id is read without checking the getAsset() result; if
// getAsset() can return a non-object for an unknown id, this line fails
// instead of marking the value invalid - confirm.
452 $thesaurus = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($asset_id);
453 $asset_id = $thesaurus->id;
455 $valid_value = ($asset_id > 0);
456 }
else if ($metadata_field_type ==
'Metadata_Field_Date') {
// Date fields: validated with is_iso8601(), which is not defined in this
// file (presumably it comes from the datetime include below)
457 include_once $GLOBALS[
'SYSTEM_ROOT'].
'/fudge/general/datetime.inc';
458 $valid_value = is_iso8601($value);
// ---------------------------------------------------------------------------
// Script entry: command-line check, argument parsing and export setup.
// NOTE(review): what follows each error message below (presumably usage output
// and script termination) is not visible in this extract.
// ---------------------------------------------------------------------------
// Refuse to run under anything but the CLI SAPI
468 if ((php_sapi_name() !=
'cli')) {
469 trigger_error(
"You can only run this script from the command line\n", E_USER_ERROR);
// Argument 1: Matrix system root, stored in a global because the functions
// above read it to locate their include files
473 $argv = $_SERVER[
'argv'];
474 $GLOBALS[
'SYSTEM_ROOT'] = (isset($argv[1])) ? $argv[1] :
'';
475 if (empty($GLOBALS[
'SYSTEM_ROOT'])) {
477 printStdErr(
"* The Matrix system root directory must be specified as the first parameter\n");
481 require_once $GLOBALS[
'SYSTEM_ROOT'].
'/core/include/init.inc';
// Arguments 2-5 are mandatory.
// NOTE(review): unlike argument 1 they are read without isset(), so a missing
// argument raises an undefined-index/offset diagnostic before empty() is reached.
484 $csv_filename = $argv[2];
485 if (empty($csv_filename)) {
487 printStdErr(
"* A CSV filename must be specified as the second parameter\n");
492 $mapping_filename = $argv[3];
493 if (empty($mapping_filename)) {
495 printStdErr(
"* A mapping filename must be specified as the third parameter\n");
500 $global_parent_id = $argv[4];
501 if (empty($global_parent_id)) {
503 printStdErr(
"* A parent ID must be specified as the fourth parameter\n");
508 $global_asset_type = $argv[5];
509 if (empty($global_asset_type)) {
511 printStdErr(
"* An asset type must be specified as the fifth parameter\n");
// Optional flags, accepted in either order as arguments 6 and 7 (case-insensitive)
515 $export_unique_records_only = FALSE;
516 $ignore_blank_fields = FALSE;
518 if (isset($argv[6])) {
519 $export_unique_records_only = (strtolower($argv[6]) ==
'-unique');
520 $ignore_blank_fields = (strtolower($argv[6]) ==
'-ignore_blank');
523 if (isset($argv[7])) {
524 $export_unique_records_only = ($export_unique_records_only || (strtolower($argv[7]) ==
'-unique'));
525 $ignore_blank_fields = ($ignore_blank_fields || (strtolower($argv[7]) ==
'-ignore_blank'));
// Open the CSV file for reading; this handle feeds the main loop
529 $csv_fd = fopen($csv_filename,
'r');
532 printStdErr(
"* The supplied CSV file was not found\n");
// NOTE(review): in the visible lines $mapping_fd is not used again -
// getMetadataMapping() is given the filename - so this open appears to serve
// only as an existence check, and no fclose() is visible.
536 $mapping_fd = fopen($mapping_filename,
'r');
539 printStdErr(
"* The supplied mapping file was not found\n");
// Parse the mapping file
546 $metadata_mapping = getMetadataMapping($mapping_filename);
548 $group_by_field = $metadata_mapping[
'metadata_schema_group_field'];
549 $name_field = $metadata_mapping[
'metadata_schema_name_field'];
// The first CSV row is treated as the header row
551 $is_header_line = TRUE;
// Action ids already emitted, used to keep them unique
554 $action_ids = Array();
// Check the mapping against the Matrix system
556 validateMetadataMapping($metadata_mapping);
558 printStdErr(
"- Exporting XML...\n");
// The XML goes to standard output; progress and errors go to STDERR
560 echo
"<?xml version=\"1.0\" encoding=\"iso-8859-1\" ?>\n";
// Group values for which a folder has already been emitted
563 $group_names = Array();
// Column positions resolved from the header row (-1 = not found)
566 $group_by_field_name =
'';
567 $group_by_field_column = -1;
568 $group_by_asset_field_column = -1;
569 $name_field_column = -1;
// Translate the group-by field id back to its alias (CSV column name)
571 if ($group_by_field > 0) {
572 foreach ($metadata_mapping[
'metadata_fields'] as $field_name => $field_id) {
573 if ($field_id == $group_by_field) {
574 $group_by_field_name = $field_name;
// Counters reported when the export finishes
580 $num_folders_created = 0;
581 $num_assets_created = 0;
582 $num_fields_ignored = 0;
583 $num_fields_blank = 0;
584 $num_records_ignored = 0;
// Serialised records already exported, used by the -unique option
586 $imported_records = Array();
// ---------------------------------------------------------------------------
// Main loop: one pass per CSV row. The first row supplies the column headers;
// every later row produces the actions for one asset.
// NOTE(review): fgetcsv() is given a length of 1024, so a CSV line longer than
// that is not read as a single record - confirm the limit suits the input data.
// NOTE(review): closing braces, "else" branches, continue/exit statements and
// the "$settings = Array(" openers are not visible in this extract.
// ---------------------------------------------------------------------------
588 while (($data = fgetcsv($csv_fd, 1024,
',')) !== FALSE) {
589 $num_fields = count($data);
591 if ($num_fields >= 1) {
// --- Header row: resolve the column positions ---
// NOTE(review): the assignment of $headers is not visible in this extract.
592 if ($is_header_line) {
// Column that holds the group-by metadata field
596 if ($group_by_field_name !=
'') {
597 foreach ($headers as $key => $field_name) {
598 if ($field_name == $group_by_field_name) {
599 $group_by_field_column = $key;
// Column that holds the target parent asset id
606 if (isset($metadata_mapping[
'metadata_schema_asset_field'])) {
607 if ($metadata_mapping[
'metadata_schema_asset_field'] !=
'') {
608 foreach ($headers as $key => $field_name) {
609 if ($field_name == $metadata_mapping[
'metadata_schema_asset_field']) {
610 $group_by_asset_field_column = $key;
// Column that holds the asset name
618 foreach ($headers as $key => $field_name) {
619 if ($field_name == $name_field) {
620 $name_field_column = $key;
// A name column that is not mapped to a metadata field is added to the ignore list
625 if (!isset($metadata_mapping[
'metadata_fields'][$name_field])) {
626 $metadata_mapping[
'metadata_ignore_fields'][$name_field] = 1;
634 if ($name_field_column < 0) {
635 printStdErr(
'* The specified name field "'.$name_field.
"\" was not found in the CSV file\n");
639 $is_header_line = FALSE;
// --- Data row: required-field check ---
643 if (count($metadata_mapping[
'metadata_required_fields'])) {
644 $data_available = TRUE;
646 foreach ($metadata_mapping[
'metadata_required_fields'] as $required_field_id => $val) {
// Find the column mapped to this required field
648 for ($n=0; $n<count($headers); $n++) {
649 $column_name = $headers[$n];
650 if (!isset($metadata_mapping[
'metadata_ignore_fields'][$column_name])) {
651 $metadata_field_id = $metadata_mapping[
'metadata_fields'][$column_name];
653 if ($metadata_field_id == $required_field_id) {
654 $value = trim(compactSpaces($data[$n]));
// NOTE(review): the test on $value that guards this assignment is not visible.
657 $data_available = FALSE;
664 if (!$data_available)
break;
669 if (!$data_available) {
670 $num_records_ignored++;
// --- Asset name and action id ---
676 $name = trim($data[$name_field_column]);
677 $name = compactSpaces($name);
// Action id: lower-cased name, spaces replaced by underscores, commas removed.
// NOTE(review): ereg_replace() was removed in PHP 7.0. Both patterns are plain
// literals, so str_replace() would give the same result on every PHP version.
680 $action_id = strtolower($name);
681 $action_id = ereg_replace(
' ',
'_', $action_id);
682 $action_id = ereg_replace(
',',
'', $action_id);
// Make the action id unique by appending "_<n>".
// NOTE(review): the initialisation and increment of $n are not visible.
686 if (isset($action_ids[$action_id])) {
688 while (isset($action_ids[$action_id.
'_'.$n])) {
694 if ($n > 0) $action_id = $action_id.
'_'.$n;
695 $action_ids[$action_id] = 1;
// --- Optional grouping folder, emitted once per distinct group value ---
698 $group_by_folder_name =
'';
699 $folder_name_orig =
'';
701 if ($group_by_field_column >= 0) {
702 $value = trim($data[$group_by_field_column]);
703 $value = compactSpaces($value);
705 $folder_name_orig = $value;
// Here a comma becomes a space, whereas the action id above drops commas
706 $folder_name = ereg_replace(
' ',
'_', $value);
707 $folder_name = ereg_replace(
',',
' ', $folder_name);
709 $group_by_folder_name =
'create_folder_'.$folder_name;
712 if (!isset($group_names[$value])) {
713 $group_names[$value] = 1;
716 printStdErr(
'- Creating folder '.$value);
717 printCreateAssetAction(
'create_folder_'.$folder_name,
'create_asset',
'folder',
'1', $global_parent_id);
719 $num_folders_created++;
723 'attribute' =>
'name',
// Action that sets the new folder's name
726 printCreateTriggerAction(
'set_folder_'.$folder_name.
'_name',
'set_attribute_value',
'output://create_folder_'.$folder_name.
'.assetid', $settings);
731 'path' => $folder_name,
// Action that sets the new folder's web path
733 printCreateTriggerAction(
'set_folder_'.$folder_name.
'_path',
'add_web_path',
'output://create_folder_'.$folder_name.
'.assetid', $settings);
// Parent of the new asset: the global parent unless a folder name is set.
// NOTE(review): $folder_name is only assigned inside the group-by branch above
// in the visible lines; without grouping it would be undefined here - confirm.
737 $asset_parent_id = $global_parent_id;
738 if ($folder_name !=
'') {
739 $asset_parent_id =
'[[output://create_folder_'.$folder_name.
'.assetid]]';
// --- Collect this record's metadata values, keyed by field id ---
743 $record_metadata = Array();
744 for ($n=0; $n<$num_fields; $n++) {
745 $column_name = trim($headers[$n]);
748 if (!isset($metadata_mapping[
'metadata_ignore_fields'][$column_name])) {
750 $value = trim(compactSpaces($data[$n]));
// With -ignore_blank: a value that is empty, or that collapses to a single
// dash once repeated dashes are compacted, is treated as blank.
// NOTE(review): the body of this branch is not visible; $num_fields_blank is
// never incremented in the visible lines.
752 if ($ignore_blank_fields && ((compactCharacters($value,
'-') ==
'-') || ($value ==
''))) {
// NOTE(review): an unmapped column raises an undefined-index diagnostic on the
// lookup below before isset() is evaluated.
757 $metadata_field_id = $metadata_mapping[
'metadata_fields'][$column_name];
759 if (!isset($metadata_field_id)) {
760 printStdErr(
'* Metadata schema mapping is missing for the "'.$column_name.
"\" field. Cannot continue\n");
764 $record_metadata[$metadata_field_id] = $value;
// --- With -unique: skip records whose metadata was already exported ---
769 if ($export_unique_records_only) {
770 $record_serialised = serialize($record_metadata);
772 if (isset($imported_records[$record_serialised])) {
774 printStdErr(
'--- Ignoring duplicate asset '.$name.(($folder_name_orig !=
'') ? (
' in folder "'.$folder_name_orig).
'"' :
''));
775 $num_records_ignored++;
778 $imported_records[$record_serialised] = 1;
// --- Group by parent asset: the parent id comes from the CSV column ---
783 if ($group_by_asset_field_column >= 0) {
784 $asset_parent_id = (int)$data[$group_by_asset_field_column];
785 printStdErr(
'-- Targeting asset '.$asset_parent_id);
// --- Emit the actions for the asset itself ---
788 printStdErr(
'-- Creating asset '.$name.(($folder_name_orig !=
'') ? (
' in folder "'.$folder_name_orig).
'"' :
''));
789 printCreateAssetAction($action_id,
'create_asset', $global_asset_type,
'1', $asset_parent_id);
791 $num_assets_created++;
795 'attribute' =>
'name',
// Action that sets the asset's name
798 printCreateTriggerAction(
'set_'.$action_id.
'_name',
'set_attribute_value',
'output://'.$action_id.
'.assetid', $settings);
802 'path' => $action_id,
// Action that sets the asset's web path
804 printCreateTriggerAction(
'set_'.$action_id.
'_path',
'add_web_path',
'output://'.$action_id.
'.assetid', $settings);
808 'schemaid' => $metadata_mapping[
'metadata_schema_id'],
// Action that applies the metadata schema
811 printCreateTriggerAction(
'set_'.$action_id.
'_metadata_schema',
'set_metadata_schema',
'output://'.$action_id.
'.assetid', $settings);
// --- One action per metadata value that passes validation ---
814 foreach ($record_metadata as $metadata_field_id => $value) {
815 $metadata_value_valid = validateMetadataField($metadata_mapping[
'metadata_field_objects'][$metadata_field_id], $value);
817 if ($metadata_value_valid) {
819 'fieldid' => $metadata_field_id,
822 printCreateTriggerAction(
'set_'.$action_id.
'_metadata_value_'.$metadata_field_id,
'set_metadata_value',
'output://'.$action_id.
'.assetid', $settings);
// Invalid value: report it and ask the operator whether to continue
824 printStdErr(
"\n* The value (".$value.
') set for field '.$metadata_field_id.
' is invalid for a '.get_class($metadata_mapping[
'metadata_field_objects'][$metadata_field_id]).
" field\n");
825 printStdErr(
'User Options ------------');
826 printStdErr(
'(I)gnore this field and continue exporting subsequent records');
827 printStdErr(
'(C)ancel export');
// Re-read STDIN until the answer is "i" or "c" (case-insensitive)
829 $valid_choice = FALSE;
830 while (!$valid_choice) {
831 $user_choice = rtrim(strtolower(fgets(STDIN, 3)));
832 $valid_choice = (($user_choice ==
'i') || ($user_choice ==
'c'));
835 if ($user_choice ==
'c') {
836 printStdErr(
"\n- Export cancelled by user");
840 $num_fields_ignored++;
// Final summary of the counters, written to STDERR.
// NOTE(review): no fclose() of $csv_fd or $mapping_fd is visible in this extract.
855 printStdErr(
"\n- All done, stats below:");
856 printStdErr(
'Folders created: '.$num_folders_created);
857 printStdErr(
'Assets created : '.$num_assets_created);
858 printStdErr(
'Records ignored: '.$num_records_ignored);
859 printStdErr(
'Fields ignored (by user) : '.$num_fields_ignored);
860 printStdErr(
'Fields ignored (blank) : '.$num_fields_blank.
"\n");