17 require_once SQ_SYSTEM_ROOT.
'/core/hipo/hipo_job.inc';
18 require_once SQ_FUDGE_PATH.
'/general/file_system.inc';
45 $this->uses_trans = FALSE;
59 return 'HIPO_Job_Structured_File_Import-'.$this->_running_vars[
'root_assetid'].
'-'.$this->_running_vars[
'import_file'];
72 return 'Hipo Job Large Document Import';
88 'name' =>
'Tidying Document',
89 'function_call' => Array(
90 'process_function' =>
'processTidying',
92 'running_mode' =>
'server',
97 'allow_cancel' => TRUE,
100 'name' =>
'Processing Headings',
101 'function_call' => Array(
102 'process_function' =>
'processHeadings',
104 'running_mode' =>
'server',
109 'allow_cancel' => TRUE,
112 'name' =>
'Processing Images',
113 'function_call' => Array(
114 'process_function' =>
'processImages',
116 'running_mode' =>
'server',
121 'allow_cancel' => TRUE,
124 'name' =>
'Scanning Document',
125 'function_call' => Array(
126 'process_function' =>
'processScanning',
128 'running_mode' =>
'server',
133 'allow_cancel' => TRUE,
136 'name' =>
'Splitting Document',
137 'function_call' => Array(
138 'process_function' =>
'processSplitting',
140 'running_mode' =>
'server',
145 'allow_cancel' => TRUE,
148 'name' =>
'Performing Additional Operations',
149 'function_call' => Array(
150 'process_function' =>
'processAdditional',
152 'running_mode' =>
'server',
157 'allow_cancel' => TRUE,
177 while (count($this->_running_vars[
'headings_to_process']) != 0) {
183 while ($this->_running_vars[
'image_count'] != count($this->_running_vars[
'image_names'])) {
193 }
while ($this->_running_vars[
'running_count'] != 0);
195 while ($this->_running_vars[
'top_level_count'] != $this->_running_vars[
'running_count']) {
221 $this->_running_vars[
'styles'] = Array();
222 $file_data = file_get_contents($this->_running_vars[
'import_file']);
227 $style_matches = Array();
228 preg_match_all(
'/<style[^>]*>([^<]*)<\/\s*style>/i', $file_data, $style_matches);
230 foreach ($style_matches[1] as $style_data) {
231 $this->_running_vars[
'styles'][] = $style_data;
234 $title_matches = Array();
235 preg_match(
'/<title[^>]*>([^<]*)<\/\s*title>/i', $file_data, $title_matches);
236 if (!empty($title_matches)) {
237 $this->_running_vars[
'site_title'] = $title_matches[1];
239 $this->_running_vars[
'site_title'] =
'';
244 $file_data = str_replace($style_matches[0],
'', $file_data);
247 $file_data = str_replace(
"\r",
"\n", $file_data);
250 $file_data = str_replace(chr(145),
'\'', $file_data);
251 $file_data = str_replace(chr(146),
'\'', $file_data);
252 $file_data = str_replace(chr(147),
'"', $file_data);
253 $file_data = str_replace(chr(148),
'"', $file_data);
254 $file_data = str_replace(chr(149),
'-', $file_data);
255 $file_data = str_replace(chr(150),
'-', $file_data);
257 $file_data = str_replace(chr(92),
'\'', $file_data);
258 $file_data = str_replace(chr(97),
'a', $file_data);
259 $file_data = str_replace(chr(160),
' ', $file_data);
261 $body_start_match = Array();
262 $body_end_match = Array();
263 preg_match(
'/<body[^>]*>/i', $file_data, $body_start_match, PREG_OFFSET_CAPTURE);
264 preg_match(
'/<\/\s*body\s*>/i', $file_data, $body_end_match, PREG_OFFSET_CAPTURE);
266 if (empty($body_start_match)) {
269 $body_start = $body_start_match[0][1];
272 if (empty($body_end_match)) {
273 $body_end = strlen($file_data);
275 $body_end = $body_end_match[0][1];
277 $body = substr($file_data, $body_start, $body_end - $body_start);
279 if (!empty($body_start_match)) {
280 $body = str_replace($body_start_match[0][0],
'', $body);
284 if ($this->_running_vars[
'remove_word']) {
286 $body = preg_replace(
'/class=([a-z]+[a-z\d]*)/i',
'class="\\1"', $body);
287 $body = preg_replace(
'/lang=([a-z]+[a-z\d-]*)/i',
'lang="\\1"', $body);
288 $body = preg_replace(
'/align=[a-z]+\s/i',
' ', $body);
289 $body = preg_replace(
'/<[\/]?st1:[^>]+>/i',
' ', $body);
292 $body = preg_replace(
'/<(\/)?v:shapetype[^>]*>/',
'', $body);
293 $body = preg_replace(
'/<(\/)?v:shape[^>]*>/',
'', $body);
294 $body = preg_replace(
'/<v:stroke[^>]*>/',
'', $body);
295 $body = preg_replace(
'/<(\/)?v:formulas[^>]*>/',
'', $body);
296 $body = preg_replace(
'/<v:f[^>]*>/',
'', $body);
297 $body = preg_replace(
'/<v:path[^>]*>/',
'', $body);
298 $body = preg_replace(
'/<v:rect[^>]*>/',
'', $body);
299 $body = preg_replace(
'/<v:line[^>]*>/',
'', $body);
301 $body = preg_replace(
'/<(\/)?o:p[^>]*>/',
'', $body);
302 $body = preg_replace(
'/<!\[if ![a-zA-Z !&]+\]>/',
'', $body);
303 $body = str_replace(
'<![endif]>',
'', $body);
307 if ($this->_running_vars[
'run_tidy']) {
309 if (is_writable($this->_running_vars[
'import_dir'])) {
310 $tmp_fname = $this->_running_vars[
'import_file'].
'_tmp';
311 if (($fh = fopen($tmp_fname,
'w')) !== FALSE) {
312 if (fwrite($fh, $body) !== FALSE) {
319 if (file_exists(SQ_TOOL_HTML_TIDY_PATH)) {
324 'output-xhtml' => TRUE,
325 'preserve-entities' => TRUE,
326 'show-body-only' => TRUE,
329 'show-warnings' => FALSE,
331 'force-output' => TRUE,
332 'quote-marks' => TRUE,
335 $tidy->parseFile($tmp_fname, $config);
336 $tidy->cleanRepair();
347 $this->_running_vars[
'headings_to_process'] = Array();
349 foreach ($this->_running_vars[
'headings'] as $heading) {
350 $tidy_matches = Array();
351 preg_match_all(
'/<\s*'.$heading.
'/i', $body, $tidy_matches, PREG_OFFSET_CAPTURE);
352 foreach ($tidy_matches[0] as $data) {
353 $this->_running_vars[
'headings_to_process'][] = Array(
'heading' => $heading,
'data' => $data);
358 $this->_running_vars[
'file_data'] = $body;
359 $this->_running_vars[
'started_scanning'] = FALSE;
362 if ($this->_running_vars[
'create_new_site'] == TRUE) {
363 $GLOBALS[
'SQ_SYSTEM']->am->includeAsset(
'site');
364 $root_folder = $GLOBALS[
'SQ_SYSTEM']->am->getSystemAsset(
'root_folder');
365 if (!empty($this->_running_vars[
'root_assetid'])) {
366 $root_link_parent = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($this->_running_vars[
'root_assetid']);
368 $root_link_parent =& $root_folder;
370 $root_link = Array(
'asset' => &$root_link_parent,
'link_type' => SQ_LINK_TYPE_1);
372 $new_site =
new Site();
374 $new_site_name = $this->_running_vars[
'new_site_name'];
376 if (trim($new_site_name) ==
'') {
377 if ($this->_running_vars[
'site_title'] ==
'') {
378 $file_name = basename($this->_running_vars[
'import_file']);
379 $strrpos_file_name = strrpos($file_name,
'.');
380 $file_name = substr($file_name, 0, $strrpos_file_name);
381 $replaced_file_name = str_replace(
'_',
' ', $file_name);
382 $new_site_name = ucwords($replaced_file_name);
385 $new_site_name = $this->_running_vars[
'site_title'];
389 $new_site->setAttrValue(
'name', $new_site_name);
391 $new_site->create($root_link);
393 $this->_running_vars[
'root_assetid'] = $new_site->id;
394 $this->_running_vars[
'create_new_site'] = FALSE;
397 $this->_running_vars[
'processed_images'] = FALSE;
398 $this->_running_vars[
'image_folder_id'] =
'0';
399 $this->_running_vars[
'image_count'] = 0;
400 $this->_running_vars[
'image_names'] = Array();
401 $step_data[
'complete'] = TRUE;
402 $step_data[
'percent_done'] = 100;
420 if (!isset($this->_running_vars[
'initial_heading_count'])) {
421 $this->_running_vars[
'initial_heading_count'] = count($this->_running_vars[
'headings_to_process']);
425 foreach ($this->_running_vars[
'headings_to_process'] as $id => $top_data) {
426 $data = $top_data[
'data'];
427 $heading = $top_data[
'heading'];
428 $strpos_file_data = (strpos($this->_running_vars[
'file_data'],
'</'.$heading.
'>', $data[1]) - $data[1]);
429 $replace = substr($this->_running_vars[
'file_data'], $data[1], $strpos_file_data);
430 $replace_array = Array(
"\n",
"\r");
431 $new_text = str_replace($replace_array,
' ', $replace);
432 $this->_running_vars[
'file_data'] = str_replace($replace, $new_text, $this->_running_vars[
'file_data']);
433 unset($this->_running_vars[
'headings_to_process'][$id]);
435 if ($i == 100)
break;
437 if (count($this->_running_vars[
'headings_to_process']) != 0) {
438 $step_data[
'percent_done'] = floor((count($this->_running_vars[
'headings_to_process'])/$this->_running_vars[
'initial_heading_count']) * 100);
439 $step_data[
'complete'] = FALSE;
441 $step_data[
'percent_done'] = 100;
442 $step_data[
'complete'] = TRUE;
461 $image_dir_name = preg_replace(
'/\.[a-zA-Z\(\) ]+/i',
'_files', $this->_running_vars[
'import_file']);
463 $body_data = $this->_running_vars[
'file_data'];
465 $image_names = Array();
467 if ($this->_running_vars[
'processed_images'] == FALSE && is_dir($image_dir_name)) {
469 $image_dir = opendir($image_dir_name);
471 while (FALSE !== ($filename = readdir($image_dir))) {
472 switch (get_file_type($filename)) {
477 $image_names[] = $image_dir_name.
'/'.$filename;
482 $this->_running_vars[
'image_names'] = $image_names;
485 if (empty($this->_running_vars[
'image_names'])) {
486 $step_data[
'complete'] = TRUE;
490 if ($this->_running_vars[
'image_folder_id'] ==
'0') {
492 $GLOBALS[
'SQ_SYSTEM']->am->includeAsset(
'folder');
494 $image_folder =
new Folder();
496 $image_folder->setAttrValue(
'name',
'Images');
498 $root_asset = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($this->_running_vars[
'root_assetid']);
499 $folder_import_link = Array(
'asset' => &$root_asset,
'link_type' => SQ_LINK_TYPE_2);
501 $image_folder->create($folder_import_link);
502 $this->_running_vars[
'image_folder_id'] = $image_folder->id;
504 $image_folder = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($this->_running_vars[
'image_folder_id']);
507 $filename = $this->_running_vars[
'image_names'][$this->_running_vars[
'image_count']];
509 $GLOBALS[
'SQ_SYSTEM']->am->includeAsset(
'image');
511 $import_link = Array(
'asset' => &$image_folder,
'link_type' => SQ_LINK_TYPE_1);
513 $temp_info = Array(
'name' => basename($filename),
'tmp_name' => $filename,
'non_uploaded_file' => TRUE);
515 $new_file =
new Image();
516 $new_file->_tmp[
'uploading_file'] = TRUE;
517 $new_file->setAttrValue(
'name', basename($filename));
519 if (!$new_file->create($import_link, $temp_info)) {
520 trigger_error(
'Failed to import Image: '.$filename, E_USER_WARNING);
523 $strrpost_image_dir_name = strrpos($image_dir_name,
'/') + 1;
524 $find = substr($image_dir_name, $strrpost_image_dir_name);
525 $test_reg =
'/[\.\/]?'.$find.
'\/'.str_replace(
'.',
'\.', basename($filename)).
'/i';
526 $test_reg = str_replace(
'(',
'\(', $test_reg);
527 $test_reg = str_replace(
')',
'\)', $test_reg);
528 $replace_asset =
'./?a='.$new_file->id;
529 preg_replace($test_reg, $replace_asset, $this->_running_vars[
'file_data']);
531 $body_data = preg_replace($test_reg, $replace_asset, $body_data);
536 preg_match_all(
'/<v:imagedata\s+src=["\']{1}([^>]+)["\']{1}>/i', $body_data, $result);
537 if (!empty($result)) {
538 for ($i=0; $i < count($result[1]); $i++) {
539 $str = trim($result[1][$i]);
541 $body_data = str_replace($result[0][$i],
'<img src="'.$result[1][$i].
'" />', $body_data);
548 $this->_running_vars[
'file_data'] = $body_data;
550 $this->_running_vars[
'image_count']++;
552 $step_data[
'percent_done'] = round($this->_running_vars[
'image_count'] / count($this->_running_vars[
'image_names']) * 100);
554 $step_data[
'complete'] = $this->_running_vars[
'image_count'] == count($this->_running_vars[
'image_names']) ? TRUE : FALSE;
572 if ($this->_running_vars[
'fix_anchors']) {
574 $this->_running_vars[
'anchor_links'] = Array();
575 $this->_running_vars[
'anchor_names'] = Array();
577 if (!$this->_running_vars[
'use_headings']) {
579 $initial_data = Array();
580 $single_page_name = $this->_running_vars[
'site_title'];
581 if ($single_page_name ==
'') {
582 $file_name = basename($this->_running_vars[
'import_file']);
583 $strrpos_file_name = strrpos($file_name,
'.');
584 $file_name = substr($file_name, 0, $strrpos_file_name);
585 $str_replace_file_name = str_replace(
'_',
' ', $file_name);
586 $single_page_name = ucwords($str_replace_file_name);
588 $initial_data[] = Array (
589 'name' => $single_page_name,
591 'children' => Array(),
592 'data' => $this->_running_vars[
'file_data'],
594 $this->_running_vars[
'data_structure'] = $initial_data;
595 $step_data[
'complete'] = TRUE;
596 $step_data[
'percent_done'] = 100;
597 $this->_running_vars[
'running_count'] = 0;
598 $this->_running_vars[
'top_level_count'] = 1;
604 $file_data = $this->_running_vars[
'file_data'];
605 if ($this->_running_vars[
'started_scanning'] == FALSE) {
608 $this->_running_vars[
'running_count'] = 0;
609 $heading = current($this->_running_vars[
'headings']);
610 if (!empty($this->_running_vars[
'headings']) && $heading === FALSE) {
611 reset($this->_running_vars[
'headings']);
612 $heading = current($this->_running_vars[
'headings']);
614 $e =
'/(<\s*'.$heading.
'[^>]*>.*<\/\s*'.$heading.
'>)/i';
615 $matches = preg_split($e, $file_data, -1, PREG_SPLIT_DELIM_CAPTURE);
617 $this->_running_vars[
'started_scanning'] = TRUE;
618 $this->_running_vars[
'top_level_count'] = count($matches);
620 $initial_data = Array();
621 $leading_update = FALSE;
623 if ($this->_running_vars[
'leading_create'] == TRUE) {
625 if ((count($matches) % 2) == 1) {
626 $initial_data[] = Array (
627 'name' => $this->_running_vars[
'leading_text'],
629 'children' => Array(),
630 'data' => $matches[0],
632 $leading_update = TRUE;
635 if ((count($matches) % 2) == 1) $counter++;
637 for ($i=$counter; $i < count($matches); $i+=2) {
638 $initial_data[] = Array (
640 'title' => $matches[$i],
641 'children' => Array(),
642 'data' => $matches[$i + 1],
646 $this->_running_vars[
'data_structure'] = $initial_data;
647 $this->_running_vars[
'top_level_count'] = count($initial_data);
650 $data = $this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']];
652 $this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']][
'children'] = $this->
scanChildren($this->_running_vars[
'headings'], $this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']][
'data']);
654 $this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']][
'data'] = $this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']][
'children'][
'parent_data'];
655 unset($this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']][
'children'][
'parent_data']);
657 $this->_running_vars[
'running_count']++;
659 if ($this->_running_vars[
'running_count'] >= $this->_running_vars[
'top_level_count']) {
660 $step_data[
'complete'] = TRUE;
661 $this->_running_vars[
'running_count'] = 0;
663 $step_data[
'message'] =
'Scanning Page: '.$data[
'name'].
' and it\'s children';
664 $step_data[
'percent_done'] = ($this->_running_vars[
'top_level_count'] == 0) ? 100 : round($this->_running_vars[
'running_count'] / $this->_running_vars[
'top_level_count'] * 100);
682 $headings = array_slice($headings, 1);
683 if (empty($headings)) {
685 'parent_data' => $data,
689 $heading = current($headings);
691 $e =
'/(<\s*'.$heading.
'[^>]*>.*<\/\s*'.$heading.
'>)/i';
692 $matches = preg_split($e, $data, -1, PREG_SPLIT_DELIM_CAPTURE);
694 $match_data = Array();
697 if ((count($matches) % 2) == 1) {
698 $match_data[
'parent_data'] = $matches[0];
702 for ($i = $counter; $i < count($matches); $i += 2) {
706 'title' => $matches[$i],
707 'data' => $matches[$i + 1],
710 $new_match[
'children'] = $this->
scanChildren($headings, $matches[$i + 1]);
711 $new_match[
'data'] = $new_match[
'children'][
'parent_data'];
712 unset($new_match[
'children'][
'parent_data']);
713 $match_data[] = $new_match;
731 $GLOBALS[
'SQ_SYSTEM']->am->includeAsset(
'page_standard');
733 $data = $this->_running_vars[
'data_structure'][$this->_running_vars[
'running_count']];
739 $this->_running_vars[
'running_count']++;
741 $percent_done = $this->_running_vars[
'top_level_count'] == 0 ? 100 : round($this->_running_vars[
'running_count'] / $this->_running_vars[
'top_level_count'] * 100);
743 $step_data[
'percent_done'] = $percent_done;
745 $step_data[
'complete'] = $this->_running_vars[
'running_count'] == $this->_running_vars[
'top_level_count'];
746 $step_data[
'message'] =
'Splitting Page: '.$data[
'name'].
' and it\'s children';
747 if ($this->_running_vars[
'top_level_count'] == 0) {
748 $step_data[
'complete'] = TRUE;
769 if (empty($page_data))
return TRUE;
772 if (is_null($parent_asset)) {
773 $parent_asset = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($this->_running_vars[
'root_assetid']);
776 $import_link = Array(
'asset' => &$parent_asset,
'link_type' => SQ_LINK_TYPE_1);
779 if (trim($page_data[
'name']) ==
'') {
780 $page_data[
'name'] =
'Preface';
783 $stripped_tag_name = strip_tags($page_data[
'name']);
784 $trimmed_tag_name = trim($stripped_tag_name);
785 $new_page->setAttrValue(
'name', $trimmed_tag_name);
786 $new_page->create($import_link);
788 if ($this->_running_vars[
'fix_anchors']) {
790 $anchor_record = Array();
791 $anchor_tags = Array();
792 preg_match_all(
'/<a[^>]*>/i', $page_data[
'data'], $anchor_tags);
793 foreach ($anchor_tags[0] as $tag) {
794 $name_matches = Array();
795 preg_match(
'/name="([_a-z\d]+)"/i', $tag, $name_matches);
797 if (!empty($name_matches)) {
798 $this->_running_vars[
'anchor_names'][$name_matches[1]] = $new_page->id;
801 $link_matches = Array();
802 preg_match(
'/href="#([_a-z\d]+)"/i', $tag, $link_matches);
804 if (!empty($link_matches)) {
805 $anchor_record[] = $link_matches[1];
809 if (!empty($anchor_record)) {
810 $this->_running_vars[
'anchor_links'][$new_page->id] = $anchor_record;
817 foreach ($page_data[
'children'] as $child_data) {
818 $this->
splitData($child_data, $new_page);
837 $bc = $page->getBodycopy();
838 $containers = $bc->getContainers();
839 $div = current($containers);
841 $content_types = $GLOBALS[
'SQ_SYSTEM']->am->getLinks($div->id, SQ_LINK_TYPE_2,
'content_type', FALSE);
842 $ct_link = current($content_types);
844 $ct = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($ct_link[
'minorid']);
845 $GLOBALS[
'SQ_SYSTEM']->setRunLevel(SQ_RUN_LEVEL_FORCED);
846 $ct->setAttrValue(
'html', $content);
847 $ct->saveAttributes();
848 $GLOBALS[
'SQ_SYSTEM']->restoreRunLevel();
865 $bc = $page->getBodycopy();
866 $containers = $bc->getContainers();
867 $div = current($containers);
869 $content_types = $GLOBALS[
'SQ_SYSTEM']->am->getLinks($div->id, SQ_LINK_TYPE_2,
'content_type', FALSE);
870 $ct_link = current($content_types);
872 $ct = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($ct_link[
'minorid']);
873 return $ct->attr(
'html');
890 $parent_asset = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($this->_running_vars[
'root_assetid']);
892 $import_link = Array(
'asset' => &$parent_asset,
'link_type' => SQ_LINK_TYPE_2);
895 if ($this->_running_vars[
'create_css']) {
896 $styles = $this->_running_vars[
'styles'];
897 $style_output = implode(
"\n", $styles);
898 $css_file = fopen($this->_running_vars[
'import_dir'].
'/css_file.css',
'w');
899 fputs($css_file, $style_output);
903 $import_path = $this->_running_vars[
'import_dir'].
'/css_file.css';
904 $filename =
'css_file.css';
906 $temp_info = Array(
'name' => $filename,
'tmp_name' => $import_path,
'non_uploaded_file' => TRUE);
908 $GLOBALS[
'SQ_SYSTEM']->am->includeAsset(
'text_file');
911 $new_file->_tmp[
'uploading_file'] = TRUE;
912 $new_file->setAttrValue(
'name', $filename);
914 if (!$new_file->create($import_link, $temp_info)) {
915 trigger_error(
'Failed to import CSS File '.$filename, E_USER_WARNING);
920 if ($this->_running_vars[
'fix_anchors']) {
921 foreach ($this->_running_vars[
'anchor_links'] as $id => $links) {
922 $page = $GLOBALS[
'SQ_SYSTEM']->am->getAsset($id);
924 foreach ($links as $link) {
925 $new_asset =
'./?a=';
926 if (isset($this->_running_vars[
'anchor_names']) && isset($this->_running_vars[
'anchor_names'][$link])) {
927 $new_asset .= $this->_running_vars[
'anchor_names'][$link];
931 $html = str_replace(
'#'.$link, $new_asset.
'#'.$link, $html);
940 if ($this->_running_vars[
'create_site_map']) {
941 $GLOBALS[
'SQ_SYSTEM']->am->includeAsset(
'page_site_map');
944 $import_link[
'link_type'] = SQ_LINK_TYPE_1;
945 $site_map->setAttrValue(
'name',
'Table Of Contents');
946 if (!$site_map->create($import_link)) {
947 trigger_error(
'Failed to Create Site Map', E_USER_WARNING);
949 $exclude_list = Array();
950 $exclude_list[$site_map->id] = $site_map->id;
951 $site_map->setAttrValue(
'exclude_list', $exclude_list);
955 $step_data[
'complete'] = TRUE;
956 $step_data[
'percent_done'] = 100;
973 $open_matches = Array();
974 $close_matches = Array();
975 preg_match_all(
'/<!--/', $html, $open_matches, PREG_OFFSET_CAPTURE);
976 preg_match_all(
'/-->/', $html, $close_matches, PREG_OFFSET_CAPTURE);
980 if (count($open_matches[0]) == count($close_matches[0])) {
981 $style_open_matches = Array();
982 $style_close_matches = Array();
983 preg_match_all(
'/<style>/', $html, $style_open_matches, PREG_OFFSET_CAPTURE);
984 preg_match_all(
'/<\/style>/', $html, $style_close_matches, PREG_OFFSET_CAPTURE);
985 $style_pos = Array();
986 if (count($style_open_matches[0]) == count($style_close_matches[0])) {
988 for ($i = 0; $i < count($style_open_matches[0]); $i++) {
989 $style_pos[] = Array(
'o' => (
int) $style_open_matches[0][$i][1],
'c' => (
int) $style_close_matches[0][$i][1]);
993 for ($i = 0; $i < count($open_matches[0]); $i++) {
994 $open_pos = (int) $open_matches[0][$i][1];
995 $close_pos = (int) $close_matches[0][$i][1];
997 if (!$prev_close_pos) {
998 if ($open_pos > $close_pos) $verified = FALSE;
1000 if (($open_pos > $close_pos) || ($prev_close_pos > $open_pos)) {
1004 $prev_close_pos = $close_pos;
1007 $open_matches = array_reverse($open_matches);
1008 $close_matches = array_reverse($close_matches);
1009 for ($i = 0; $i < count($open_matches[0]); $i++) {
1010 $open_pos = (int) $open_matches[0][$i][1];
1011 $close_pos = (int) $close_matches[0][$i][1];
1013 foreach ($style_pos as $style_tag) {
1014 if (($style_tag[
'o'] < $open_pos) && ($style_tag[
'c'] > $close_pos)) {
1019 $str = substr($html, $open_pos, ($close_pos + 3 - $open_pos));
1026 foreach ($comments as $comment) {
1027 if (strpos($html, $comment) !== FALSE) {
1028 $html = str_replace($comment,
'', $html);
1045 $heading = strip_tags($heading);
1046 $heading = preg_replace(
'/( ){2,}/',
' ', $heading);
1047 $heading = str_replace(
' ',
' ', $heading);
1048 $heading = preg_replace(
'/ {2,}/',
' ', $heading);