3 * GeSHi - Generic Syntax Highlighter
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
12 * This file is part of GeSHi.
14 * GeSHi is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * GeSHi is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with GeSHi; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license http://gnu.org/copyleft/gpl.html GNU GPL
38 // You should use these constant names in your programs instead of
39 // their values - you never know when a value may change in a future
43 /** The version of this GeSHi file */
44 define('GESHI_VERSION', '1.0.8.3');
46 // Define the root directory for the GeSHi code tree
47 if (!defined('GESHI_ROOT')) {
48 /** The root directory for GeSHi */
49 define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
51 /** The language file directory for GeSHi
53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
55 // Define if GeSHi should be paranoid about security
56 if (!defined('GESHI_SECURITY_PARANOID')) {
57 /** Tells GeSHi to be paranoid about security settings */
58 define('GESHI_SECURITY_PARANOID', false);
61 // Line numbers - use with enable_line_numbers()
62 /** Use no line numbers when building the result */
63 define('GESHI_NO_LINE_NUMBERS', 0);
64 /** Use normal line numbers when building the result */
65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
66 /** Use fancy line numbers when building the result */
67 define('GESHI_FANCY_LINE_NUMBERS', 2);
69 // Container HTML type
70 /** Use nothing to surround the source */
71 define('GESHI_HEADER_NONE', 0);
72 /** Use a "div" to surround the source */
73 define('GESHI_HEADER_DIV', 1);
74 /** Use a "pre" to surround the source */
75 define('GESHI_HEADER_PRE', 2);
76 /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
77 define('GESHI_HEADER_PRE_VALID', 3);
79 * Use a "table" to surround the source:
82 * <thead><tr><td colspan="2">$header</td></tr></thead>
83 * <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
84 * <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
87 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
88 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
89 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
91 define('GESHI_HEADER_PRE_TABLE', 4);
93 // Capitalisation constants
94 /** Leave keywords found as the case that they are */
95 define('GESHI_CAPS_NO_CHANGE', 0);
96 /** Uppercase keywords found */
97 define('GESHI_CAPS_UPPER', 1);
98 /** Lowercase keywords found */
99 define('GESHI_CAPS_LOWER', 2);
101 // Link style constants
102 /** Links in the source in the :link state */
103 define('GESHI_LINK', 0);
104 /** Links in the source in the :hover state */
105 define('GESHI_HOVER', 1);
106 /** Links in the source in the :active state */
107 define('GESHI_ACTIVE', 2);
108 /** Links in the source in the :visited state */
109 define('GESHI_VISITED', 3);
111 // Important string starter/finisher
112 // Note that if you change these, they should be as-is: i.e., don't
113 // write them as if they had been run through htmlentities()
114 /** The starter for important parts of the source */
115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116 /** The ender for important parts of the source */
117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
122 // When strict mode applies for a language
123 /** Strict mode never applies (this is the most common) */
124 define('GESHI_NEVER', 0);
125 /** Strict mode *might* apply, and can be enabled or
126 disabled by {@link GeSHi->enable_strict_mode()} */
127 define('GESHI_MAYBE', 1);
128 /** Strict mode always applies */
129 define('GESHI_ALWAYS', 2);
131 // Advanced regexp handling constants, used in language files
132 /** The key of the regex array defining what to search for */
133 define('GESHI_SEARCH', 0);
134 /** The key of the regex array defining what bracket group in a
135 matched search to use as a replacement */
136 define('GESHI_REPLACE', 1);
137 /** The key of the regex array defining any modifiers to the regular expression */
138 define('GESHI_MODIFIERS', 2);
139 /** The key of the regex array defining what bracket group in a
140 matched search to put before the replacement */
141 define('GESHI_BEFORE', 3);
142 /** The key of the regex array defining what bracket group in a
143 matched search to put after the replacement */
144 define('GESHI_AFTER', 4);
145 /** The key of the regex array defining a custom keyword to use
146 for this regexp's html tag class */
147 define('GESHI_CLASS', 5);
149 /** Used in language files to mark comments */
150 define('GESHI_COMMENTS', 0);
152 /** Used to work around missing PHP features **/
153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
/**
 * Make sure stripos() can be called on PHP versions that do not provide it.
 *
 * The fallback emulates the native function with preg_match(): it returns the
 * byte position of the first case-insensitive occurrence of $needle in
 * $haystack (searching from $offset when one is given), or false if there is
 * no occurrence.
 */
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            $shift = 0;
            if (!is_null($offset)) {
                $shift = (int) $offset;
                if ($shift < 0) {
                    // substr() counts a negative offset from the end of the string
                    $shift = max(0, strlen($haystack) + $shift);
                }
                // Search only the tail, but remember how far the start moved so the
                // reported position stays relative to the complete haystack.
                $haystack = substr($haystack, $shift);
            }
            // The "i" modifier is what makes this stripos() rather than strpos()
            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                return $match[0][1] + $shift;
            }
            return false;
        }
    } else {
        /**
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // The "i" modifier is what makes this stripos() rather than strpos()
            if (preg_match('/'. preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
185 /** some old PHP / PCRE versions only support up to xxx subpatterns in
186 regular expressions. Set this to false if your PCRE lib is up to date
187 @see GeSHi->optimize_regexp_list()
189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
190 /** it's also important not to generate too long regular expressions
191 be generous here... but keep in mind, that when reaching this limit we
192 still have to close open patterns. 12k should do just fine on a 16k limit.
193 @see GeSHi->optimize_regexp_list()
195 define('GESHI_MAX_PCRE_LENGTH', 12288);
197 //Number format specification
198 /** Basic number format for integers */
199 define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
200 /** Enhanced number format for integers like seen in C */
201 define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
202 /** Number format to highlight binary numbers with a suffix "b" */
203 define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
204 /** Number format to highlight binary numbers with a prefix % */
205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
206 /** Number format to highlight binary numbers with a prefix 0b (C) */
207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
208 /** Number format to highlight octal numbers with a leading zero */
209 define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
210 /** Number format to highlight octal numbers with a suffix of o */
211 define('GESHI_NUMBER_OCT_SUFFIX', 512); //[0-7]+[oO]
212 /** Number format to highlight hex numbers with a prefix 0x */
213 define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
214 /** Number format to highlight hex numbers with a suffix of h */
215 define('GESHI_NUMBER_HEX_SUFFIX', 8192); //[0-9][0-9a-fA-F]*h
216 /** Number format to highlight floating-point numbers without support for scientific notation */
217 define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
218 /** Number format to highlight floating-point numbers without support for scientific notation, with an optional fraction and a trailing "f" */
219 define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
220 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
221 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
222 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
223 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
224 //Custom formats are passed by RX array
226 // Error detection - use these to analyse faults
227 /** No sourcecode to highlight was specified
230 define('GESHI_ERROR_NO_INPUT', 1);
231 /** The language specified does not exist */
232 define('GESHI_ERROR_NO_SUCH_LANG', 2);
233 /** GeSHi could not open a file for reading (generally a language file) */
234 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
235 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
236 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
237 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
238 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
245 * Please refer to the documentation for GeSHi 1.0.X that is available
246 * at http://qbnz.com/highlighter/documentation.php for more information
247 * about how to use this class.
250 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
251 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
258 * The source code to highlight
264 * The language to use when highlighting
270 * The data for the language used
273 var $language_data = array();
276 * The path to the language files
279 var $language_path = GESHI_LANG_ROOT;
282 * The error message associated with an error
284 * @todo check err reporting works
289 * Possible error messages
292 var $error_messages = array(
293 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
294 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
295 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
296 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
300 * Whether highlighting is strict or not
303 var $strict_mode = false;
306 * Whether to use CSS classes in output
309 var $use_classes = false;
312 * The type of header to use. Can be one of the following
315 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
316 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
317 * - GESHI_HEADER_NONE: No header is outputted.
321 var $header_type = GESHI_HEADER_PRE;
324 * Array of permissions for which lexics should be highlighted
327 var $lexic_permissions = array(
328 'KEYWORDS' => array(),
329 'COMMENTS' => array('MULTI' => true),
330 'REGEXPS' => array(),
331 'ESCAPE_CHAR' => true,
341 * The time it took to parse the code
347 * The content of the header block
350 var $header_content = '';
353 * The content of the footer block
356 var $footer_content = '';
359 * The style of the header block
362 var $header_content_style = '';
365 * The style of the footer block
368 var $footer_content_style = '';
371 * Tells if a block around the highlighted source should be forced
372 * if not using line numbering
375 var $force_code_block = false;
378 * The styles for hyperlinks in the code
381 var $link_styles = array();
384 * Whether important blocks should be recognised or not
387 * @todo REMOVE THIS FUNCTIONALITY!
389 var $enable_important_blocks = false;
392 * Styles for important parts of the code
395 * @todo As above - rethink the whole idea of important blocks as it is buggy and
396 * will be hard to implement in 1.2
398 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
401 * Whether CSS IDs should be added to the code
404 var $add_ids = false;
407 * Lines that should be highlighted extra
410 var $highlight_extra_lines = array();
413 * Styles of lines that should be highlighted extra
416 var $highlight_extra_lines_styles = array();
419 * Styles of extra-highlighted lines
422 var $highlight_extra_lines_style = 'background-color: #ffc;';
426 * If null, nl2br() will be used on the result string.
427 * Otherwise, all instances of \n will be replaced with $line_ending
430 var $line_ending = null;
433 * Number at which line numbers should start at
436 var $line_numbers_start = 1;
439 * The overall style for this code block
442 var $overall_style = 'font-family:monospace;';
445 * The style for the actual code
448 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
451 * The overall class for this code block
454 var $overall_class = '';
457 * The overall ID for this code block
460 var $overall_id = '';
466 var $line_style1 = 'font-weight: normal; vertical-align:top;';
469 * Line number styles for fancy lines
472 var $line_style2 = 'font-weight: bold; vertical-align:top;';
475 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
478 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
481 * Flag for how line numbers are displayed
484 var $line_numbers = GESHI_NO_LINE_NUMBERS;
487 * Flag to decide if multi line spans are allowed. Set it to false to make sure
488 * each tag is closed before and reopened after each linefeed.
491 var $allow_multiline_span = true;
494 * The "nth" value for fancy line highlighting
497 var $line_nth_row = 0;
500 * The size of tab stops
506 * Should we use language-defined tab stop widths?
509 var $use_language_tab_width = false;
512 * Default target for keyword links
515 var $link_target = '';
518 * The encoding to use for entity encoding
519 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
522 var $encoding = 'utf-8';
525 * Should keywords be linked?
528 var $keyword_links = true;
531 * Currently loaded language file
535 var $loaded_language = '';
538 * Whether the caches needed for parsing are built or not
543 var $parse_cache_built = false;
546 * Work around for Suhosin Patch with disabled /e modifier
548 * Note from suhosins author in config file:
550 * The /e modifier inside <code>preg_replace()</code> allows code execution.
551 * Often it is the cause for remote code execution exploits. It is wise to
552 * deactivate this feature and test where in the application it is used.
553 * The developer using the /e modifier should be made aware that he should
554 * use <code>preg_replace_callback()</code> instead
560 var $_kw_replace_group = 0;
564 * some "callback parameters" for handle_multiline_regexps
570 var $_hmr_before = '';
571 var $_hmr_replace = '';
572 var $_hmr_after = '';
/**
 * Creates a new GeSHi object, with source and language
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory. <b>This
 *               is deprecated!</b> I've backported the auto path
 *               detection from the 1.1.X dev branch, so now it
 *               should be automatically set correctly. If you have
 *               renamed the language directory however, you will
 *               still need to set the path using this parameter or
 *               {@link GeSHi->set_language_path()}
 */
function __construct($source = '', $language = '', $path = '') {
    if (!empty($source)) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor.
 *
 * PHP 8 no longer treats a method named after its class as a constructor, so
 * the real work lives in __construct(). This wrapper is kept for PHP 4 and for
 * code that calls GeSHi() explicitly (e.g. from a subclass constructor).
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
602 * Returns an error message associated with the last GeSHi operation,
603 * or false if no error has occurred
605 * @return string|false An error message if there has been an error, else false
610 //Put some template variables for debugging here ...
611 $debug_tpl_vars = array(
612 '{LANGUAGE}' => $this->language,
613 '{PATH}' => $this->language_path
616 array_keys($debug_tpl_vars),
617 array_values($debug_tpl_vars),
618 $this->error_messages[$this->error]);
620 return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
/**
 * Gets a human-readable language name (thanks to Simon Patterson).
 *
 * When the last operation failed with GESHI_ERROR_NO_SUCH_LANG the name is
 * suffixed with " (Unknown Language)".
 *
 * @return string The name for the current language
 */
function get_language_name() {
    $name = $this->language_data['LANG_NAME'];
    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        $name .= ' (Unknown Language)';
    }
    return $name;
}
/**
 * Sets the source code for this object and clears the list of lines
 * that were marked for extra highlighting.
 *
 * @param string The source code to highlight
 */
function set_source($source) {
    // Extra-highlighted line numbers are reset together with the source
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
651 * Sets the language for this object
653 * @note since 1.0.8 this function won't reset language-settings by default anymore!
654 * if you need this set $force_reset = true
656 * @param string The name of the language to use
659 function set_language($language, $force_reset = false) {
661 $this->loaded_language = false;
664 //Clean up the language name to prevent malicious code injection
665 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
667 $language = strtolower($language);
669 //Retrieve the full filename
670 $file_name = $this->language_path . $language . '.php';
671 if ($file_name == $this->loaded_language) {
672 // this language is already loaded!
676 $this->language = $language;
678 $this->error = false;
679 $this->strict_mode = GESHI_NEVER;
681 //Check if we can read the desired file
682 if (!is_readable($file_name)) {
683 $this->error = GESHI_ERROR_NO_SUCH_LANG;
687 // Load the language for parsing
688 $this->load_language($file_name);
692 * Sets the path to the directory containing the language files. Note
693 * that this path is relative to the directory of the script that included
694 * geshi.php, NOT geshi.php itself.
696 * @param string The path to the language directory
698 * @deprecated The path to the language files should now be automatically
699 * detected, so this method should no longer be needed. The
700 * 1.1.X branch handles manual setting of the path differently
701 * so this method will disappear in 1.2.0.
703 function set_language_path($path) {
704 if(strpos($path,':')) {
705 //Security Fix to prevent external directories using fopen wrappers.
706 if(DIRECTORY_SEPARATOR == "\\") {
707 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
714 if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
715 //Security Fix to prevent external directories using fopen wrappers.
718 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
719 //Security Fix to prevent external directories using fopen wrappers.
722 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
723 //Security Fix to prevent external directories using fopen wrappers.
727 $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
728 $this->set_language($this->language); // otherwise set_language_path has no effect
733 * Sets the type of header to be used.
735 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
736 * means more source code but more control over tab width and line-wrapping.
737 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
738 * control. Default is GESHI_HEADER_PRE.
740 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
741 * should be outputted.
743 * @param int The type of header to be used
746 function set_header_type($type) {
747 //Check if we got a valid header type
748 if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
749 GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
750 $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
754 //Set that new header type
755 $this->header_type = $type;
/**
 * Sets the styles for the whole outputted code block. The style
 * should be a string of valid stylesheet declarations.
 *
 * @param string  The overall style for the outputted code block
 * @param boolean Whether to append to the current styles (true) or
 *                replace them (false)
 */
function set_overall_style($style, $preserve_defaults = false) {
    $this->overall_style = $preserve_defaults
        ? $this->overall_style . $style
        : $style;
}
/**
 * Sets the overall CSS class name for this block of code, so that a
 * stylesheet can target this object's output.
 *
 * @param string The class name to use for this block of code
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}
/**
 * Sets the overall CSS id for this block of code, so that a
 * stylesheet can target this object's output.
 *
 * @param string The ID to use for this block of code
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}
/**
 * Sets whether CSS classes should be used to highlight the source.
 * Classes are off by default; calling this method without arguments
 * turns them on.
 *
 * @param boolean Whether to turn classes on or not
 */
function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
/**
 * Sets the style for the actual code. This should be a string
 * containing valid stylesheet declarations. If $preserve_defaults is
 * true the new declarations are appended to the current ones, so the
 * user defined styles have priority.
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string  The style to use for actual code
 * @param boolean Whether to merge the current styles with the new styles
 */
function set_code_style($style, $preserve_defaults = false) {
    $this->code_style = $preserve_defaults
        ? $this->code_style . $style
        : $style;
}
833 * Sets the styles for the line numbers.
835 * @param string The style for the line numbers that are "normal"
836 * @param string|boolean If a string, this is the style of the line
837 * numbers that are "fancy", otherwise if boolean then this
838 * defines whether the normal styles should be merged with the
839 * new normal styles or not
840 * @param boolean If set, is the flag for whether to merge the "fancy"
841 * styles with the current styles or not
844 function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
845 //Check if we got 2 or three parameters
846 if (is_bool($style2)) {
847 $preserve_defaults = $style2;
851 //Actually set the new styles
852 if (!$preserve_defaults) {
853 $this->line_style1 = $style1;
854 $this->line_style2 = $style2;
856 $this->line_style1 .= $style1;
857 $this->line_style2 .= $style2;
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which lines
 * are to be fancy. For example, if the value of this parameter is 5 then every
 * 5th line will be fancy.
 *
 * Any other first parameter records GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
 * both parameters are stored regardless.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy
 */
function enable_line_numbers($flag, $nth_row = 5) {
    $known_types = array(
        GESHI_NO_LINE_NUMBERS,
        GESHI_NORMAL_LINE_NUMBERS,
        GESHI_FANCY_LINE_NUMBERS
    );
    // Loose comparison on purpose, matching a chain of != checks
    if (!in_array($flag, $known_types)) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
    }
    $this->line_nth_row = $nth_row;
    $this->line_numbers = $flag;
}
/**
 * Sets whether spans and other HTML markup generated by GeSHi can
 * span over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean Whether multiline spans are allowed or not
 */
function enable_multiline_span($flag) {
    $this->allow_multiline_span = ($flag) ? true : false;
}
/**
 * Returns the current setting for multiline spans, as set through
 * GeSHi->enable_multiline_span().
 *
 * @see enable_multiline_span
 * @return boolean Whether multiline spans are currently allowed
 */
function get_multiline_span() {
    return $this->allow_multiline_span;
}
/**
 * Sets the style for a keyword group. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. A group that has no style yet
 * simply receives the new style.
 *
 * Highlighting is switched on for the group if no permission has been
 * recorded for it so far.
 *
 * @param int     The key of the keyword group to change the styles of
 * @param string  The style to make the keywords
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    //Set the style for this keyword group
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    //Update the lexic permissions
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
/**
 * Turns highlighting on/off for a keyword group
 *
 * @param int     The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
/**
 * Sets the styles for comment groups. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. A group that has no style yet
 * simply receives the new style.
 *
 * @param int     The key of the comment group to change the styles of
 * @param string  The style to make the comments
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
}
/**
 * Turns highlighting on/off for comment groups
 *
 * @param int     The key of the comment group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 */
function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
/**
 * Sets the styles for escaped characters. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. A group that has no style yet
 * simply receives the new style.
 *
 * @param string  The style to make the escape characters
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 * @param int     The key of the escape character group whose style is set
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }
}
/**
 * Turns highlighting on/off for escaped characters
 *
 * @param boolean Whether to turn highlighting for escape characters on or off
 */
function set_escape_characters_highlighting($flag = true) {
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
/**
 * Sets the styles for brackets. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. If no bracket style exists
 * yet, the new style is simply stored.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string  The style to make the brackets
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }
}
/**
 * Turns highlighting on/off for brackets
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean Whether to turn highlighting for brackets on or off
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1041 * Sets the styles for symbols. If $preserve_defaults is
1042 * true, then styles are merged with the default styles, with the
1043 * user defined styles having priority
1045 * @param string The style to make the symbols
1046 * @param boolean Whether to merge the new styles with the old or just
1048 * @param int Tells the group of symbols for which style should be set.
1051 function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1052 // Update the style of symbols
1053 if (!$preserve_defaults) {
1054 $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1056 $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1059 // For backward compatibility
1061 $this->set_brackets_style ($style, $preserve_defaults);
/**
 * Turns highlighting on/off for symbols. The same setting is passed
 * on to set_brackets_highlighting() for backward compatibility.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 */
function set_symbols_highlighting($flag) {
    // Update lexic permissions for this symbol group
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // For backward compatibility
    $this->set_brackets_highlighting($flag);
}
/**
 * Sets the styles for strings. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. If no string style exists
 * yet, the new style is simply stored.
 *
 * @param string  The style to make the strings
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 */
function set_strings_style($style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][0])) {
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['STRINGS'][0] = $style;
    }
}
/**
 * Turns highlighting on/off for strings
 *
 * @param boolean Whether to turn highlighting for strings on or off
 */
function set_strings_highlighting($flag) {
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
/**
 * Sets the styles for numbers. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. If no number style exists
 * yet, the new style is simply stored.
 *
 * @param string  The style to make the numbers
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 */
function set_numbers_style($style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][0])) {
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['NUMBERS'][0] = $style;
    }
}
/**
 * Turns highlighting on/off for numbers
 *
 * @param boolean Whether to turn highlighting for numbers on or off
 */
function set_numbers_highlighting($flag) {
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
/**
 * Sets the styles for methods. $key is a number that references the
 * appropriate "object splitter" - see the language file for the language
 * you are highlighting to get this number. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority. A splitter that has no style yet
 * simply receives the new style.
 *
 * @param int     The key of the object splitter to change the styles of
 * @param string  The style to make the methods
 * @param boolean Whether to merge the new styles with the old or just
 *                overwrite them
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
    } else {
        // Overwrite, or start fresh: appending to a missing key would raise a notice
        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }
}
/**
 * Turns highlighting on/off for methods
 *
 * @param boolean Whether to turn highlighting for methods on or off
 */
function set_methods_highlighting($flag) {
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
/**
 * Sets the style for one regular expression group.
 *
 * @param int     The key of the regular expression group to change the style of
 * @param string  The style to apply to the regular expression matches
 * @param boolean When true the new style is appended to the current one
 *                instead of replacing it, so the user defined style comes last
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    // Only append when there is something to append to; appending to a key
    // that has no style yet would raise an "undefined index" notice.
    if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }
}
/**
 * Switches highlighting of one regular expression group on or off.
 *
 * @param int     The key of the regular expression group to turn on or off
 * @param boolean Truthy to highlight matches of that group, falsy to skip them
 */
function set_regexps_highlighting($key, $flag) {
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
/**
 * Sets whether a keyword group is matched in a case sensitive manner.
 *
 * @param int     The key of the keyword group to change the case sensitivity of
 * @param boolean Truthy for case sensitive matching, falsy for case insensitive
 */
function set_case_sensitivity($key, $case) {
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * Any other value is ignored and the current setting is kept.
 *
 * @param int A constant specifying what to do with matched keywords
 */
function set_case_keywords($case) {
    $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
    if (!in_array($case, $allowed)) {
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
/**
 * Sets how many spaces a tab is substituted for.
 *
 * The value is converted with intval(); anything smaller than 1 falls
 * back to the default width of 8.
 *
 * @param int The tab width
 */
function set_tab_width($width) {
    $requested = intval($width);

    // Widths below 1 do not fit the constraints: use the default instead
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
/**
 * Sets whether or not to use the tab-stop width specified by the language.
 *
 * @param boolean Whether to use language-specific tab-stop widths
 */
function set_use_language_tab_width($use) {
    $this->use_language_tab_width = $use ? true : false;
}
/**
 * Returns the tab width to use, based on the current language and user
 * preference.
 *
 * The language's TAB_WIDTH is returned only when language tab widths are
 * enabled and the language defines one; otherwise the user's width is used.
 *
 * @return int Tab width
 */
function get_real_tab_width() {
    if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The call has no effect unless the language declares STRICT_MODE_APPLIES
 * as GESHI_MAYBE.
 *
 * @param boolean Whether to enable strict mode or not
 */
function enable_strict_mode($mode = true) {
    if ($this->language_data['STRICT_MODE_APPLIES'] != GESHI_MAYBE) {
        // The language does not leave the choice to the user
        return;
    }

    $this->strict_mode = $mode ? GESHI_ALWAYS : GESHI_NEVER;
}
/**
 * Disables all highlighting.
 *
 * Shorthand for enable_highlighting(false).
 *
 * @todo       Rewrite with array traversal
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    $this->enable_highlighting(false);
}
/**
 * Enables all highlighting.
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting.
 *
 * @param boolean A flag specifying whether to enable or disable all highlighting
 * @todo  Rewrite with array traversal
 */
function enable_highlighting($flag = true) {
    $state = (bool) $flag;

    foreach (array_keys($this->lexic_permissions) as $group) {
        if (!is_array($this->lexic_permissions[$group])) {
            // Plain on/off switch
            $this->lexic_permissions[$group] = $state;
            continue;
        }

        // Per-key switches (one entry per sub group): set every one of them
        foreach (array_keys($this->lexic_permissions[$group]) as $index) {
            $this->lexic_permissions[$group][$index] = $state;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $state;
}
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found.
 *
 * Extensions are compared as strings and case sensitively ('c' and 'C' map
 * to different languages in the default table). The first language in the
 * table that lists the extension wins.
 *
 * @param string The extension to get a language name for
 * @param array  A lookup array to use instead of the default one, in the form
 *               'lang_name' => array('extension', ...). A single extension
 *               may also be given as a plain string.
 * @return string The language name, or '' when the extension is unknown
 * @todo Re-think about how this method works (maybe make it private and/or make it
 *       a extension->lang lookup?)
 */
function get_language_name_from_extension( $extension, $lookup = array() ) {
    if ( !is_array($lookup) || empty($lookup)) {
        $lookup = array(
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'd' => array('d'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'mysql' => array('sql'),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // Arrays, objects and null can never name an extension
    if (!is_scalar($extension)) {
        return '';
    }

    // Compare as strings: a loose in_array() lets values such as true
    // match the first entry of the table.
    $wanted = (string) $extension;
    foreach ($lookup as $lang => $extensions) {
        // (array) tolerates 'lang' => 'ext' in user supplied tables
        foreach ((array) $extensions as $candidate) {
            if ((string) $candidate === $wanted) {
                return $lang;
            }
        }
    }

    return '';
}
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used.
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * When the file is not readable, the error state is set to
 * GESHI_ERROR_FILE_NOT_READABLE and neither source nor language is changed.
 *
 * @param string The filename to load the source from
 * @param array  A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 */
function load_from_file($file_name, $lookup = array()) {
    if (!is_readable($file_name)) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source(file_get_contents($file_name));

    // pathinfo() only looks at the last path component, so a dot in a
    // directory name ("dir.d/README") is not mistaken for an extension.
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
/**
 * Adds a keyword to a keyword group for highlighting.
 *
 * A word that is already in the group is not added again. If the group does
 * not hold a keyword list yet, an empty list is created first.
 *
 * @param int    The key of the keyword group to add the keyword to
 * @param string The word to add to the keyword group
 */
function add_keyword($key, $word) {
    // Guard against unknown groups: in_array() needs an array to search
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        // The regexp cache gets built later and will pick the word up then
        return;
    }

    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        // No cached regexp to append to (the subkey would become -1 and
        // the fragment would start with "|"): compile the group instead.
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
/**
 * Removes a keyword from a keyword group.
 *
 * @param int    The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @param bool   Whether to automatically recompile the optimized regexp list or not.
 *               Note: if you set this to false and parse_code() was already called once
 *               for the current language, you have to manually call optimize_keyword_group()
 *               or the removed keyword will stay in cache and still be highlighted! On the
 *               other hand it might be too expensive to recompile the regexp list for every
 *               removal if you want to remove a lot of keywords.
 */
function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if ($position === false) {
        // Word is not part of the group: nothing to do
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($this->parse_cache_built && $recompile) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Creates a new keyword group.
 *
 * The group is enabled for highlighting right away. An empty word list is
 * rejected because empty word lists mess up highlighting.
 *
 * @param int     The key of the keyword group to create
 * @param string  The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array   The words to use for the keyword group
 * @return mixed  false when no words were given, nothing otherwise
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $word_list = (array) $words;
    if (!$word_list) {
        // empty word lists mess up highlighting
        return false;
    }

    //Register the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $word_list;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
    $this->lexic_permissions['KEYWORDS'][$key] = true;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Removes a keyword group, together with its highlighting permission, case
 * sensitivity setting, style and cached regexp list.
 *
 * @param int The key of the keyword group to remove
 */
function remove_keyword_group ($key) {
    //Drop everything stored for the group, including the regexp cache
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key],
        $this->language_data['CACHED_KEYWORD_LISTS'][$key]
    );
}
/**
 * Compiles the optimized regexp list for a keyword group and stores it in
 * the CACHED_KEYWORD_LISTS cache.
 *
 * If PARSER_CONTROL enables SPACE_AS_WHITESPACE for keywords, every space
 * in the compiled expressions is replaced by "\s+". A setting for the
 * individual group takes precedence over the setting for all groups.
 *
 * @param int The key of the keyword group to compile & optimize
 */
function optimize_keyword_group($key) {
    $compiled = $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] = $compiled;

    // isset() on the full path is false when any level is missing, so the
    // intermediate levels need no separate checks.
    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach (array_keys($this->language_data['CACHED_KEYWORD_LISTS'][$key]) as $rxk) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
            str_replace(" ", "\\s+", $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk]);
    }
}
/**
 * Stores the content to use for the header block.
 *
 * @param string The content of the header block
 */
function set_header_content($content) {
    $this->header_content = $content;
}
/**
 * Stores the content to use for the footer block.
 *
 * @param string The content of the footer block
 */
function set_footer_content($content) {
    $this->footer_content = $content;
}
/**
 * Stores the style to use for the header content.
 *
 * @param string The style for the header content
 */
function set_header_content_style($style) {
    $this->header_content_style = $style;
}
/**
 * Stores the style to use for the footer content.
 *
 * @param string The style for the footer content
 */
function set_footer_content_style($style) {
    $this->footer_content_style = $style;
}
/**
 * Sets whether to force a surrounding block around
 * the highlighted code or not.
 *
 * @param boolean Tells whether to enable or disable this feature
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
/**
 * Sets the base URL to be used for the keywords of one group.
 *
 * @param int    The key of the keyword group to set the URL for
 * @param string The URL to set for the group. If {FNAME} is in
 *               the url somewhere, it is replaced by the keyword
 *               that the URL is being made for
 */
function set_url_for_keyword_group($group, $url) {
    $this->language_data['URLS'][$group] = $url;
}
/**
 * Sets the styles for links in code, one link state at a time.
 *
 * @param int    A constant that specifies what state the style is being
 *               set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 */
function set_link_styles($type, $styles) {
    $this->link_styles[$type] = $styles;
}
/**
 * Sets the target for links in code.
 *
 * The value is stored as a ready-made target="..." attribute; an empty
 * target clears it. The target is inserted as given, without escaping.
 *
 * @param string The target for links in the code, e.g. _blank
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
/**
 * Stores the styles to use for important parts of the code.
 *
 * @param string The styles to use on important parts of the code
 */
function set_important_styles($styles) {
    $this->important_styles = $styles;
}
/**
 * Sets whether context-important blocks are highlighted.
 *
 * @param boolean Tells whether to enable or disable highlighting of important blocks
 * @todo Remove this feature from GeSHi
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
/**
 * Sets whether CSS IDs should be added to each line.
 *
 * @param boolean If true, IDs will be added to each line.
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
/**
 * Specifies which lines to highlight extra.
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  An array of line numbers to highlight, or just a line
 *               number on its own.
 * @param string A string specifying the style to use for this line.
 *               If null is specified, the default style is used.
 *               If false is specified, the line will be removed from
 *               special highlighting
 * @todo Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Handle the list one line number at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    $line_no = intval($lines);

    if ($style === false) {
        //Take the line out of special highlighting altogether
        unset($this->highlight_extra_lines[$line_no]);
        unset($this->highlight_extra_lines_styles[$line_no]);
        return;
    }

    //Mark the line as being highlighted specially
    $this->highlight_extra_lines[$line_no] = $line_no;

    if ($style === null) {
        //Default style: forget any individual style for this line
        unset($this->highlight_extra_lines_styles[$line_no]);
    } else {
        $this->highlight_extra_lines_styles[$line_no] = $style;
    }
}
/**
 * Stores the style to use for extra-highlighted lines.
 *
 * @param string The style for extra-highlighted lines
 */
function set_highlight_lines_extra_style($styles) {
    $this->highlight_extra_lines_style = $styles;
}
/**
 * Sets the line-ending. The value is stored as a string.
 *
 * @param string The new line-ending
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
/**
 * Sets what number line numbers should start at. The value is converted
 * to an integer and its sign is dropped.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int The number to start line numbers at
 */
function start_line_numbers_at($number) {
    $start = intval($number);
    $this->line_numbers_start = abs($start);
}
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support. The encoding is stored in lower case; an empty value is ignored.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons.
 *
 * @param string The encoding to use for the source
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean If true, links will be added to keywords
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
/**
 * Setup caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed.
 *
 * Only the number styles are handled here: NUMBERS_CACHE is filled with one
 * entry per style group, and styles keyed by a flag value (1 << n) are
 * moved to the bit index n. Nothing happens when number highlighting is
 * disabled.
 */
function build_style_cache() {
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    //First check what way highlighting information for numbers are given
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    if (is_array($this->language_data['NUMBERS'])) {
        //The language already provides the cache layout itself
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();
    if (!$this->language_data['NUMBERS']) {
        //No flags given: fall back to basic integers and plain floats
        $this->language_data['NUMBERS'] = GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI;
    }

    //Walk the bitfield from the lowest bit upwards
    $bit = 0;
    $remaining = $this->language_data['NUMBERS'];
    while ($remaining > 0) {
        $mask = 1 << $bit;

        //Rearrange style indices if required ...
        if (isset($this->language_data['STYLES']['NUMBERS'][$mask])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$mask];
            unset($this->language_data['STYLES']['NUMBERS'][$mask]);
        }

        //Check if this bit is set for highlighting
        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                //The format has a style group of its own
                $this->language_data['NUMBERS_CACHE'][$bit] = $mask;
            } else {
                //No own style: collect the format in group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $mask;
            }
        }

        ++$bit;
        $remaining >>= 1;
    }
}
/**
 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
 * This function makes stylesheet generators much faster as they do not need these caches.
 *
 * Four caches are (re)built: the symbol lookup table and search regexp,
 * the keyword regexp lists, the bracket search/replace tables and the
 * number regexps. parse_cache_built is set to true at the end.
 */
function build_parse_cache() {
    // cache symbol regexp
    //As this is a costy operation, we avoid doing it for multiple groups ...
    //Instead we perform it for all symbols at once.
    //
    //For this to work, we need to reorganize the data arrays.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $multi_char = array();  // multi char symbols
        $single_char = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            // A plain symbol (not a list) always belongs to style group 0
            if (is_array($symbols)) {
                $group = $key;
                $symbol_list = $symbols;
            } else {
                $group = 0;
                $symbol_list = array($symbols);
            }

            foreach ($symbol_list as $sym) {
                $sym = $this->hsc($sym);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // the first group to list a symbol keeps it
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;

                if (isset($sym[1])) { // multiple chars
                    $multi_char[] = preg_quote($sym, '/');
                } else if ($sym == '-') {
                    // don't trigger range out of order error
                    $single_char[] = '\-';
                } else { // single char
                    $single_char[] = preg_quote($sym, '/');
                }
            }
        }

        //Now we have an array with each possible symbol as the key and the style as the actual data.
        //This way we can set the correct style just the moment we highlight ...
        //
        //Next, build one search expression out of all symbols:
        //multi char symbols as alternatives, single chars as a character class.
        $alternatives = array();
        if (!empty($multi_char)) {
            rsort($multi_char);
            $alternatives[] = implode('|', $multi_char);
        }
        if (!empty($single_char)) {
            rsort($single_char);
            $alternatives[] = '[' . implode('', $single_char) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
    }

    // cache optimized regexp for keyword matching
    // remove old cache
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        // groups without an explicit permission count as enabled
        $disabled = isset($this->lexic_permissions['KEYWORDS'][$key]) &&
            !$this->lexic_permissions['KEYWORDS'][$key];
        if (!$disabled) {
            $this->optimize_keyword_group($key);
        }
    }

    // brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            // inline styles
            $bracket_style = $this->language_data['STYLES']['BRACKETS'][0];
            $this->language_data['CACHE_BRACKET_REPLACE'] = array(
                '<| style="' . $bracket_style . '">[|>',
                '<| style="' . $bracket_style . '">]|>',
                '<| style="' . $bracket_style . '">(|>',
                '<| style="' . $bracket_style . '">)|>',
                '<| style="' . $bracket_style . '">{|>',
                '<| style="' . $bracket_style . '">}|>',
            );
        } else {
            // CSS classes
            $this->language_data['CACHE_BRACKET_REPLACE'] = array(
                '<| class="br0">[|>',
                '<| class="br0">]|>',
                '<| class="br0">(|>',
                '<| class="br0">)|>',
                '<| class="br0">{|>',
                '<| class="br0">}|>',
            );
        }
    }

    //Build the parse cache needed to highlight numbers appropriate
    if ($this->lexic_permissions['NUMBERS']) {
        //Check if the style rearrangements have been processed ...
        //This also does some preprocessing to check which style groups are useable ...
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        //Number format specification
        //All this formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z\.])',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
            );

        //At this step we have an associative array with flag groups for a
        //specific style or an string denoting a regexp given its index.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if (is_string($rxdata)) {
                //The language supplied a ready-made regexp
                $regexp = $rxdata;
            } else {
                //This is a bitfield of number flags to highlight:
                //Build an array, implode them together and make this the actual RX
                $rxuse = array();
                for ($flag = 1; $flag <= $rxdata; $flag <<= 1) {
                    if ($rxdata & $flag) {
                        $rxuse[] = $numbers_format[$flag];
                    }
                }
                $regexp = implode("|", $rxuse);
            }

            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!\|>)/i";
        }
    }

    $this->parse_cache_built = true;
}
2002 * Returns the code in $this->source, highlighted and surrounded by the
2005 * This should only be called ONCE, cos it's SLOW! If you want to highlight
2006 * the same source multiple times, you're better off doing a whole lot of
2007 * str_replaces to replace the <span>s
2011 function parse_code () {
2013 $start_time = microtime();
2015 // Firstly, if there is an error, we won't highlight
2017 //Escape the source for output
2018 $result = $this->hsc($this->source);
2020 //This fix is related to SF#1923020, but has to be applied regardless of
2021 //actually highlighting symbols.
2022 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2024 // Timing is irrelevant
2025 $this->set_time($start_time, $start_time);
2026 $this->finalise($result);
2030 // make sure the parse cache is up2date
2031 if (!$this->parse_cache_built) {
2032 $this->build_parse_cache();
2035 // Replace all newlines to a common form.
2036 $code = str_replace("\r\n", "\n", $this->source);
2037 $code = str_replace("\r", "\n", $code);
2039 // Add spaces for regular expression matching and line numbers
2040 // $code = "\n" . $code . "\n";
2042 // Initialise various stuff
2043 $length = strlen($code);
2044 $COMMENT_MATCHED = false;
2045 $stuff_to_parse = '';
2048 // "Important" selections are handled like multiline comments
2049 // @todo GET RID OF THIS SHIZ
2050 if ($this->enable_important_blocks) {
2051 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2054 if ($this->strict_mode) {
2055 // Break the source into bits. Each bit will be a portion of the code
2056 // within script delimiters - for example, HTML between < and >
2060 $next_match_pointer = null;
2061 // we use a copy to unset delimiters on demand (when they are not found)
2062 $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2064 while ($i < $length) {
2065 $next_match_pos = $length + 1; // never true
2066 foreach ($delim_copy as $dk => $delimiters) {
2067 if(is_array($delimiters)) {
2068 foreach ($delimiters as $open => $close) {
2069 // make sure the cache is setup properly
2070 if (!isset($matches[$dk][$open])) {
2071 $matches[$dk][$open] = array(
2075 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2076 'open_strlen' => strlen($open),
2079 'close_strlen' => strlen($close),
2082 // Get the next little bit for this opening string
2083 if ($matches[$dk][$open]['next_match'] < $i) {
2084 // only find the next pos if it was not already cached
2085 $open_pos = strpos($code, $open, $i);
2086 if ($open_pos === false) {
2087 // no match for this delimiter ever
2088 unset($delim_copy[$dk][$open]);
2091 $matches[$dk][$open]['next_match'] = $open_pos;
2093 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2094 //So we got a new match, update the close_pos
2095 $matches[$dk][$open]['close_pos'] =
2096 strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2098 $next_match_pointer =& $matches[$dk][$open];
2099 $next_match_pos = $matches[$dk][$open]['next_match'];
2103 //So we should match an RegExp as Strict Block ...
2105 * The value in $delimiters is expected to be an RegExp
2106 * containing exactly 2 matching groups:
2107 * - Group 1 is the opener
2108 * - Group 2 is the closer
2110 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2111 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2112 //We got a match ...
2113 $matches[$dk] = array(
2114 'next_match' => $matches_rx[1][1],
2117 'close_strlen' => strlen($matches_rx[2][0]),
2118 'close_pos' => $matches_rx[2][1],
2121 // no match for this delimiter ever
2122 unset($delim_copy[$dk]);
2126 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2127 $next_match_pointer =& $matches[$dk];
2128 $next_match_pos = $matches[$dk]['next_match'];
2132 // non-highlightable text
2134 1 => substr($code, $i, $next_match_pos - $i)
2138 if ($next_match_pos > $length) {
2139 // out of bounds means no next match was found
2143 // highlightable code
2144 $parts[$k][0] = $next_match_pointer['dk'];
2146 //Only combine for non-rx script blocks
2147 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2148 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2149 $i = $next_match_pos + $next_match_pointer['open_strlen'];
2151 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2152 if ($close_pos == false) {
2155 $i = $close_pos + $next_match_pointer['close_strlen'];
2156 if ($i == $length) {
2159 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2160 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2161 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2162 foreach ($matches as $submatches) {
2163 foreach ($submatches as $match) {
2164 if ($match['next_match'] == $i) {
2165 // a different block already matches here!
2175 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2179 if ($close_pos === false) {
2180 // no closing delimiter found!
2181 $parts[$k][1] = substr($code, $next_match_pos);
2185 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2189 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2192 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2193 // when we have only one part, we don't have anything to highlight at all.
2194 // if we have a "maybe" strict language, this should be handled as highlightable code
2209 // Not strict mode - simply dump the source into
2210 // the array at index 1 (the first highlightable block)
2224 //Unset variables we won't need any longer
2227 //Preload some repeatedly used values regarding hardquotes ...
2228 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2229 $hq_strlen = strlen($hq);
2231 //Preload if line numbers are to be generated afterwards
2232 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2233 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2234 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2236 //preload the escape char for faster checking ...
2237 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2239 // this is used for single-line comments
2240 $sc_disallowed_before = "";
2241 $sc_disallowed_after = "";
2243 if (isset($this->language_data['PARSER_CONTROL'])) {
2244 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2245 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2246 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2248 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2249 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2254 //Fix for SF#1932083: Multichar Quotemarks unsupported
2255 $is_string_starter = array();
2256 if ($this->lexic_permissions['STRINGS']) {
2257 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2258 if (!isset($is_string_starter[$quotemark[0]])) {
2259 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2260 } else if (is_string($is_string_starter[$quotemark[0]])) {
2261 $is_string_starter[$quotemark[0]] = array(
2262 $is_string_starter[$quotemark[0]],
2265 $is_string_starter[$quotemark[0]][] = $quotemark;
2270 // Now we go through each part. We know that even-indexed parts are
2271 // code that shouldn't be highlighted, and odd-indexed parts should
2273 for ($key = 0; $key < $num_parts; ++$key) {
2276 // If this block should be highlighted...
2278 // Else not a block to highlight
2279 $endresult .= $this->hsc($parts[$key][1]);
2280 unset($parts[$key]);
2285 $part = $parts[$key][1];
2287 $highlight_part = true;
2288 if ($this->strict_mode && !is_null($parts[$key][0])) {
2289 // get the class key for this block of code
2290 $script_key = $parts[$key][0];
2291 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2292 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2293 $this->lexic_permissions['SCRIPT']) {
2294 // Add a span element around the source to
2295 // highlight the overall source block
2296 if (!$this->use_classes &&
2297 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2298 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2300 $attributes = ' class="sc' . $script_key . '"';
2302 $result .= "<span$attributes>";
2303 $STRICTATTRS = $attributes;
2307 if ($highlight_part) {
2308 // Now, highlight the code in this block. This code
2309 // is really the engine of GeSHi (along with the method
2310 // parse_non_string_part).
2312 // cache comment regexps incrementally
2313 $next_comment_regexp_key = '';
2314 $next_comment_regexp_pos = -1;
2315 $next_comment_multi_pos = -1;
2316 $next_comment_single_pos = -1;
2317 $comment_regexp_cache_per_key = array();
2318 $comment_multi_cache_per_key = array();
2319 $comment_single_cache_per_key = array();
2320 $next_open_comment_multi = '';
2321 $next_comment_single_key = '';
2322 $escape_regexp_cache_per_key = array();
2323 $next_escape_regexp_key = '';
2324 $next_escape_regexp_pos = -1;
2326 $length = strlen($part);
2327 for ($i = 0; $i < $length; ++$i) {
2328 // Get the next char
2332 // update regexp comment cache if needed
2333 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2334 $next_comment_regexp_pos = $length;
2335 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2337 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2338 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2339 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2340 // we have already matched something
2341 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2342 // this comment is never matched
2345 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2347 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2348 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2349 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2351 $match_i = $match[0][1];
2352 if (GESHI_PHP_PRE_433) {
2356 $comment_regexp_cache_per_key[$comment_key] = array(
2357 'key' => $comment_key,
2358 'length' => strlen($match[0][0]),
2362 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2366 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2367 $next_comment_regexp_pos = $match_i;
2368 $next_comment_regexp_key = $comment_key;
2369 if ($match_i === $i) {
2376 $string_started = false;
2378 if (isset($is_string_starter[$char])) {
2379 // Possibly the start of a new string ...
2381 //Check which starter it was ...
2382 //Fix for SF#1932083: Multichar Quotemarks unsupported
2383 if (is_array($is_string_starter[$char])) {
2385 foreach ($is_string_starter[$char] as $testchar) {
2386 if ($testchar === substr($part, $i, strlen($testchar)) &&
2387 strlen($testchar) > strlen($char_new)) {
2388 $char_new = $testchar;
2389 $string_started = true;
2392 if ($string_started) {
2396 $testchar = $is_string_starter[$char];
2397 if ($testchar === substr($part, $i, strlen($testchar))) {
2399 $string_started = true;
2402 $char_len = strlen($char);
2405 if ($string_started && $i != $next_comment_regexp_pos) {
2406 // Hand out the correct style information for this string
2407 $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2408 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2409 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2413 // parse the stuff before this
2414 $result .= $this->parse_non_string_part($stuff_to_parse);
2415 $stuff_to_parse = '';
2417 if (!$this->use_classes) {
2418 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2420 $string_attributes = ' class="st'.$string_key.'"';
2423 // now handle the string
2424 $string = "<span$string_attributes>" . GeSHi::hsc($char);
2425 $start = $i + $char_len;
2426 $string_open = true;
2428 if(empty($this->language_data['ESCAPE_REGEXP'])) {
2429 $next_escape_regexp_pos = $length;
2433 //Get the regular ending pos ...
2434 $close_pos = strpos($part, $char, $start);
2435 if(false === $close_pos) {
2436 $close_pos = $length;
2439 if($this->lexic_permissions['ESCAPE_CHAR']) {
2440 // update escape regexp cache if needed
2441 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2442 $next_escape_regexp_pos = $length;
2443 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2445 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2446 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2447 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2448 // we have already matched something
2449 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2450 // this comment is never matched
2453 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2455 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2456 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2457 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2459 $match_i = $match[0][1];
2460 if (GESHI_PHP_PRE_433) {
2464 $escape_regexp_cache_per_key[$escape_key] = array(
2465 'key' => $escape_key,
2466 'length' => strlen($match[0][0]),
2470 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2474 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2475 $next_escape_regexp_pos = $match_i;
2476 $next_escape_regexp_key = $escape_key;
2477 if ($match_i === $start) {
2484 //Find the next simple escape position
2485 if('' != $this->language_data['ESCAPE_CHAR']) {
2486 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2487 if(false === $simple_escape) {
2488 $simple_escape = $length;
2491 $simple_escape = $length;
2494 $next_escape_regexp_pos = $length;
2495 $simple_escape = $length;
2498 if($simple_escape < $next_escape_regexp_pos &&
2499 $simple_escape < $length &&
2500 $simple_escape < $close_pos) {
2501 //The nexxt escape sequence is a simple one ...
2502 $es_pos = $simple_escape;
2504 //Add the stuff not in the string yet ...
2505 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2507 //Get the style for this escaped char ...
2508 if (!$this->use_classes) {
2509 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2511 $escape_char_attributes = ' class="es0"';
2514 //Add the style for the escape char ...
2515 $string .= "<span$escape_char_attributes>" .
2516 GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2518 //Get the byte AFTER the ESCAPE_CHAR we just found
2519 $es_char = $part[$es_pos + 1];
2520 if ($es_char == "\n") {
2521 // don't put a newline around newlines
2522 $string .= "</span>\n";
2523 $start = $es_pos + 2;
2524 } else if (ord($es_char) >= 128) {
2525 //This is an non-ASCII char (UTF8 or single byte)
2526 //This code tries to work around SF#2037598 ...
2527 if(function_exists('mb_substr')) {
2528 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2529 $string .= $es_char_m . '</span>';
2530 } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2531 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2532 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2533 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2534 "|\xED[\x80-\x9F][\x80-\xBF]".
2535 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2536 "|[\xF1-\xF3][\x80-\xBF]{3}".
2537 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2538 $part, $es_char_m, null, $es_pos + 1)) {
2539 $es_char_m = $es_char_m[0];
2541 $es_char_m = $es_char;
2543 $string .= $this->hsc($es_char_m) . '</span>';
2545 $es_char_m = $this->hsc($es_char);
2547 $start = $es_pos + strlen($es_char_m) + 1;
2549 $string .= $this->hsc($es_char) . '</span>';
2550 $start = $es_pos + 2;
2552 } else if ($next_escape_regexp_pos < $length &&
2553 $next_escape_regexp_pos < $close_pos) {
2554 $es_pos = $next_escape_regexp_pos;
2555 //Add the stuff not in the string yet ...
2556 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2558 //Get the key and length of this match ...
2559 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2560 $escape_str = substr($part, $es_pos, $escape['length']);
2561 $escape_key = $escape['key'];
2563 //Get the style for this escaped char ...
2564 if (!$this->use_classes) {
2565 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2567 $escape_char_attributes = ' class="es' . $escape_key . '"';
2570 //Add the style for the escape char ...
2571 $string .= "<span$escape_char_attributes>" .
2572 $this->hsc($escape_str) . '</span>';
2574 $start = $es_pos + $escape['length'];
2576 //Copy the remainder of the string ...
2577 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2578 $start = $close_pos + $char_len;
2579 $string_open = false;
2581 } while($string_open);
2583 if ($check_linenumbers) {
2584 // Are line numbers used? If, we should end the string before
2585 // the newline and begin it again (so when <li>s are put in the source
2586 // remains XHTML compliant)
2587 // note to self: This opens up possibility of config files specifying
2588 // that languages can/cannot have multiline strings???
2589 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2596 } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2597 substr($part, $i, $hq_strlen) == $hq) {
2598 // The start of a hard quoted string
2599 if (!$this->use_classes) {
2600 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2601 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2603 $string_attributes = ' class="st_h"';
2604 $escape_char_attributes = ' class="es_h"';
2606 // parse the stuff before this
2607 $result .= $this->parse_non_string_part($stuff_to_parse);
2608 $stuff_to_parse = '';
2610 // now handle the string
2613 // look for closing quote
2614 $start = $i + $hq_strlen;
2615 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2616 $start = $close_pos + 1;
2617 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR']) {
2618 // make sure this quote is not escaped
2619 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2620 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2621 // check wether this quote is escaped or if it is something like '\\'
2622 $escape_char_pos = $close_pos - 1;
2623 while ($escape_char_pos > 0
2624 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2627 if (($close_pos - $escape_char_pos) & 1) {
2628 // uneven number of escape chars => this quote is escaped
2635 // found closing quote
2639 //Found the closing delimiter?
2641 // span till the end of this $part when no closing delimiter is found
2642 $close_pos = $length;
2645 //Get the actual string
2646 $string = substr($part, $i, $close_pos - $i + 1);
2649 // handle escape chars and encode html chars
2650 // (special because when we have escape chars within our string they may not be escaped)
2651 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2654 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2655 // hmtl escape stuff before
2656 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2657 // check if this is a hard escape
2658 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2659 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2660 // indeed, this is a hardescape
2661 $new_string .= "<span$escape_char_attributes>" .
2662 $this->hsc($hardescape) . '</span>';
2663 $start = $es_pos + strlen($hardescape);
2667 // not a hard escape, but a normal escape
2668 // they come in pairs of two
2670 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2671 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2672 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2676 $new_string .= "<span$escape_char_attributes>" .
2677 str_repeat($escaped_escape_char, $c) .
2679 $start = $es_pos + $c;
2681 // this is just a single lonely escape char...
2682 $new_string .= $escaped_escape_char;
2683 $start = $es_pos + 1;
2686 $string = $new_string . $this->hsc(substr($string, $start));
2688 $string = $this->hsc($string);
2691 if ($check_linenumbers) {
2692 // Are line numbers used? If, we should end the string before
2693 // the newline and begin it again (so when <li>s are put in the source
2694 // remains XHTML compliant)
2695 // note to self: This opens up possibility of config files specifying
2696 // that languages can/cannot have multiline strings???
2697 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2700 $result .= "<span$string_attributes>" . $string . '</span>';
2704 //Have a look for regexp comments
2705 if ($i == $next_comment_regexp_pos) {
2706 $COMMENT_MATCHED = true;
2707 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2708 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2710 //@todo If remove important do remove here
2711 if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2712 if (!$this->use_classes) {
2713 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2715 $attributes = ' class="co' . $comment['key'] . '"';
2718 $test_str = "<span$attributes>" . $test_str . "</span>";
2720 // Short-cut through all the multiline code
2721 if ($check_linenumbers) {
2722 // strreplace to put close span and open span around multiline newlines
2723 $test_str = str_replace(
2724 "\n", "</span>\n<span$attributes>",
2725 str_replace("\n ", "\n ", $test_str)
2730 $i += $comment['length'] - 1;
2733 $result .= $this->parse_non_string_part($stuff_to_parse);
2734 $stuff_to_parse = '';
2737 // If we haven't matched a regexp comment, try multi-line comments
2738 if (!$COMMENT_MATCHED) {
2739 // Is this a multiline comment?
2740 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2741 $next_comment_multi_pos = $length;
2742 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2744 if (isset($comment_multi_cache_per_key[$open]) &&
2745 ($comment_multi_cache_per_key[$open] >= $i ||
2746 $comment_multi_cache_per_key[$open] === false)) {
2747 // we have already matched something
2748 if ($comment_multi_cache_per_key[$open] === false) {
2749 // this comment is never matched
2752 $match_i = $comment_multi_cache_per_key[$open];
2753 } else if (($match_i = stripos($part, $open, $i)) !== false) {
2754 $comment_multi_cache_per_key[$open] = $match_i;
2756 $comment_multi_cache_per_key[$open] = false;
2759 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2760 $next_comment_multi_pos = $match_i;
2761 $next_open_comment_multi = $open;
2762 if ($match_i === $i) {
2768 if ($i == $next_comment_multi_pos) {
2769 $open = $next_open_comment_multi;
2770 $close = $this->language_data['COMMENT_MULTI'][$open];
2771 $open_strlen = strlen($open);
2772 $close_strlen = strlen($close);
2773 $COMMENT_MATCHED = true;
2774 $test_str_match = $open;
2775 //@todo If remove important do remove here
2776 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2777 $open == GESHI_START_IMPORTANT) {
2778 if ($open != GESHI_START_IMPORTANT) {
2779 if (!$this->use_classes) {
2780 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2782 $attributes = ' class="coMULTI"';
2784 $test_str = "<span$attributes>" . $this->hsc($open);
2786 if (!$this->use_classes) {
2787 $attributes = ' style="' . $this->important_styles . '"';
2789 $attributes = ' class="imp"';
2792 // We don't include the start of the comment if it's an
2794 $test_str = "<span$attributes>";
2797 $test_str = $this->hsc($open);
2800 $close_pos = strpos( $part, $close, $i + $open_strlen );
2802 if ($close_pos === false) {
2803 $close_pos = $length;
2806 // Short-cut through all the multiline code
2807 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2808 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2809 $test_str_match == GESHI_START_IMPORTANT) &&
2810 $check_linenumbers) {
2812 // strreplace to put close span and open span around multiline newlines
2813 $test_str .= str_replace(
2814 "\n", "</span>\n<span$attributes>",
2815 str_replace("\n ", "\n ", $rest_of_comment)
2818 $test_str .= $rest_of_comment;
2821 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2822 $test_str_match == GESHI_START_IMPORTANT) {
2823 $test_str .= '</span>';
2826 $i = $close_pos + $close_strlen - 1;
2829 $result .= $this->parse_non_string_part($stuff_to_parse);
2830 $stuff_to_parse = '';
2834 // If we haven't matched a multiline comment, try single-line comments
2835 if (!$COMMENT_MATCHED) {
2836 // cache potential single line comment occurances
2837 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2838 $next_comment_single_pos = $length;
2839 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2841 if (isset($comment_single_cache_per_key[$comment_key]) &&
2842 ($comment_single_cache_per_key[$comment_key] >= $i ||
2843 $comment_single_cache_per_key[$comment_key] === false)) {
2844 // we have already matched something
2845 if ($comment_single_cache_per_key[$comment_key] === false) {
2846 // this comment is never matched
2849 $match_i = $comment_single_cache_per_key[$comment_key];
2851 // case sensitive comments
2852 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2853 ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2854 // non case sensitive
2855 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2856 (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2857 $comment_single_cache_per_key[$comment_key] = $match_i;
2859 $comment_single_cache_per_key[$comment_key] = false;
2862 if ($match_i !== false && $match_i < $next_comment_single_pos) {
2863 $next_comment_single_pos = $match_i;
2864 $next_comment_single_key = $comment_key;
2865 if ($match_i === $i) {
2871 if ($next_comment_single_pos == $i) {
2872 $comment_key = $next_comment_single_key;
2873 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2874 $com_len = strlen($comment_mark);
2876 // This check will find special variables like $# in bash
2877 // or compiler directives of Delphi beginning {$
2878 if ((empty($sc_disallowed_before) || ($i == 0) ||
2879 (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2880 (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
2881 (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2883 // this is a valid comment
2884 $COMMENT_MATCHED = true;
2885 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2886 if (!$this->use_classes) {
2887 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2889 $attributes = ' class="co' . $comment_key . '"';
2891 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2893 $test_str = $this->hsc($comment_mark);
2896 //Check if this comment is the last in the source
2897 $close_pos = strpos($part, "\n", $i);
2899 if ($close_pos === false) {
2900 $close_pos = $length;
2903 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2904 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2905 $test_str .= "</span>";
2908 // Take into account that the comment might be the last in the source
2916 $result .= $this->parse_non_string_part($stuff_to_parse);
2917 $stuff_to_parse = '';
2923 // Where are we adding this char?
2924 if (!$COMMENT_MATCHED) {
2925 $stuff_to_parse .= $char;
2927 $result .= $test_str;
2929 $COMMENT_MATCHED = false;
2932 // Parse the last bit
2933 $result .= $this->parse_non_string_part($stuff_to_parse);
2934 $stuff_to_parse = '';
2936 $result .= $this->hsc($part);
2938 // Close the <span> that surrounds the block
2939 if ($STRICTATTRS != '') {
2940 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2941 $result .= '</span>';
2944 $endresult .= $result;
2945 unset($part, $parts[$key], $result);
2948 //This fix is related to SF#1923020, but has to be applied regardless of
2949 //actually highlighting symbols.
2950 /** NOTE: memorypeak #3 */
2951 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2953 // // Parse the last stuff (redundant?)
2954 // $result .= $this->parse_non_string_part($stuff_to_parse);
2956 // Lop off the very first and last spaces
2957 // $result = substr($result, 1, -1);
2959 // We're finished: stop timing
2960 $this->set_time($start_time, microtime());
2962 $this->finalise($endresult);
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Works in three steps:
 *  1. Every tab is expanded to the next tab stop. The column counter only
 *     advances for visible characters: HTML tags are skipped entirely and
 *     an HTML entity counts as a single column.
 *  2. A leading space on each line and the second space of every pair of
 *     spaces are turned into "&nbsp;" so browsers don't collapse them.
 *  3. When no line numbers are generated, newlines are converted to the
 *     configured line ending (or to <br /> via nl2br() when none is set).
 *
 * @param string The source to indent (reference!)
 */
function indent(&$result) {
    // Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null; // Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // One non-breaking space entity followed by $tab_width plain spaces.
        // The substr() offsets used below (6 and +5) rely on the entity
        // being exactly strlen('&nbsp;') == 6 bytes long.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                // Nothing to expand on this line
                continue;
            }

            $pos = 0; // visible column reached so far
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we don't modify $pos. This is so we ignore HTML
                // in the line and only work out the tab replacement
                // via the actual content of the string.
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' == $char) {
                    // Possibly the start of an entity: look for its ';'
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        // A lone ampersand occupies one column
                        ++$pos;
                    } else {
                        // The remaining entity characters will each bump
                        // $pos in the default branch; compensate up front
                        // so the whole entity counts as a single column.
                        $pos -= $posi + 2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" == $char) {
                    $str = '';
                    // OPTIMISE - move $strs out. Make an array:
                    // $tabs = array(
                    //  1 => '&nbsp;',
                    //  2 => '&nbsp; ',
                    //  3 => '&nbsp; &nbsp;' etc etc
                    // to use instead of building a string every time
                    $tab_end_width = $tab_width - ($pos % $tab_width); // columns left until the next tab stop
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // Plain spaces only: skip the leading entity
                        $str .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        // Entity plus ($tab_end_width - 1) plain spaces
                        $str .= substr($tab_string, 0, $tab_end_width + 5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    if (false === strpos($line, "\t", $i + 1)) {
                        // No further tabs: copy the rest of the line verbatim
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' == $char) {
                    // A space in the first column has to be non-breaking
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines); // We don't need the lines separated beyond this --- free them!
    }
    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
/**
 * Changes the case of a keyword for those languages where a change is asked for
 *
 * The language's CASE_KEYWORDS setting selects the conversion: upper case,
 * lower case, or (for any other value) no change at all.
 *
 * @param string The keyword to change the case of
 * @return string The keyword with its case changed
 */
function change_case($instr) {
    $case_mode = $this->language_data['CASE_KEYWORDS'];

    if ($case_mode == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }

    if ($case_mode == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // Any other setting leaves the keyword as written
    return $instr;
}
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * The keyword is wrapped in the intermediate "<|/GROUP/>...|>" marker for
 * the keyword group currently stored in $this->_kw_replace_group. When
 * keyword links are enabled and the group has a non-empty URL template, a
 * "<|UR1|" link marker is put in front and a closing "</a>" behind it.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array The matches array; element 0 is the keyword that was found
 * @return string The marked-up replacement for the match found
 *
 * @todo Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base URL for this keyword group

            // Default to the keyword exactly as matched. Without this
            // fallback a failed lookup below would leave $word pointing at
            // the last keyword of the group (or undefined for an empty one).
            $word = $keyword;

            // For case insensitive groups take the spelling from the
            // language file so that {FNAME} gets the correct case
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Fill in the URL template. Dots are masked as <DOT> so that
            // later passes over the text don't treat them as symbols.
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
/**
 * Handles regular expression highlighting definitions whose style is
 * produced by a callback function.
 *
 * The callable stored in STYLES/REGEXPS for the key in $this->_rx_key is
 * invoked with the first captured group; its result becomes the content of
 * the style attribute, which is followed by that group and the "|>" marker.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return The highlighted string
 */
function handle_regexps_callback($matches) {
    // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $captured = $matches[1];
    $style = call_user_func($style_callback, $captured);

    return ' style="' . $style . '"' . $captured . '|>';
}
/**
 * Handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
 *
 * The match is wrapped in the "<|!REG3XP{key}!>...|>" marker for the key in
 * $this->_hmr_key. At every newline the marker is closed and reopened so
 * that no marker spans more than one line. If $this->_hmr_replace is set,
 * the \N back references in the before, after and replace templates are
 * filled in from the match first; otherwise the whole match is used.
 *
 * @note this is a callback, don't use it directly
 *
 * @param array the matches array
 * @return string The match wrapped in per-line REGEXPS markers
 */
function handle_multiline_regexps($matches) {
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;
    if ($this->_hmr_replace) {
        // Map each back reference (\0, \1, ...) to the text it captured
        $backrefs = array();
        foreach ($matches as $k => $captured) {
            $backrefs['\\' . $k] = $captured;
        }

        // strtr() substitutes in one pass and prefers the longest key, so
        // "\10" is not mistaken for "\1" followed by "0", and captured text
        // that itself contains something like "\2" is not substituted again.
        $before = strtr($before, $backrefs);
        $after = strtr($after, $backrefs);
        $replace = strtr($this->_hmr_replace, $backrefs);
    } else {
        $replace = $matches[0];
    }

    return $before
        . '<|!REG3XP' . $this->_hmr_key .'!>'
        . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
        . '|>'
        . $after;
}
3194 * Takes a string that has no strings or comments in it, and highlights
3195 * stuff like keywords, numbers and methods.
3197 * @param string The string to parse for keyword, numbers etc.
3200 * @todo BUGGY! Why? Why not build string and return?
3202 function parse_non_string_part($stuff_to_parse) {
3203 $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3205 // Regular expressions
3206 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3207 if ($this->lexic_permissions['REGEXPS'][$key]) {
3208 if (is_array($regexp)) {
3209 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3210 // produce valid HTML when we match multiple lines
3211 $this->_hmr_replace = $regexp[GESHI_REPLACE];
3212 $this->_hmr_before = $regexp[GESHI_BEFORE];
3213 $this->_hmr_key = $key;
3214 $this->_hmr_after = $regexp[GESHI_AFTER];
3215 $stuff_to_parse = preg_replace_callback(
3216 "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3217 array($this, 'handle_multiline_regexps'),
3219 $this->_hmr_replace = false;
3220 $this->_hmr_before = '';
3221 $this->_hmr_after = '';
3223 $stuff_to_parse = preg_replace(
3224 '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3225 $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3229 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3230 // produce valid HTML when we match multiple lines
3231 $this->_hmr_key = $key;
3232 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3233 array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3234 $this->_hmr_key = '';
3236 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3242 // Highlight numbers. As of 1.0.8 we support diffent types of numbers
3243 $numbers_found = false;
3244 if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3245 $numbers_found = true;
3247 //For each of the formats ...
3248 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3249 //Check if it should be highlighted ...
3250 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3254 // Highlight keywords
3255 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3256 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3257 if ($this->lexic_permissions['STRINGS']) {
3258 $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3259 $disallowed_before .= $quotemarks;
3260 $disallowed_after .= $quotemarks;
3262 $disallowed_before .= "])";
3263 $disallowed_after .= "])";
3265 $parser_control_pergroup = false;
3266 if (isset($this->language_data['PARSER_CONTROL'])) {
3267 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3268 $x = 0; // check wether per-keyword-group parser_control is enabled
3269 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3270 $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3273 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3274 $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3277 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3281 // if this is changed, don't forget to change it below
3282 // if (!empty($disallowed_before)) {
3283 // $disallowed_before = "(?<![$disallowed_before])";
3285 // if (!empty($disallowed_after)) {
3286 // $disallowed_after = "(?![$disallowed_after])";
3289 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3290 if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3291 $this->lexic_permissions['KEYWORDS'][$k]) {
3293 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3294 $modifiers = $case_sensitive ? '' : 'i';
3296 // NEW in 1.0.8 - per-keyword-group parser control
3297 $disallowed_before_local = $disallowed_before;
3298 $disallowed_after_local = $disallowed_after;
3299 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3300 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3301 $disallowed_before_local =
3302 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3305 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3306 $disallowed_after_local =
3307 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3311 $this->_kw_replace_group = $k;
3313 //NEW in 1.0.8, the cached regexp list
3314 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3315 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3316 $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3317 // Might make a more unique string for putting the number in soon
3318 // Basically, we don't put the styles in yet because then the styles themselves will
3319 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3320 $stuff_to_parse = preg_replace_callback(
3321 "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3322 array($this, 'handle_keyword_replace'),
3330 // Now that's all done, replace /[number]/ with the correct styles
3332 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3333 if (!$this->use_classes) {
3334 $attributes = ' style="' .
3335 (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3336 $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3338 $attributes = ' class="kw' . $k . '"';
3340 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3343 if ($numbers_found) {
3344 // Put number styles in
3345 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3346 //Commented out for now, as this needs some review ...
3347 // if ($numbers_permissions & $id) {
3348 //Get the appropriate style ...
3349 //Checking for unset styles is done by the style cache builder ...
3350 if (!$this->use_classes) {
3351 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3353 $attributes = ' class="nu'.$id.'"';
3356 //Set in the correct styles ...
3357 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3362 // Highlight methods and fields in objects
3363 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3364 $oolang_spaces = "[\s]*";
3365 $oolang_before = "";
3366 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3367 if (isset($this->language_data['PARSER_CONTROL'])) {
3368 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3369 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3370 $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3372 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3373 $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3375 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3376 $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3381 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3382 if (false !== strpos($stuff_to_parse, $splitter)) {
3383 if (!$this->use_classes) {
3384 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3386 $attributes = ' class="me' . $key . '"';
3388 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3394 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3395 // You try it, and see what happens ;)
3396 // TODO: Fix lexic permissions not converting entities if shouldn't
3397 // be highlighting regardless
3399 if ($this->lexic_permissions['BRACKETS']) {
3400 $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3401 $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3405 //FIX for symbol highlighting ...
3406 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3407 //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3408 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3410 for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3411 $symbol_match = $pot_symbols[$s_id][0][0];
3412 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3413 // already highlighted blocks _must_ include either < or >
3414 // so if this conditional applies, we have to skip this match
3415 // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3416 if(strpos($symbol_match, '<SEMI>') === false &&
3417 strpos($symbol_match, '<PIPE>') === false) {
3422 // if we reach this point, we have a valid match which needs to be highlighted
3424 $symbol_length = strlen($symbol_match);
3425 $symbol_offset = $pot_symbols[$s_id][0][1];
3426 unset($pot_symbols[$s_id]);
3427 $symbol_end = $symbol_length + $symbol_offset;
3430 // if we have multiple styles, we have to handle them properly
3431 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3433 // Split the current stuff to replace into its atomic symbols ...
3434 preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3435 foreach ($sym_match_syms[0] as $sym_ms) {
3436 //Check if consequtive symbols belong to the same group to save output ...
3437 if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3438 && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3439 if (-1 != $old_sym) {
3442 $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3443 if (!$this->use_classes) {
3444 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3446 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3449 $symbol_hl .= $sym_ms;
3451 unset($sym_match_syms);
3453 //Close remaining tags and insert the replacement at the right position ...
3454 //Take caution if symbol_hl is empty to avoid doubled closing spans.
3455 if (-1 != $old_sym) {
3459 if (!$this->use_classes) {
3460 $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3462 $symbol_hl = '<| class="sy0">';
3464 $symbol_hl .= $symbol_match . '|>';
3467 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3469 // since we replace old text with something of different size,
3470 // we'll have to keep track of the differences
3471 $global_offset += strlen($symbol_hl) - $symbol_length;
3474 //FIX for symbol highlighting ...
3476 // Add class/style for regexps
3477 foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3478 if ($this->lexic_permissions['REGEXPS'][$key]) {
3479 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3480 $this->_rx_key = $key;
3481 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3482 array($this, 'handle_regexps_callback'),
3485 if (!$this->use_classes) {
3486 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3488 if (is_array($this->language_data['REGEXPS'][$key]) &&
3489 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3490 $attributes = ' class="' .
3491 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3493 $attributes = ' class="re' . $key . '"';
3496 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3501 // Replace <DOT> with . for urls
3502 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3503 // Replace <|UR1| with <a href= for urls also
3504 if (isset($this->link_styles[GESHI_LINK])) {
3505 if ($this->use_classes) {
3506 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3508 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3511 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3515 // NOW we add the span thingy ;)
3518 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3519 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3520 return substr($stuff_to_parse, 1);
/**
 * Sets the time taken to parse the code
 *
 * Each timestamp is normally the string produced by microtime(), which has
 * the form "<fraction> <seconds>". A plain number (for example the float
 * returned by microtime(true)) is accepted as well.
 *
 * The result is stored in $this->time, in seconds.
 *
 * @param string|float The time when parsing started
 * @param string|float The time when parsing ended
 */
function set_time($start_time, $end_time) {
    $stamps = array();
    foreach (array($start_time, $end_time) as $stamp) {
        $pieces = is_string($stamp) ? explode(' ', trim($stamp), 2) : array();
        if (count($pieces) == 2) {
            // microtime() string: fraction first, whole seconds second
            $stamps[] = array((float) $pieces[1], (float) $pieces[0]);
        } else {
            // a single number already carries seconds and fraction together
            $stamps[] = array((float) $stamp, 0.0);
        }
    }

    // Subtract whole seconds and fractions separately: adding the small
    // fraction to a ten digit epoch value first would throw away precision.
    $this->time = ($stamps[1][0] - $stamps[0][0]) + ($stamps[1][1] - $stamps[0][1]);
}
/**
 * Gets the time taken to parse the code
 *
 * @return double The time taken to parse the code (the value stored in
 *                $this->time by set_time())
 */
function get_time() {
    return $this->time;
}
/**
 * Merges arrays recursively, overwriting values of the first array with values of later arrays
 *
 * Takes any number of arrays. Keys of later arrays replace equal keys of
 * earlier ones; when both sides hold a value under the same key and the later
 * value is an array, the two are merged recursively instead of replaced.
 *
 * @return array|false The merged array (an empty array when called without
 *                     arguments), or false - together with an
 *                     E_USER_WARNING - if any argument is not an array
 */
function merge_arrays() {
    $arrays = func_get_args();
    $narrays = count($arrays);

    // nothing to merge at all
    if ($narrays == 0) {
        return array();
    }

    // check arguments
    // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
    for ($i = 0; $i < $narrays; $i++) {
        if (!is_array($arrays[$i])) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // the first array is in the output set in every case
    $ret = $arrays[0];

    // merge $ret with the remaining arrays
    for ($i = 1; $i < $narrays; $i++) {
        foreach ($arrays[$i] as $key => $value) {
            if (is_array($value) && isset($ret[$key])) {
                // if $ret[$key] is not an array you try to merge a scalar value with an array - the result is not defined (incompatible arrays)
                // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
                $ret[$key] = $this->merge_arrays($ret[$key], $value);
            } else {
                $ret[$key] = $value;
            }
        }
    }

    return $ret;
}
/**
 * Gets language information and stores it for later use
 *
 * Executes the given language file, which is expected to fill the local
 * variable $language_data, stores that array in $this->language_data and
 * initialises the lexic permissions from it. If a readable file with the
 * same name but the extension ".style.php" exists next to the language file
 * (the last four characters of $file_name are replaced), the $style_data it
 * defines is merged over the language's STYLES.
 *
 * @param string The filename of the language file you want to load
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    if ($file_name == $this->loaded_language) {
        // this file is already loaded!
        return;
    }

    //Prepare some stuff before actually loading the language file
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    //Load the language file
    require $file_name;

    // Perhaps some checking might be added here later to check that
    // $language data is a valid thing but maybe not
    $this->language_data = $language_data;

    // Set strict mode if should be set
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Set permissions for all lexics to true
    // so they'll be highlighted by default
    // (keyword groups without any keywords are switched off right away)
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        $this->lexic_permissions['KEYWORDS'][$key] = !empty($this->language_data['KEYWORDS'][$key]);
    }

    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
        $this->lexic_permissions['COMMENTS'][$key] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $key) {
        $this->lexic_permissions['REGEXPS'][$key] = true;
    }

    // for BenBE and future code reviews:
    // we can use empty here since we only check for existence and emptiness of an array
    // if it is not an array at all but rather false or null this will work as intended as well
    // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // everything except GESHI_NEVER leaves the lexic enabled
            $perm = $value !== GESHI_NEVER;
            // strict comparison: a loose one would also match the integer key 0 on older PHP versions
            if ($flag === 'ALL') {
                $this->enable_highlighting($perm);
                continue;
            }
            if (!isset($this->lexic_permissions[$flag])) {
                // unknown lexic permission
                continue;
            }
            if (is_array($this->lexic_permissions[$flag])) {
                foreach ($this->lexic_permissions[$flag] as $key => $val) {
                    $this->lexic_permissions[$flag][$key] = $perm;
                }
            } else {
                $this->lexic_permissions[$flag] = $perm;
            }
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = isset($this->language_data['ESCAPE_CHAR'])
            ? $this->language_data['ESCAPE_CHAR']
            : '';
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (is_readable($style_filename)) {
        //Clear any style_data that could have been set before ...
        // (the language file runs in this scope and might have defined it)
        if (isset($style_data)) {
            unset($style_data);
        }

        //Read the Style Information from the style file
        include $style_filename;

        //Apply the new styles to our current language styles
        if (isset($style_data) && is_array($style_data)) {
            $this->language_data['STYLES'] =
                $this->merge_arrays($this->language_data['STYLES'], $style_data);
        }
    }
}
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * The finished markup (header, one entry per source line, footer) is written
 * back into $parsed_code; nothing is returned.
 *
 * Line numbers used for "extra" highlighting are 1-based in every output
 * mode, i.e. the first line of the code is line 1.
 *
 * @param string The code already parsed (reference!)
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line...
        // ($i is advanced inside the body: it is the 0-based index in the
        // first half and the 1-based line number in the second half)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // from here on $i is the 1-based number of the current line
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            // free the line as soon as it has been emitted
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    // number of <span>s opened for this line
                    $close = 0;
                    // 1-based line number, the key used by the extra-highlight settings
                    $line = $i + 1;

                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                              .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }

                    //Is this some line with extra styles???
                    if (in_array($line, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line])) {
                                $parsed_code .= "<span class=\"xtra lx$line\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line) . "\">";
                        }
                        ++$close;
                    }

                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } else if ($i != $n) {
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }

        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number, the key used by the extra-highlight settings
            $line = $i + 1;

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                      .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }

            //Is this some line with extra styles???
            if (in_array($line, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line])) {
                        $parsed_code .= "<span class=\"xtra lx$line\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // the block-level spans already end the line, no newline needed
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            // free the line as soon as it has been emitted
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
/**
 * Creates the header for the code block (with correct attributes)
 *
 * Builds the opening tag of the container selected by $this->header_type
 * (<pre>, <div>, <table> or, for GESHI_HEADER_NONE, no container at all),
 * followed by the optional header content and - when line numbers are on -
 * the opening <ol> (or the opening table row in table mode).
 *
 * @return string The header for the code block
 */
function header() {
    // Get attributes needed
    /**
     * @todo   Document behaviour change - class is outputted regardless of whether
     *         we're using classes or not. Same with style
     */
    $attributes = ' class="' . $this->language;
    if ($this->overall_class != '') {
        $attributes .= " ".$this->overall_class;
    }
    $attributes .= '"';

    if ($this->overall_id != '') {
        $attributes .= " id=\"{$this->overall_id}\"";
    }
    if ($this->overall_style != '') {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    $ol_attributes = '';

    if ($this->line_numbers_start != 1) {
        $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
    }

    // Get the header HTML
    $header = $this->header_content;
    if ($header) {
        if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        if ($this->use_classes) {
            $attr = ' class="head"';
        } else {
            $attr = " style=\"{$this->header_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
        } else {
            $header = "<div$attr>$header</div>";
        }
    }

    // Without a container the overall attributes go onto the <ol> instead
    if (GESHI_HEADER_NONE == $this->header_type) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "$header<ol$attributes$ol_attributes>";
        }
        return $header . ($this->force_code_block ? '<div>' : '');
    }

    // Work out what to return and do it
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header<ol$ol_attributes>";
        } else if ($this->header_type == GESHI_HEADER_DIV ||
            $this->header_type == GESHI_HEADER_PRE_VALID) {
            return "<div$attributes>$header<ol$ol_attributes>";
        } else if ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            return "<table$attributes>$header<tbody><tr class=\"li1\">";
        }
    } else {
        if ($this->header_type == GESHI_HEADER_PRE) {
            return "<pre$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        } else {
            return "<div$attributes>$header" .
                ($this->force_code_block ? '<div>' : '');
        }
    }
}
/**
 * Returns the footer for the code block.
 *
 * Emits the optional footer content and closes, in reverse order, whatever
 * header() opened for the current header type and line number setting.
 *
 * @return string The footer for the code block
 */
function footer() {
    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            // carry the footer class/style on the cell, just like header()
            // does for the <thead> cell ($attr starts with a space)
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    if (GESHI_HEADER_NONE == $this->header_type) {
        return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</div>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</tr></tbody>$footer</table>";
        }
        // without line numbers header() opened a plain <div> container
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</div>";
    }
    else {
        if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            return "</ol>$footer</pre>";
        }
        return ($this->force_code_block ? '</div>' : '') .
            "$footer</pre>";
    }
}
/**
 * Replaces certain keywords in the header and footer with
 * certain configuration values
 *
 * Every keyword may be written either as <NAME> or as {NAME}:
 *  - TIME:     parse time in seconds, three decimals
 *  - LANGUAGE: the LANG_NAME of the loaded language
 *  - VERSION:  GESHI_VERSION
 *  - SPEED:    source bytes per second ("N/A" if no time was measured)
 *
 * @param string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 */
function replace_keywords($instr) {
    $time = (float) $this->get_time();

    if ($time <= 0) {
        // nothing measured (yet): avoid dividing by zero
        $speed = 'N/A';
    } else {
        $speed = strlen($this->source) / $time;
        if ($speed >= 1024) {
            $speed = sprintf("%.2f KB/s", $speed / 1024.0);
        } else {
            $speed = sprintf("%.0f B/s", $speed);
        }
    }

    $values = array(
        'TIME'     => number_format($time, 3),
        'LANGUAGE' => $this->language_data['LANG_NAME'],
        'VERSION'  => GESHI_VERSION,
        'SPEED'    => $speed
    );

    // both spellings of a keyword map to the same value
    $keywords = $replacements = array();
    foreach ($values as $name => $value) {
        $keywords[] = '<' . $name . '>';
        $replacements[] = $value;
        $keywords[] = '{' . $name . '}';
        $replacements[] = $value;
    }

    return str_replace($keywords, $replacements, $instr);
}
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The documentation of htmlspecialchars() says:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition, ';' and '|' are turned into the internal placeholders
 * '<SEMI>' and '<PIPE>'.
 *
 * The quote style only affects the current call: the shared translation
 * table itself is never modified.
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @param   string  $string string to be converted
 * @param   integer $quote_style
 *      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *      - ENT_NOQUOTES: escapes only &, < and >
 *      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return  string  converted string
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // Base table, built once. It must stay untouched: being static, any
    // change made for one quote style would leak into all later calls.
    static $aTransSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );                      // ENT_COMPAT set

    // work on a per-call copy (arrays are copied on assignment)
    $aTrans = $aTransSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTrans['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTrans["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTrans);
}
4194 * Returns a stylesheet for the highlighted code. If $economy mode
4195 * is true, we only return the stylesheet declarations that matter for
4196 * this code block instead of the whole thing
4198 * @param boolean Whether to use economy mode or not
4199 * @return string A stylesheet built on the data for the current language
4202 function get_stylesheet($economy_mode = true) {
// Assembles a CSS stylesheet string for the current language from the style
// properties held on this object and from $this->language_data['STYLES'].
// With $economy_mode on, rules are emitted only for features that are
// enabled (line-number mode, $this->lexic_permissions). Rules guarded by a
// "!= ''" check are skipped for empty style strings in either mode.
// NOTE(review): the embedded source line numbers in this listing are not
// contiguous (e.g. 4205 is followed by 4210), so short statements such as
// closing braces, else branches and the final return are not shown here.
// The comments added below describe only the statements that are visible.
4203 // If there's an error, chances are that the language file
4204 // won't have populated the language data file, so we can't
4205 // risk getting a stylesheet...
4210 //Check if the style rearrangements have been processed ...
4211 //This also does some preprocessing to check which style groups are useable ...
// A missing 'NUMBERS_CACHE' entry is what triggers build_style_cache() here.
4212 if(!isset($this->language_data['NUMBERS_CACHE'])) {
4213 $this->build_style_cache();
4216 // First, work out what the selector should be. If there's an ID,
4217 // that should be used, the same for a class. Otherwise, a selector
4218 // of '' means that these styles will be applied anywhere
// The id selector takes precedence; without an id the selector is the
// language name as a class, optionally chained with the overall class.
// NOTE(review): rules below such as "{$selector}a:link" only form a
// descendant selector if $selector ends in whitespace; the statement that
// would append it is not visible in this listing - confirm.
4219 if ($this->overall_id) {
4220 $selector = '#' . $this->overall_id;
4222 $selector = '.' . $this->language;
4223 if ($this->overall_class) {
4224 $selector .= '.' . $this->overall_class;
4229 // Header of the stylesheet
// The non-economy header additionally names the language, class and id.
4230 if (!$economy_mode) {
4231 $stylesheet = "/**\n".
4232 " * GeSHi Dynamically Generated Stylesheet\n".
4233 " * --------------------------------------\n".
4234 " * Dynamically generated stylesheet for {$this->language}\n".
4235 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4236 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4237 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4238 " * --------------------------------------\n".
4241 $stylesheet = "/**\n".
4242 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4243 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4247 // Set the <ol> to have no effect at all if there are line numbers
4248 // (<ol>s have margins that should be destroyed so all layout is
4249 // controlled by the set_overall_style method, which works on the
4250 // <pre> or <div> container). Additionally, set default styles for lines
4251 if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4252 //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
// Inside a double-quoted string "$selector.de1" is the value of $selector
// followed by the literal text ".de1", and "{{$x}}" yields a literal "{",
// the interpolated value and a literal "}" - i.e. a CSS declaration block.
// Note that this rule is emitted even when $this->code_style is empty.
4253 $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4256 // Add overall styles
4257 // note: neglect economy_mode, empty styles are meaningless
4258 if ($this->overall_style != '') {
4259 $stylesheet .= "$selector {{$this->overall_style}}\n";
4262 // Add styles for links
4263 // note: economy mode does not make _any_ sense here
4264 // either the style is empty and thus no selector is needed
4265 // or the appropriate key is given.
// One rule per link pseudo-class (:link, :hover, :active, :visited). The
// statements that pick a rule based on $key are not visible in this listing.
4266 foreach ($this->link_styles as $key => $style) {
4270 $stylesheet .= "{$selector}a:link {{$style}}\n";
4273 $stylesheet .= "{$selector}a:hover {{$style}}\n";
4276 $stylesheet .= "{$selector}a:active {{$style}}\n";
4279 $stylesheet .= "{$selector}a:visited {{$style}}\n";
4285 // Header and footer
4286 // note: neglect economy_mode, empty styles are meaningless
4287 if ($this->header_content_style != '') {
4288 $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4290 if ($this->footer_content_style != '') {
4291 $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4294 // Styles for important stuff
4295 // note: neglect economy_mode, empty styles are meaningless
4296 if ($this->important_styles != '') {
4297 $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4300 // Simple line number styles
// li/li1 and ln rules: written when not in economy mode or when any
// line-number mode is active, and only for non-empty styles.
4301 if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4302 $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4304 if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4305 $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4307 // If there is a style set for fancy line numbers, echo it out
// In economy mode the li2 rule is limited to GESHI_FANCY_LINE_NUMBERS.
4308 if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4309 $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4312 // note: empty styles are meaningless
// Per-group rules follow. The class name is a two-letter prefix plus the
// group key (kw, co, es, br, sy, st, nu, me, sc, re). In economy mode a
// group is included only if its lexic permission is set and truthy.
4313 foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4314 if ($styles != '' && (!$economy_mode ||
4315 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4316 $this->lexic_permissions['KEYWORDS'][$group]))) {
4317 $stylesheet .= "$selector.kw$group {{$styles}}\n";
// Comment groups are also kept in economy mode when a non-empty
// COMMENT_REGEXP entry exists for the same group key.
4320 foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4321 if ($styles != '' && (!$economy_mode ||
4322 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4323 $this->lexic_permissions['COMMENTS'][$group]) ||
4324 (!empty($this->language_data['COMMENT_REGEXP']) &&
4325 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4326 $stylesheet .= "$selector.co$group {{$styles}}\n";
4329 foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4330 if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4331 // NEW: since 1.0.8 we have to handle hardescapes
// NOTE(review): the body of this 'HARD' branch is not visible in this
// listing; presumably it changes $group before the class name is built.
4332 if ($group === 'HARD') {
4335 $stylesheet .= "$selector.es$group {{$styles}}\n";
4338 foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4339 if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4340 $stylesheet .= "$selector.br$group {{$styles}}\n";
4343 foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4344 if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4345 $stylesheet .= "$selector.sy$group {{$styles}}\n";
4348 foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4349 if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4350 // NEW: since 1.0.8 we have to handle hardquotes
// NOTE(review): as with the escape chars above, the 'HARD' branch body is
// not visible in this listing.
4351 if ($group === 'HARD') {
4354 $stylesheet .= "$selector.st$group {{$styles}}\n";
4357 foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4358 if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4359 $stylesheet .= "$selector.nu$group {{$styles}}\n";
4362 foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4363 if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4364 $stylesheet .= "$selector.me$group {{$styles}}\n";
4367 // note: neglect economy_mode, empty styles are meaningless
4368 foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4369 if ($styles != '') {
4370 $stylesheet .= "$selector.sc$group {{$styles}}\n";
4373 foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4374 if ($styles != '' && (!$economy_mode ||
4375 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4376 $this->lexic_permissions['REGEXPS'][$group]))) {
// A regexp definition given as an array with a GESHI_CLASS element supplies
// its own CSS class name; the "re$group" rule further down is the
// alternative (the branch keyword between them is not visible here).
4377 if (is_array($this->language_data['REGEXPS'][$group]) &&
4378 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4379 $stylesheet .= "$selector.";
4380 $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4381 $stylesheet .= " {{$styles}}\n";
4383 $stylesheet .= "$selector.re$group {{$styles}}\n";
4387 // Styles for lines being highlighted extra
// The generic ln-xtra rule is written outside economy mode, or when the
// number of highlighted lines differs from the number of per-line styles
// (presumably meaning some highlighted lines use the default style).
4388 if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4389 $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4391 $stylesheet .= "{$selector}span.xtra { display:block; }\n";
// One lx<line> rule per individually styled highlighted line.
4392 foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4393 $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4400 * Gets the style that is used for the specified line
4402 * @param int The line number information is requested for
4406 function get_line_style($line) {
// Selects the CSS style for highlighted line $line: the per-line entry in
// $this->highlight_extra_lines_styles when one is set, otherwise the shared
// default $this->highlight_extra_lines_style.
// NOTE(review): the statement returning $style is not visible in this
// listing (the embedded line numbers stop at 4412) - confirm.
4409 if (isset($this->highlight_extra_lines_styles[$line])) {
4410 $style = $this->highlight_extra_lines_styles[$line];
4411 } else { // if no "extra" style assigned
4412 $style = $this->highlight_extra_lines_style;
4419 * this function creates an optimized regular expression list
4420 * of an array of strings.
4423 * <code>$list = array('faa', 'foo', 'foobar');
4424 * => string 'f(aa|oo(bar)?)'</code>
4426 * @param $list array of (unquoted) strings
4427 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
4428 * @return string for regular expression
4429 * @author Milian Wolff <mail@milianw.de>
4433 function optimize_regexp_list($list, $regexp_delimiter = '/') {
// Turns $list into one or more regular-expression alternation strings in
// which entries sharing a prefix are merged (see the 'f(aa|oo(bar)?)'
// example in the docblock). Entries are escaped with preg_quote() and
// collected in a nested token array ($tokens), which is flushed into
// $regexp_list through _optimize_regexp_list_tokens_to_string().
// The value returned is the array $regexp_list (initialised to array('')),
// although the docblock's @return says "string".
// NOTE(review): each entry is compared only with the most recently
// inserted keys ($prev_keys), so prefix merging presumably relies on $list
// being sorted - confirm against callers.
// NOTE(review): the embedded source line numbers in this listing skip
// (e.g. 4438 -> 4441); initialisations of $list_key, $tokens, $cur_len,
// $level and $char as well as the loop-control statements are not shown.
// Regex metacharacters plus the caller's delimiter; consulted below before
// a key is split in two.
4434 $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
4435 '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
4437 $regexp_list = array('');
4438 $num_subpatterns = 0;
4441 // the tokens which we will use to generate the regexp list
4443 $prev_keys = array();
4444 // go through all entries of the list and generate the token list
4446 for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
// Once the accumulated length exceeds GESHI_MAX_PCRE_LENGTH the pending
// tokens are written to a fresh $regexp_list slot, and the subpattern count
// restarts from the number of '(?:' groups in that slot.
4447 if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
4448 // seems like the length of this pcre is growing exorbitantly
4449 $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
4450 $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
// Escape the entry for use inside a pattern with the given delimiter.
4455 $entry = preg_quote((string) $list[$i], $regexp_delimiter);
// $pointer is a reference used to descend into the nested $tokens array.
4456 $pointer = &$tokens;
4457 // properly assign the new entry to the correct position in the token array
4458 // possibly generate smaller common denominator keys
4460 // get the common denominator
4461 if (isset($prev_keys[$level])) {
4462 if ($prev_keys[$level] == $entry) {
4463 // this is a duplicate entry, skip it
// Compares $entry and the previous key at this level character by character
// (the statement advancing $char is not visible in this listing).
4467 while (isset($entry[$char]) && isset($prev_keys[$level][$char])
4468 && $entry[$char] == $prev_keys[$level][$char]) {
4472 // this entry has at least some chars in common with the current key
4473 if ($char == strlen($prev_keys[$level])) {
4474 // current key is totally matched, i.e. this entry has just some bits appended
4475 $pointer = &$pointer[$prev_keys[$level]];
4477 // only part of the keys match
4478 $new_key_part1 = substr($prev_keys[$level], 0, $char);
4479 $new_key_part2 = substr($prev_keys[$level], $char);
// Do not split if either half would begin with a regex metacharacter
// (presumably to avoid cutting through an escape sequence produced by
// preg_quote); the entry is then stored whole at this level instead.
4481 if (in_array($new_key_part1[0], $regex_chars)
4482 || in_array($new_key_part2[0], $regex_chars)) {
4483 // this is bad, a regex char as first character
4484 $pointer[$entry] = array('' => true);
4485 array_splice($prev_keys, $level, count($prev_keys), $entry);
4486 $cur_len += strlen($entry);
4489 // relocate previous tokens
// The old key's subtree is re-hung under the shared prefix, keyed by the
// remainder of the old key.
4490 $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
4491 unset($pointer[$prev_keys[$level]]);
4492 $pointer = &$pointer[$new_key_part1];
4493 // recreate key index
4494 array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
4495 $cur_len += strlen($new_key_part2);
// Continue with the part of $entry that follows the shared prefix.
4499 $entry = substr($entry, $char);
4502 // else: fall through, i.e. no common denominator was found
4504 if ($level == 0 && !empty($tokens)) {
4505 // we can dump current tokens into the string and throw them away afterwards
4506 $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
4507 $new_subpatterns = substr_count($new_entry, '(?:');
// A GESHI_MAX_PCRE_SUBPATTERNS of 0 (falsy) switches the limit off. When
// the limit would be exceeded the string starts a new $regexp_list slot;
// the lines after that append it to the current slot, '|'-separated.
4508 if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
4509 $regexp_list[++$list_key] = $new_entry;
4510 $num_subpatterns = $new_subpatterns;
4512 if (!empty($regexp_list[$list_key])) {
4513 $new_entry = '|' . $new_entry;
4515 $regexp_list[$list_key] .= $new_entry;
4516 $num_subpatterns += $new_subpatterns;
4521 // no further common denominator found
// The '' => true element marks that a complete entry ends at this key;
// _optimize_regexp_list_tokens_to_string() tests for it with isset().
4522 $pointer[$entry] = array('' => true);
4523 array_splice($prev_keys, $level, count($prev_keys), $entry);
4525 $cur_len += strlen($entry);
4530 // make sure the last tokens get converted as well
4531 $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
4532 if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
4533 $regexp_list[++$list_key] = $new_entry;
4535 if (!empty($regexp_list[$list_key])) {
4536 $new_entry = '|' . $new_entry;
4538 $regexp_list[$list_key] .= $new_entry;
4540 return $regexp_list;
4543 * this function creates the appropriate regexp string of a token array
4544 * you should not call this function directly, @see $this->optimize_regexp_list().
4546 * @param &$tokens array of tokens
4547 * @param $recursed bool to know whether we recursed or not
4549 * @author Milian Wolff <mail@milianw.de>
4553 function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
// Serialises the nested token array built by optimize_regexp_list() into a
// regular-expression alternation string. Nested sub-token arrays are
// rendered recursively inside a non-capturing group "(?:" ... ")".
// $recursed is passed as true by the recursive call below.
// NOTE(review): the embedded source line numbers in this listing skip
// (e.g. 4553 -> 4555), so the initialisation of $list, the statements that
// append the token, the '?' and the '|' separators, and the condition
// around the optimisation section are not shown here.
4555 foreach ($tokens as $token => $sub_tokens) {
// A '' key marks that a complete entry ends at this token; remember that
// and remove the marker so that only real continuations remain.
4557 $close_entry = isset($sub_tokens['']);
4558 unset($sub_tokens['']);
4559 if (!empty($sub_tokens)) {
4560 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4562 // make sub_tokens optional
4569 // do some optimizations
4570 // common trailing strings
4572 //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4573 // '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
// An optional group holding exactly one character is collapsed to that
// character followed by '?'.
4575 $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4576 // (?:a|b|c|d|...)? => [abcd...]?
4577 // TODO: a|bb|c => [ac]|bb
// The callback is created only once ($callback_2 is checked with isset).
// It strips the '|' separators and wraps the result in a character class.
// NOTE(review): create_function() is deprecated as of PHP 7.2 and removed
// in PHP 8.0; an anonymous function would be the replacement.
4579 if (!isset($callback_2)) {
4580 $callback_2 = create_function('$matches', 'return "[" . str_replace("|", "", $matches[1]) . "]";');
// Matches a non-capturing group made up only of single-character
// alternatives and hands it to the callback above.
4582 $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4584 // return $list without trailing pipe
4585 return substr($list, 0, -1);
4587 } // End Class GeSHi
4590 if (!function_exists('geshi_highlight')) {
4592 * Easy way to highlight stuff. Behaves just like highlight_string
4594 * @param string The code to highlight
4595 * @param string The language to highlight the code in
4596 * @param string The path to the language files. You can leave this blank,
4597 * since from version 1.0.7 onwards the path should be automatically detected
4598 * @param boolean Whether to return the result or to echo
4599 * @return string The code highlighted (if $return is true)
4602 function geshi_highlight($string, $language, $path = null, $return = false) {
4603 $geshi = new GeSHi($string, $language, $path);
4604 $geshi->set_header_type(GESHI_HEADER_NONE);
4607 return '<code>' . $geshi->parse_code() . '</code>';
4610 echo '<code>' . $geshi->parse_code() . '</code>';
4612 if ($geshi->error()) {