/*
Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.
For licensing, see LICENSE.html or http://ckeditor.com/license
*/

/**
 * HTML text parser. It tokenizes an HTML string and reports every token
 * found (tag openers, tag closers, text and comments) through the "on..."
 * callback functions, which should be overridden on the instance.
 * @constructor
 * @example
 * var parser = new CKEDITOR.htmlParser();
 * parser.onText = function( text )
 *     {
 *         alert( text );  // e.g. "Hello"
 *     };
 * parser.parse( "&lt;b&gt;Hello&lt;/b&gt;" );
 */
CKEDITOR.htmlParser = function()
{
	this._ =
	{
		// Global tokenizer regex. Every match is exactly one of:
		//   - a closing tag  (group 1: tag name);
		//   - a comment      (group 2: comment text);
		//   - an opening tag (group 3: tag name, group 4: attributes part).
		// It is stateful ("g" flag), so parse() resets its lastIndex.
		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:[^"\'>]+)|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )
	};
};

(function()
{
	// Matches one attribute inside the attributes part of an opening tag:
	//   1 : attribute name (word chars plus "-", ":" and "."),
	//   2 : double quoted value,
	//   3 : single quoted value,
	//   4 : unquoted value.
	// Both the "=" sign and the value are optional, so valueless attributes
	// (e.g. "disabled") and empty assignments (e.g. "checked=") are found.
	var attribsRegex = /([\w\-:.]+)(?:\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+))?)?/g,

		// Boolean attributes: when found with no value, their value is set
		// to the attribute name itself (e.g. checked="checked").
		emptyAttribs = {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};

	CKEDITOR.htmlParser.prototype =
	{
		/**
		 * Function to be fired when a tag opener is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @param {Object} attributes An object containing all tag attributes. Each
		 *		property in this object represents an attribute name (lowercased)
		 *		and its value is the attribute value.
		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
		 *		tag doesn't.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagOpen	: function() {},

		/**
		 * Function to be fired when a tag closer is found. This function
		 * should be overridden when using this class.
		 * @param {String} tagName The tag name. The name is guaranteed to be
		 *		lowercased.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onTagClose = function( tagName )
		 *     {
		 *         alert( tagName );  // e.g. "b"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onTagClose	: function() {},

		/**
		 * Function to be fired when text is found. This function
		 * should be overridden when using this class.
		 * @param {String} text The text found.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onText = function( text )
		 *     {
		 *         alert( text );  // e.g. "Hello"
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onText		: function() {},

		/**
		 * Function to be fired when a comment is found. This function
		 * should be overridden when using this class.
		 * @param {String} comment The comment text. It is an empty string for
		 *		empty comments.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * parser.onComment = function( comment )
		 *     {
		 *         alert( comment );  // e.g. " Example "
		 *     };
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		onComment	: function() {},

		/**
		 * Parses text, looking for HTML tokens, like tag openers or closers,
		 * or comments. This function fires the onTagOpen, onTagClose, onText
		 * and onComment functions during its execution, in document order.
		 * @param {String} html The HTML to be parsed.
		 * @example
		 * var parser = new CKEDITOR.htmlParser();
		 * // The onTagOpen, onTagClose, onText and onComment should be overridden
		 * // at this point.
		 * parser.parse( "&lt;!-- Example --&gt;&lt;b&gt;Hello&lt;/b&gt;" );
		 */
		parse : function( html )
		{
			var partsRegex = this._.htmlPartsRegex,
				parts,
				tagName,
				nextIndex = 0;

			// The regex is global and lives on the instance. If a previous
			// parse() was interrupted (e.g. a callback threw), lastIndex is
			// still pointing into the old string, so always start from zero.
			partsRegex.lastIndex = 0;

			while ( ( parts = partsRegex.exec( html ) ) )
			{
				var tagIndex = parts.index;

				// Everything between the previous token and this one is text.
				if ( tagIndex > nextIndex )
					this.onText( html.substring( nextIndex, tagIndex ) );

				nextIndex = partsRegex.lastIndex;

				/*
				 "parts" is an array with the following items:
					0 : The entire match (not used)
					1 : Group filled with the tag name for closing tags.
					2 : Group filled with the comment text.
					3 : Group filled with the tag name for opening tags.
					4 : Group filled with the attributes part of opening tags.
				*/

				// Closing tag
				if ( ( tagName = parts[ 1 ] ) )
				{
					this.onTagClose( tagName.toLowerCase() );
					continue;
				}

				// Opening tag
				if ( ( tagName = parts[ 3 ] ) )
				{
					var attribs = {},
						attribMatch,
						attribsPart = parts[ 4 ],
						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );

					// With no space before the slash (e.g. "<br/>"), the greedy
					// tag name group swallows the slash ("br/"). Remove it from
					// the name and, when nothing else follows, flag the tag as
					// self closing.
					if ( tagName.length > 1 && tagName.charAt( tagName.length - 1 ) == '/' )
					{
						tagName = tagName.substring( 0, tagName.length - 1 );
						selfClosing = selfClosing || !attribsPart;
					}

					if ( attribsPart )
					{
						while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
						{
							// Unmatched groups are undefined (or empty strings in
							// some engines), so fall through to the filled one.
							var attName = attribMatch[1].toLowerCase(),
								attValue = attribMatch[2] || attribMatch[3] || attribMatch[4] || '';

							// hasOwnProperty avoids false positives for names
							// inherited from Object.prototype (e.g. "constructor").
							if ( !attValue && emptyAttribs.hasOwnProperty( attName ) )
								attribs[ attName ] = attName;
							else
								attribs[ attName ] = attValue;
						}
					}

					this.onTagOpen( tagName.toLowerCase(), attribs, selfClosing );
					continue;
				}

				// Comment. The closing and opening tag groups can never be
				// empty on a match, so reaching this point means the comment
				// alternative matched, possibly with empty text ("<!---->").
				this.onComment( parts[ 2 ] || '' );
			}

			// Trailing text after the last token.
			if ( html.length > nextIndex )
				this.onText( html.substring( nextIndex, html.length ) );
		}
	};
})();