1 /*
  2 Copyright (c) 2003-2009, CKSource - Frederico Knabben. All rights reserved.
  3 For licensing, see LICENSE.html or http://ckeditor.com/license
  4 */
  5
  6 /**
  7  * HTML text parser.
  8  * @constructor
  9  * @example
 10  */
 11 CKEDITOR.htmlParser = function()
 12 {
 13 	this._ =
 14 	{
 15 		htmlPartsRegex : new RegExp( '<(?:(?:\\/([^>]+)>)|(?:!--([\\S|\\s]*?)-->)|(?:([^\\s>]+)\\s*((?:(?:[^"\'>]+)|(?:"[^"]*")|(?:\'[^\']*\'))*)\\/?>))', 'g' )
 16 	};
 17 };
 18
 19 (function()
 20 {
 21 	var attribsRegex	= /([\w:]+)\s*=\s*(?:(?:"([^"]*)")|(?:'([^']*)')|([^\s>]+))?/g,
 22 		emptyAttribs	= {checked:1,compact:1,declare:1,defer:1,disabled:1,ismap:1,multiple:1,nohref:1,noresize:1,noshade:1,nowrap:1,readonly:1,selected:1};
 23
 24 	CKEDITOR.htmlParser.prototype =
 25 	{
 26 		/**
 27 		 * Function to be fired when a tag opener is found. This function
 28 		 * should be overriden when using this class.
 29 		 * @param {String} tagName The tag name. The name is guarantted to be
 30 		 *		lowercased.
 31 		 * @param {Object} attributes An object containing all tag attributes. Each
 32 		 *		property in this object represent and attribute name and its
 33 		 *		value is the attribute value.
 34 		 * @param {Boolean} selfClosing true if the tag closes itself, false if the
 35 		 * 		tag doesn't.
 36 		 * @example
 37 		 * var parser = new CKEDITOR.htmlParser();
 38 		 * parser.onTagOpen = function( tagName, attributes, selfClosing )
 39 		 *     {
 40 		 *         alert( tagName );  // e.g. "b"
 41 		 *     });
 42 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 43 		 */
 44 		onTagOpen	: function() {},
 45
 46 		/**
 47 		 * Function to be fired when a tag closer is found. This function
 48 		 * should be overriden when using this class.
 49 		 * @param {String} tagName The tag name. The name is guarantted to be
 50 		 *		lowercased.
 51 		 * @example
 52 		 * var parser = new CKEDITOR.htmlParser();
 53 		 * parser.onTagClose = function( tagName )
 54 		 *     {
 55 		 *         alert( tagName );  // e.g. "b"
 56 		 *     });
 57 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 58 		 */
 59 		onTagClose	: function() {},
 60
 61 		/**
 62 		 * Function to be fired when text is found. This function
 63 		 * should be overriden when using this class.
 64 		 * @param {String} text The text found.
 65 		 * @example
 66 		 * var parser = new CKEDITOR.htmlParser();
 67 		 * parser.onText = function( text )
 68 		 *     {
 69 		 *         alert( text );  // e.g. "Hello"
 70 		 *     });
 71 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 72 		 */
 73 		onText		: function() {},
 74
 75 		/**
 76 		 * Function to be fired when a commend is found. This function
 77 		 * should be overriden when using this class.
 78 		 * @param {String} comment The comment text.
 79 		 * @example
 80 		 * var parser = new CKEDITOR.htmlParser();
 81 		 * parser.onText = function( comment )
 82 		 *     {
 83 		 *         alert( comment );  // e.g. " Example "
 84 		 *     });
 85 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 86 		 */
 87 		onComment	: function() {},
 88
 89 		/**
 90 		 * Parses text, looking for HTML tokens, like tag openers or closers,
 91 		 * or comments. This function fires the onTagOpen, onTagClose, onText
 92 		 * and onComment function during its execution.
 93 		 * @param {String} html The HTML to be parsed.
 94 		 * @example
 95 		 * var parser = new CKEDITOR.htmlParser();
 96 		 * // The onTagOpen, onTagClose, onText and onComment should be overriden
 97 		 * // at this point.
 98 		 * parser.parse( "<!-- Example --><b>Hello</b>" );
 99 		 */
100 		parse : function( html )
101 		{
102 			var parts,
103 				tagName,
104 				nextIndex = 0;
105
106 			while ( ( parts = this._.htmlPartsRegex.exec( html ) ) )
107 			{
108 				var tagIndex = parts.index;
109 				if ( tagIndex > nextIndex )
110 					this.onText( html.substring( nextIndex, tagIndex ) );
111
112 				nextIndex = this._.htmlPartsRegex.lastIndex;
113
114 				/*
115 				 "parts" is an array with the following items:
116 					0 : The entire match (not used)
117 					1 : Group filled with the tag name for closing tags.
118 					2 : Group filled with the comment text.
119 					3 : Group filled with the tag name for opening tags.
120 					4 : Group filled with the attributes part of opening tags.
121 				 */
122
123 				// Closing tag
124 				if ( ( tagName = parts[ 1 ] ) )
125 				{
126 					this.onTagClose( tagName.toLowerCase() );
127 					continue;
128 				}
129
130 				// Opening tag
131 				if ( ( tagName = parts[ 3 ] ) )
132 				{
133 					var attribs = {},
134 						attribMatch,
135 						attribsPart = parts[ 4 ],
136 						selfClosing = !!( attribsPart && attribsPart.charAt( attribsPart.length - 1 ) == '/' );
137
138 					if ( attribsPart )
139 					{
140 						while ( ( attribMatch = attribsRegex.exec( attribsPart ) ) )
141 						{
142 							var attName = attribMatch[1].toLowerCase(),
143 								attValue = attribMatch[2] || attribMatch[3] || attribMatch[4] || '';
144
145 							if ( !attValue && emptyAttribs[ attName ] )
146 								attribs[ attName ] = attName;
147 							else
148 								attribs[ attName ] = attValue;
149 						}
150 					}
151
152 					this.onTagOpen( tagName.toLowerCase(), attribs, selfClosing );
153 					continue;
154 				}
155
156 				// Comment
157 				if( ( tagName = parts[ 2 ] ) )
158 					this.onComment( tagName );
159 			}
160
161 			if ( html.length > nextIndex )
162 				this.onText( html.substring( nextIndex, html.length ) );
163 		}
164 	};
165 })();
166