NorKen Technologies, Inc.


Home Page of NorKen Technologies, Inc. | Information About NorKen Technologies, Inc. | Information About NorKen Technologies' Products | Information About NorKen Technologies' Services | Information About Ordering NorKen Technologies' Products | How to Contact NorKen Technologies


FAQs



Free Trial

Order



Downloads

Grammars





























































































































































































HTML Parser Example

// =================================================================
// ProGrammar Grammar Definition File
// -----------------------------------------------------------------
//  
//  Markup.GDL -  Defines a general syntax for parsing documents
//                that are marked-up by tags.  This grammar accepts 
//                any tag name, as long as the beginning and ending 
//                tags match.  Specific markup languages, such as
//                HTML, can extend and constrain this grammar.
//
//  This example is intended for demonstration purposes only.
//
//  (c) Copyright 1999, 2000 NorKen Technologies, Inc.
//  All rights reserved.
//
// ==================================================================

// Markup: a tag-name-agnostic grammar for tag-delimited documents.
// A document ("text") is a run of "section"s; each section is a script
// block, a tagged element (start tag, nested sections, optional end tag),
// a comment, a doctype declaration, or plain text.
grammar Markup 
{
	// Start symbol: a repeated run of sections.
	text ::= { section };

	// One unit of the document.  The nested sections and the end tag of
	// an element are both optional, so an element that is never closed
	// still parses.
	section ::= 
		  script
		| start_tag [{ section }] [end_tag] 
		| comment 
		| doctype
		| plain_text
		;
	
	plain_text  ::= *("<");	// parse everything up to the next tag
	
	// NOTE(review): the "comment", "doctype" and "end_tag" productions were
	// damaged when this listing was extracted from HTML (everything between
	// a '<' and the next '>' inside them was stripped).  They have been
	// reconstructed from the surviving text, the otherwise-unreferenced
	// helper symbols (doctype_info, end_tag_name) and the comment that
	// describes the end-tag constraint -- confirm against the original
	// Markup.GDL.

	// A comment: the opening delimiter, everything up to the closing
	// delimiter, then the closing delimiter itself.
	comment ::= "<!--" *("-->") "-->";
	
	// A doctype declaration; its body is scanned by doctype_info.
	doctype ::= "<!" "doctype" doctype_info ">" ;
	
	// A script block: the opening script tag, everything up to the
	// closing script tag, then the closing tag itself.  The script body
	// is therefore not parsed as markup.
	script ::= start_script
				*(end_script) end_script ;
				
	start_script ::= "<" "script" *(">") ">" ;
	end_script   ::= "<" "/" "script" ">" ;
	
	// Body of a doctype declaration: everything up to the closing '>'.
	doctype_info ::= *(">");
	
	start_tag ::= 	"<"
			element_name 
			[{ tag_attr }] 	// optional tag attributes
			">"
			;

	element_name  ::= identifier;

	// An attribute is a name, optionally followed by "=" and a value.
	tag_attr ::= attr_name ["=" attr_value_string];

	attr_name ::= '[a-zA-Z0-9-]+' ;

	// There are three different ways to delimit an attribute 
	// value: with double-quotes, single quotes, or not delimited.
	
	attr_value_string ::= "\"" attr_value "\""
					  |   "\'" attr_value "\'"
					  |        attr_value 
					  ;

	// Regardless of how the attribute value is delimited, we 
	// are only interested in the "value itself".  The following 
	// production shows how a nonterminal symbol can be parsed 
	// in different ways, depending upon the context in which it 
	// occurs. For example, when attr_value occurs in 
	// double-quotes, the parser will scan all characters up to 
	// the closing quote; but when it occurs without quotations, 
	// the parser scans all characters up to the next whitespace 
	// or closing tag ('>') character.  Hence, symbol attr_value 
	// is "polymorphic".
			
	attr_value ::= 
		(? #VALUE == "\"") *("\"")     // scan to closing dbl-quote
		 | (? #VALUE == "\'") *("\'")  // scan to closing single-quote
		 | '[^\32\t\n\r>]+'            // scan to next whitespace or '>'
				; 
				
	// The constraint "(? #VALUE == ^^section.start_tag.element_name)" 
	// enforces the rule that end_tags must have the same name as their 
	// corresponding start_tag.
	// NOTE(review): "==" is used here to match the comparison constraints
	// in attr_value above; the original descriptive comment quoted the
	// operator as "=" -- confirm which form the parser expects.

	end_tag ::= "</" end_tag_name 
			(? #VALUE == ^^section.start_tag.element_name) 
			">";
			
	end_tag_name  ::= alphanumeric;
};


// =================================================================
// ProGrammar Grammar Definition File
// -----------------------------------------------------------------
//  
//  Html.GDL - Parses HTML documents; extends the "Markup" grammar.
//
//  This example is intended for demonstration purposes only.
//
//  (c) Copyright 1999, 2000 NorKen Technologies, Inc.
//  All rights reserved.
//
// ==================================================================


// HTML: specializes the generic Markup grammar by attaching warning-level
// constraints to element names, attribute names and attribute values.
// The nested grammar "Attrs" holds the per-element attribute lists and the
// per-attribute value productions that those constraints look up by name.
grammar HTML extends Markup	// extend the 'Markup' grammar 
{
	text ::= Markup.text;	// start symbol
	
	// Constrain the possible values for "element_name" to those 
	// defined in HTML 3.2

	element_name ::= 
		Markup.element_name
			(? #WARN = 100, 
			   #VALUE ::= html32_tag; )	; // constraint
	
	// define valid HTML tag names
	//
	// The heading tags are not listed individually; they are covered by
	// the final alternative: "H" followed by numeric<1> (presumably a
	// single digit -- confirm).  That alternative must be joined to the
	// name list with '|'; without it the production is a sequence that
	// requires a listed name *followed by* "H<n>".
	// NOTE(review): some HTML 3.2 element names (e.g. "DD") do not appear
	// in this list -- confirm whether that is intentional.
	
	html32_tag ::= 
		( "A"         | "ADDRESS"     | "AREA"      | "B"
		| "BASE"      | "BASEFONT"    | "BGSOUND"   | "BIG"
		| "BLINK"     | "BLOCKQUOTE"  | "BODY"      | "BR"		
		| "CAPTION"   | "CENTER"      | "CITE"      | "CODE"    
		| "COL"
		| "COLGROUP"  | "COMMENT"     | "DFN"       | "DIR"
		| "DIV"       | "DL"          | "DT"        | "EM"      
		| "FONT"
		| "FORM"      | "FRAME"       | "FRAMESET"  | "HEAD"
		| "HR"        | "HTML"        | "I"         | "IMG"
		| "INPUT"     | "ISINDEX"     | "KBD"       | "LI"
		| "LINK"      | "LISTING"     | "MAP"       | "MARQUEE"	
		| "MENU"      | "META"        | "NEXTID"    | "NOBR"		
		| "NOFRAMES"  | "OL"          | "OPTION"    | "P"		
		| "PLAINTEXT" | "PRE"         | "SAMP"      | "SCRIPT"
		| "SELECT"    | "SMALL"       | "STRIKE"    | "STRONG"
		| "SUB"       | "SUP"         | "TABLE"     | "TBODY"
		| "TD"        | "TEXTAREA"    | "TFOOT"     | "TH"
		| "THEAD"     | "TITLE"       | "TR"        | "TT"
		| "U"         | "UL"          | "VAR"       | "WBR"
		| "XMP" 
		)
		| "H" numeric<1>
		;
			
	// Attribute tables.  Productions named "<element>_attrs" list the
	// attribute names accepted for that element; productions named
	// "<attribute>_value" describe the accepted values of that attribute.
	// Both families are referenced only by computed name, from the
	// attr_name and attr_value constraints that follow this grammar.
	grammar Attrs
	{
		a_attrs	::= 
			  "HREF" | "METHODS" | "NAME" | "REL" 
			| "REV" | "TARGET" | "TITLE" | "URN" ;
	
		area_attrs ::= 
			"SHAPE" | "COORDS" | "HREF" ;
	
		basefont_attrs ::= 
			"FACE" | "COLOR" ;
	
		bgsound_attrs ::= 
			"SRC" | "LOOP" ;
	
		body_attrs ::= 
			"BACKGROUND" | "BGCOLOR" | "BGPROPERTIES" 
			| "LEFTMARGIN" | "LINK" | "VLINK" | "ALINK" 
			| "TEXT" | "TOPMARGIN" | "STYLESRC" 
			| "MARGINWIDTH" | "MARGINHEIGHT" ;
			  
		base_attrs	::= "HREF" | "TARGET" ;
	
		caption_attrs ::= "ALIGN" | "VALIGN" ;
	
		col_attrs ::= "ALIGN" | "SPAN" ;
	
		colgroup_attrs ::= "ALIGN" | "SPAN" | "VALIGN" ;
	
		font_attrs ::= "COLOR" | "FACE" | "SIZE" ;
	
		form_attrs ::= "ACTION" | "ENCTYPE" | "METHOD" ;
	
		frame_attrs	::= 
			"SRC" | "NAME" | "MARGINWIDTH" | "MARGINHEIGHT" 
			| "SCROLLING" |   "NORESIZE" | "FRAMEBORDER" 
			| "FRAMESPACING" ;
				
		// FRAMESET accepts its own two attributes plus everything in
		// frame_attrs.
		frameset_attrs	::= 
			"ROWS" | "COLS" | frame_attrs;				
	
		hr_attrs ::= 
			"ALIGN" | "COLOR" | "NOSHADE" | "SIZE" | "WIDTH" ;
	
		input_attrs	::= 
			"ALIGN" | "CHECKED" | "MAXLENGTH" | "NAME" | "SIZE" 
			| "SRC" | "TYPE" |   "BUTTON" | "CHECKBOX" | "FILE" 
			| "HIDDEN" | "IMAGE" | "PASSWORD" | "RADIO" 
			| "RESET" | "SUBMIT" | "TEXT" | "TEXTAREA" | "VALUE" ;
				
	
		img_attrs ::= 
			"ALIGN" | "ALT" | "BORDER" | "ISMAP" | "LOWSRC" 
			| "SRC"	|   "VSPACE" | "HSPACE" | "WIDTH" 
			| "HEIGHT" | "USEMAP"
			// MS Explorer
			|   "CONTROLS" | "DYNSRC" | "LOOP" | "LOOPDELAY" 
			| "START" | "VRML" | "NAME"	;

		isindex_attrs ::= 
			"ACTION" | "PROMPT" ;
	
		// LINK accepts the same attribute names as A.
		link_attrs ::= a_attrs;
	
		map_attrs ::= "NAME" ;
	
		marquee_attrs ::= 
			"ALIGN" | "BEHAVIOR" | "BGCOLOR" | "DIRECTION" 
			| "HEIGHT" | "HSPACE" | "LOOP" | "SCROLLAMOUNT" 
			| "SCROLLDELAY" | "VSPACE" | "WIDTH" ;
				
		meta_attrs	::= "CONTENT" | "HTTP-EQUIV" | "NAME" ;
	
		option_attrs ::= "SELECTED" | "VALUE" ;
	
		p_attrs	 ::= "ALIGN" ;
	
		pre_attrs ::= "WIDTH" ;
	
		select_attrs ::= "MULTIPLE" | "NAME" | "SIZE" ;
	
		table_attrs	::= 
			"ALIGN" | "BACKGROUND" | "BGCOLOR" | "BORDER" 
			| "BORDERCOLOR"	|   "BORDERCOLORDARK" 
			| "BORDERCOLORLIGHT" | "CELLPADDING" | "CELLSPACING" 
			| "FRAME" | "HEIGHT" | "RULES" | "VALIGN" | "WIDTH"
					;
				
		td_attrs ::= 
			"ALIGN" | "BACKGROUND" | "BGCOLOR" | "BORDERCOLOR" 
			| "BORDERCOLORDARK"	|   "BORDERCOLORLIGHT" | "COLSPAN" 
			| "HEIGHT" | "NOWRAP" | "ROWSPAN" |   "VALIGN" 
			| "WIDTH"
					;
	
		textarea_attrs ::= "WRAP" ;
	
		// TH accepts the same attribute names as TD.
		th_attrs ::= td_attrs ;
	
		tr_attrs ::= 
			"ALIGN" | "BGCOLOR" | "BORDERCOLOR" 
			| "BORDERCOLORDARK" | "BORDERCOLORLIGHT" | "VALIGN"
					;

		// Constraints on attr_values
		// append "_value" to attribute name and the production 
		// will be called automatically by the constraint.
	
		align_value ::= 
			"CENTER" | "JUSTIFY" | "LEFT" 
			| "RIGHT" | "TOP" | "BOTTOM" ;
		
		height_value ::= value_or_percent;

		// An optional sign followed by a number.
		size_value ::= ["-"|"+"] numeric;

		valign_value ::= "TOP" | "BOTTOM";
		
		// Shared helper: a number with an optional trailing "%".
		value_or_percent ::= numeric ["%"];

		width_value  ::= value_or_percent;
	};

	// Constrain attribute names.  The constraint builds the symbol name
	// "Attrs." + <enclosing start_tag's element_name> + "_attrs".
	// NOTE(review): the first alternative appears to mean "if no such
	// symbol is defined, accept any name"; otherwise the name is matched
	// against that symbol -- confirm the semantics of ^#ISSYMBOL and *().
	attr_name ::= Markup.attr_name
		(? 	#WARN = 101,
			#VALUE ::= 
				^#ISSYMBOL("Attrs." + 
					^*start_tag.element_name + "_attrs") *()
				  | #SYMBOL("Attrs." + 
					^*start_tag.element_name + "_attrs");
		);
			
	// Constrain attribute values in the same way, using the symbol name
	// "Attrs." + <attr_name> + "_value".
	attr_value ::= Markup.attr_value 
		(? 	#WARN = 102,
			#VALUE ::= 
				^#ISSYMBOL("Attrs." + ^.attr_name + "_value") *()
				| #SYMBOL("Attrs." + ^.attr_name + "_value") ;
		);
};






For comments or questions about this site, please contact
webmaster@programmar.com
Copyright © 1998-2008 NorKen Technologies, Inc. All rights reserved.