package XML::Handler::Dtd2Html::Document;

sub new {
	my $proto = shift;
	my $class = ref($proto) || $proto;
	my $self = {
			prolog_comments         => [],
			xml_decl                => undef,
			doctype_decl            => undef,
			root_name               => "",
			list_decl               => [],
			hash_notation           => {},
			hash_entity             => {},
			hash_element            => {},
			hash_attr               => {}
	};
	bless($self, $class);
	return $self;
}

package XML::Handler::Dtd2Html;

use strict;

use vars qw($VERSION);

$VERSION="0.10";

sub new {
	my $proto = shift;
	my $class = ref($proto) || $proto;
	my $self = {
			prolog      => 1,
			doc         => new XML::Handler::Dtd2Html::Document(),
			comments    => []
	};
	bless($self, $class);
	return $self;
}

sub start_element {
	my $self = shift;
	my ($element) = @_;
	my $name = $element->{Name};
	$self->{doc}->{root_name} = $name unless ($self->{doc}->{root_name});
}

sub end_document {
	my $self = shift;
	return $self->{doc};
}

sub comment {
	my $self = shift;
	if ($self->{prolog}) {
		push @{$self->{doc}->{prolog_comments}}, shift;
	} else {
		push @{$self->{comments}}, shift;
	}
}

sub notation_decl {
	my $self = shift;
	my ($decl) = @_;
	$self->{prolog} = 0;
	if (scalar @{$self->{comments}}) {
		$decl->{comments} = [@{$self->{comments}}];
		$self->{comments} = [];
	}
	$decl->{type} = "notation";
	my $name = $decl->{Name};
	$self->{doc}->{hash_notation}->{$name} = $decl;
	push @{$self->{doc}->{list_decl}}, $decl;
}

sub unparsed_entity_decl {
	my $self = shift;
	my ($decl) = @_;
	$self->{prolog} = 0;
	if (scalar @{$self->{comments}}) {
		$decl->{comments} = [@{$self->{comments}}];
		$self->{comments} = [];
	}
	$decl->{type} = "unparsed_entity";
	my $name = $decl->{Name};
	$self->{doc}->{hash_entity}->{$name} = $decl;
	push @{$self->{doc}->{list_decl}}, $decl;
}

sub entity_decl {
	my $self = shift;
	my ($decl) = @_;
	$self->{prolog} = 0;
	if (scalar @{$self->{comments}}) {
		$decl->{comments} = [@{$self->{comments}}];
		$self->{comments} = [];
	}
	$decl->{type} = "entity";
	my $name = $decl->{Name};
	unless ($name =~ /^%/) {
		$self->{doc}->{hash_entity}->{$name} = $decl;
		push @{$self->{doc}->{list_decl}}, $decl;
	}
}

sub element_decl {
	my $self = shift;
	my ($decl) = @_;
	$self->{prolog} = 0;
	if (scalar @{$self->{comments}}) {
		$decl->{comments} = [@{$self->{comments}}];
		$self->{comments} = [];
	}
	$decl->{type} = "element";
	$decl->{used_by} = [];
	$decl->{uses} = [];
	my $name = $decl->{Name};
	$self->{doc}->{hash_element}->{$name} = $decl;
	push @{$self->{doc}->{list_decl}}, $decl;
}

sub attlist_decl {
	my $self = shift;
	my ($decl) = @_;
	if (scalar @{$self->{comments}}) {
		$decl->{comments} = [@{$self->{comments}}];
		$self->{comments} = [];
	}
	my $elt_name = $decl->{ElementName};
	$self->{doc}->{hash_attr}->{$elt_name} = []
			unless (exists $self->{doc}->{hash_attr}->{$elt_name});
	push @{$self->{doc}->{hash_attr}->{$elt_name}}, $decl;
}

sub doctype_decl {
	my $self = shift;
	my ($decl) = @_;
	$self->{doc}->{doctype_decl} = $decl;
	$self->{doc}->{root_name} = $decl->{Name};
}

sub xml_decl {
	my $self = shift;
	$self->{doc}->{xml_decl} = shift;
}

package XML::Handler::Dtd2Html::Document;

sub _cross_ref {
	my $self = shift;

	foreach my $decl (values %{$self->{hash_element}}) {
		my $name = $decl->{Name};
		my $model = $decl->{Model};
		while ($model) {
			for ($model) {
				s/^([ \n\r\t\f\013]+)//;							# whitespaces

				s/^([\?\*\+\(\),\|])//
						and last;
				s/^(EMPTY)//
						and last;
				s/^(ANY)//
						and last;
				s/^(#PCDATA)//
						and last;
				s/^([A-Za-z_:][0-9A-Za-z\.\-_:]*)//
						and push(@{$self->{hash_element}->{$name}->{uses}}, $1),
						and push(@{$self->{hash_element}->{$1}->{used_by}}, $name),
						    last;
				s/^([\S]+)//
						and warn __PACKAGE__,":_cross_ref INTERNAL_ERROR $1\n",
						    last;
		}
	}
	}
}

sub _format_head {
	my $self = shift;
	my($FH, $title) = @_;
	print $FH "<?xml version='1.0' encoding='ISO-8859-1'?>\n";
	print $FH "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'xhtml1-transitional.dtd'>\n";
	print $FH "<html xmlns='http://www.w3.org/1999/xhtml'>\n";
	print $FH "\n";
	print $FH "  <head>\n";
	print $FH "    <meta name='generator' content='dtd2html (Perl)' />\n";
	print $FH "    <meta http-equiv='Content-Type' content='text/html; charset=ISO-8859-1' />\n";
	print $FH "    <title>",$title,"</title>\n";
	print $FH "  </head>\n";
	print $FH "\n";
	print $FH "  <body>\n";
}

sub _format_tail {
	my $self = shift;
	my($FH) = @_;
	print $FH "<hr align='center'/>\n";
	print $FH "    <i>Generated by dtd2html</i>\n";
	print $FH "\n";
	print $FH "  </body>\n";
	print $FH "\n";
	print $FH "</html>\n";
}

sub _format_content_model {
	my $self = shift;
	my ($model) = @_;
	my $str = "";
	while ($model) {
		for ($model) {
			s/^([ \n\r\t\f\013]+)//							# whitespaces
					and $str .= $1,
					    last;

			s/^([\?\*\+\(\),\|])//
					and $str .= $1,
					    last;
			s/^(EMPTY)//
					and $str .= "<font color='#0000A0'>" . $1 . "</font>",
					    last;
			s/^(ANY)//
					and $str .= "<font color='#0000A0'>" . $1 . "</font>",
					    last;
			s/^(#PCDATA)//
					and $str .= "<font color='#0000A0'>" . $1 . "</font>",
					    last;
			s/^([A-Za-z_:][0-9A-Za-z\.\-_:]*)//
					and $str .= "<a href='#elt_" . $1 . "'>" . $1 . "</a>",
					    last;
			s/^([\S]+)//
					and warn __PACKAGE__,":_format_content_model INTERNAL_ERROR $1\n",
					    last;
		}
	}
	return $str;
}

sub _process_text {
	my $self = shift;
	my($text) = @_;

	# keep track of leading and trailing white-space
	my $lead  = ($text =~ s/\A(\s+)//s ? $1 : "");
	my $trail = ($text =~ s/(\s+)\Z//s ? $1 : "");

	# split at space/non-space boundaries
	my @words = split( /(?<=\s)(?=\S)|(?<=\S)(?=\s)/, $text );

	# process each word individually
	foreach my $word (@words) {
		# skip space runs
		next if $word =~ /^\s*$/;
		if ($word =~ /^[A-Za-z_:][0-9A-Za-z\.\-_:]*$/) {
			# looks like a DTD name
			if (exists $self->{hash_notation}->{$word}) {
				$word = "<a href='#not_" . $word . "'>" . $word . "</a>"
			}
			if (exists $self->{hash_entity}->{$word}) {
				$word = "<a href='#ent_" . $word . "'>" . $word . "</a>"
			}
			if (exists $self->{hash_element}->{$word}) {
				$word = "<a href='#elt_" . $word . "'>" . $word . "</a>"
			}
		} elsif ($word =~ /^\w+:\/\/\w/) {
			# looks like a URL
			# Don't relativize it: leave it as the author intended
			$word = "<a href='" . $word . "'>" . $word . "</a>";
		} elsif ($word =~ /^[\w.-]+\@[\w.-]+/) {
			# looks like an e-mail address
			$word = "<a href='mailto:" . $word . "'>" . $word . "</a>";
		}
	}

	# put everything back together
	return $lead . join('', @words) . $trail;
}

sub _mk_tree {
	my $self = shift;
	my ($FH, $name) = @_;
	my %done = ();

	$self->{hash_element}->{$name}->{done} = 1;
	print $FH "<ul>\n";
	foreach (sort @{$self->{hash_element}->{$name}->{uses}}) {
		next if ($_ eq $name);
		next if (exists $done{$_});
		$done{$_} = 1;
		print $FH "  <li><a href='#elt_",$_,"'><b>",$_,"</b></a>\n";
		$self->_mk_tree($FH, $_)
				unless (exists $self->{hash_element}->{$_}->{done});
		print $FH "  </li>\n";
	}
	print $FH "</ul>\n";
}

sub printToFileHandle {
	my $self = shift;
	my ($FH) = @_;

	$self->_cross_ref();
	my @notations = sort keys %{$self->{hash_notation}};
	my @entities = sort keys %{$self->{hash_entity}};
	my @elements = sort keys %{$self->{hash_element}};

	$self->_format_head($FH, "DTD " . $self->{root_name});
	print $FH "<h1>DTD ",$self->{root_name},"</h1>\n";
	print $FH "<hr align='center'/>\n";
	if (scalar @elements) {
		print $FH "<h2><font color='#FF0000'>Element index.</font></h2>\n";
		print $FH "<dl>\n";
		foreach (@elements) {
			if ($_ eq $self->{root_name}) {
				print $FH "    <dt><a href='#elt_",$_,"'><b>",$_,"</b></a> (root)</dt>\n";
			} else {
				print $FH "    <dt><a href='#elt_",$_,"'><b>",$_,"</b></a></dt>\n";
			}
		}
		print $FH "</dl>\n";
	}
	if (scalar @entities) {
		print $FH "<h2><font color='#FF0000'>General Entity index.</font></h2>\n";
		print $FH "<dl>\n";
		foreach (@entities) {
			print $FH "    <dt><a href='#ent_",$_,"'><b>",$_,"</b></a></dt>\n";
		}
		print $FH "</dl>\n";
	}
	if (scalar @notations) {
		print $FH "<h2><font color='#FF0000'>Notation index.</font></h2>\n";
		print $FH "<dl>\n";
		foreach (@notations) {
			print $FH "    <dt><a href='#not_",$_,"'><b>",$_,"</b></a></dt>\n";
		}
		print $FH "</dl>\n";
	}
	print $FH "<hr align='center'/>\n";
	if (scalar @elements) {
		print $FH "<h2><font color='#FF0000'>Element tree.</font></h2>\n";
		print $FH "<ul>\n";
		print $FH "  <li><a href='#elt_",$self->{root_name},"'><b>",$self->{root_name},"</b></a>\n";
		$self->_mk_tree($FH, $self->{root_name});
		print $FH "  </li>\n";
		print $FH "</ul>\n";
		print $FH "<hr align='center'/>\n";
	}

	if (defined $self->{doctype_decl}) {
		print $FH "<h2>Document entity</h2>\n";
		print $FH "<pre>";
		if (defined $self->{xml_decl}) {
			my $version = $self->{xml_decl}->{Version};
			my $encoding = $self->{xml_decl}->{Encoding} || "";
			my $standalone = "";
			if (exists $self->{xml_decl}->{Standalone}) {
				$standalone = ($self->{xml_decl}->{Standalone}) ? "yes" : "no";
			}
			print $FH "&lt;<font color='#0000A0'>?xml</font> ";
			print $FH "<font color='#0000A0'>version</font>='<font color='#A00000'>",$version,"</font>' " if (defined $version);
			print $FH "<font color='#0000A0'>encoding</font>='",$encoding,"' " if (defined $encoding);
			print $FH "<font color='#0000A0'>standalone</font>='<font color='#A00000'>",$standalone,"</font>' " if ($standalone);
			print $FH "?&gt;\n";
		}
		my $name = $self->{doctype_decl}->{Name};
		print $FH "&lt;<font color='#0000A0'>!DOCTYPE</font> ",$name," [\n";
		print $FH "\t...\n";
		print $FH "]&gt;\n";
		print $FH "</pre>\n";
		foreach my $comment (@{$self->{prolog_comments}}) {
			my $data = $comment->{Data};
			print $FH "comment $data\n";
		}
	}

	print $FH "<ul>\n";
	foreach my $decl (@{$self->{list_decl}}) {
		print $FH "  <li>\n";
		my $type = $decl->{type};
		my $name = $decl->{Name};
		if      ($type eq "notation") {
			my $publicId = $decl->{PublicId};
			my $systemId = $decl->{SystemId};
			print $FH "    <h3><a name='not_",$name,"'/>",$name,"</h3>\n";
			print $FH "<pre>&lt;<font color='#0000A0'>!NOTATION</font> ",$name," ";
			if      (defined $publicId and defined $systemId) {
				print $FH "<font color='#0000A0'>PUBLIC</font> '",$publicId,"' '",$systemId,"'";
			} elsif (defined $publicId) {
				print $FH "<font color='#0000A0'>PUBLIC</font> '",$publicId,"'";
			} elsif (defined $systemId) {
				print $FH "<font color='#0000A0'>SYSTEM</font> '",$systemId,"'";
			} else {
				warn __PACKAGE__,":printToFileHandle INTERNAL_ERROR (NOTATION $name)\n";
			}
			print $FH " &gt;</pre>\n";
		} elsif ($type eq "unparsed_entity") {
			my $systemId = $decl->{SystemId};
			my $publicId = $decl->{PublicId};
			print $FH "    <h3><a name='ent_",$name,"'/>",$name,"</h3>\n";
			print $FH "<pre>&lt;<font color='#0000A0'>!ENTITY</font> ",$name," ";
			if (defined $publicId) {
				print $FH "<font color='#0000A0'>PUBLIC</font> '",$publicId,"' '",$systemId,"'";
			} else {
				print $FH "<font color='#0000A0'>SYSTEM</font> '",$systemId,"'";
			}
			print $FH " &gt;</pre>\n";
		} elsif ($type eq "entity") {
			my $value = $decl->{Value};
			my $systemId = $decl->{SystemId};
			my $publicId = $decl->{PublicId};
			my $notation = $decl->{Notation};
			print $FH "    <h3><a name='ent_",$name,"'/>",$name,"</h3>\n";
			print $FH "<pre>&lt;<font color='#0000A0'>!ENTITY</font> ",$name," ";
			if (defined $value) {
				$value =~ s/&/&amp;/g;
				print $FH "'",$value,"'";
			} else {
				if (defined $publicId) {
					print $FH "<font color='#0000A0'>PUBLIC</font> '",$publicId,"' '",$systemId,"'";
				} else {
					print $FH "<font color='#0000A0'>SYSTEM</font> '",$systemId,"'";
				}
				print $FH "<font color='#0000A0'>NDATA</font> <a href='#not_",$notation,"'>",$notation,"</a> "
						 if (defined $notation);
			}
			print $FH " &gt;</pre>\n";
		} elsif ($type eq "element") {
			my $model = $decl->{Model};
			my $f_model = $self->_format_content_model($model);
			print $FH "    <h3><a name='elt_",$name,"'/>",$name,"</h3>\n";
			print $FH "<pre>&lt;<font color='#0000A0'>!ELEMENT</font> ",$name," ",$f_model," &gt;\n";
			if (exists $self->{hash_attr}->{$name}) {
				foreach my $attr (@{$self->{hash_attr}->{$name}}) {
					my $attr_name = $attr->{AttributeName};
					my $type = $attr->{Type};
					my $default = $attr->{Default};
					my $fixed = $attr->{Fixed};
					print $FH "&lt;<font color='#0000A0'>!ATTLIST</font> ",$name;
					print $FH " ",$attr_name;
					if       ( $type eq "CDATA"
							or $type eq "ID"
							or $type eq "IDREF"
							or $type eq "IDREFS"
							or $type eq "ENTITY"
							or $type eq "ENTITIES"
							or $type eq "NMTOKEN"
							or $type eq "NMTOKENS" ) {
						print $FH " <font color='#0000A0'>",$type,"</font>";
					} else {
						print $FH " ",$type;
					}
					print $FH " <font color='#A00000'>",$default,"</font>";
					print $FH " ",$fixed if (defined $fixed);
					print $FH " &gt;\n";
				}
			}
			print $FH "</pre>\n";
		} else {
			warn __PACKAGE__,":printToFileHandle INTERNAL_ERROR (type:$type)\n";
		}
		if (exists $decl->{comments}) {
			foreach my $comment (@{$decl->{comments}}) {
				my $data = $self->_process_text($comment->{Data});
				print $FH "    <p><font color='#00A000'>\n";
				print $FH "$data\n";
				print $FH "    </font></p>\n";
			}
		}
		if ($type eq "element" and exists $self->{hash_attr}->{$name}) {
			print $FH "  <ul>\n";
			foreach my $attr (@{$self->{hash_attr}->{$name}}) {
				if (exists $attr->{comments}) {
					my $attr_name = $attr->{AttributeName};
					print $FH "    <li>",$attr_name," : <font color='#00A000'>\n";
					my $first = 1;
					foreach my $comment (@{$attr->{comments}}) {
						my $data = $self->_process_text($comment->{Data});
						print $FH "<p>\n" unless ($first);
						print $FH "$data\n";
						print $FH "</p>\n" unless ($first);
						$first = 0;
					}
					print $FH "    </font></li>\n";
				}
			}
			print $FH "  </ul>\n";
		}
		if ($type eq "element" and scalar @{$decl->{used_by}} != 0) {
			my %done;
			print $FH "  <p>Used by : ";
			foreach (sort @{$decl->{used_by}}) {
				next if (exists $done{$_});
				$done{$_} = 1;
				print $FH "<a href='#elt_",$_,"'>",$_,"</a> ";
			}
			print $FH "  </p>\n";
		}
		print $FH "  </li>\n";
	}
	print $FH "</ul>\n";
	$self->_format_tail($FH);
}

1;

__END__

=head1 NAME

XML::Handler::Dtd2Html - PerlSAX handler for generate a HTML documentation from a DTD

=head1 SYNOPSIS

  use XML::Parser::PerlSAX;
  use XML::Handler::Dtd2Html;

  $my_handler = new XML::Handler::Dtd2Html;

  $my_parser = new XML::Parser::PerlSAX(Handler => $my_handler, ParseParamEnt => 1);
  $result = $my_parser->parse( [OPTIONS] );

=head1 DESCRIPTION

All comments before a declaration are captured.

All entity references inside attribute values are expanded.

=head1 AUTHOR

Francois Perrad, perrad@besancon.sema.slb.com

=head1 SEE ALSO

PerlSAX.pod(3)

 Extensible Markup Language (XML) <http://www.w3c.org/TR/REC-xml>

=head1 COPYRIGHT

This program is distributed under the Artistic License.

=cut

