#! /usr/local/bin/perl -w # # $Id: mkmodels,v 1.2 2001/04/13 20:28:34 lee Exp $ # mkmodel -- make an XML instance that matches a cotnent model read from # a DTD. Elements and text are created, but no attribtues. # # Copyright Liam Quin, 2001 # You can use this program in any way yuo like, as long as you go # barefoot for an hour when you first use it. # Feel free to send me comments, suggestions, flames, pictures of your # ankles, etc., I am liam@holoweb.net # # # Usage: # ./mkmodel instance.xml [element] # # For DocBook, you can put the following three lines in a file and use # that as the instance (uncommented, and edit the path): # # # use strict; use XML::Parser; use constant MAXNESTINGDEPTH => 60; # max nesting depth, 0 means no limit use constant MAXRECURSION => 2; # max nesting of same element type use constant INDENT => 2; # indent nested elements by this many spaces each time use constant STARQUANTITY => 2; # make 0 to STARQUANTITY erpeated items for * use constant PLUSQUANTITY => 2; # make 0 to STARQUANTITY erpeated items for + my %Elements; my %ModelFragments; sub processContentModel($$); # declare it to allow recursion sub processContentModel($$) { my ($name, $model) = @_; my $fragmentNumber = 0; # we are called with a textual fully expanded content model $model =~ s/\s+//g; # ( m ) -> processContentModel(name.n, m) # while ($model =~ m{\(([^()]+)\)}) { my $repl = $name . '$' . ++$fragmentNumber; $ModelFragments{$repl} = processContentModel($repl, $1); $model =~ s{\(([^()]+)\)}{$repl}; } # a|b|c -> alternation while ($model =~ m{([^,?*+|]+[?+*]? (?:\|[^,?*+|])+)}) { my $repl = $name . '$' . ++$fragmentNumber; $ModelFragments{$repl} = $1; $model =~ s{[^,?*+|]+[?+*]? (\|[^,?*+|])+}{$repl}; } # now we have a simple sequence $model .= ','; # force it to be regular my @result; while ($model =~ m{([^,]+),}g) { push @result, $1; } return \@result; } # Keep track of active elements, so we can limit recursion; another way # to do this would be to manage a stack and search it; a lot slower, but # maybe that doesn't matter... my %doing; sub makePossibleInstance($$); # declare to allow recursion sub makePossibleInstance($$) { my ($name, $prefix) = @_; my $isFake = ($name =~ m/\$/); if (MAXNESTINGDEPTH > 0 && (length $prefix) / INDENT >= MAXNESTINGDEPTH) { print $prefix, "\n"; return; } if ($name eq "#PCDATA") { print $prefix, $name, "\n"; return; } if ($name eq "EMPTY") { # in XML, is the same as # strictly speaking, may be # different, so probably I should handle EMPTY element better. print $prefix, "\n"; return; } $doing{$name}++; my $content = $Elements{$name} || $ModelFragments{$name}; if (!$isFake) { print "${prefix}<$name>\n"; } if (MAXRECURSION == 0 || $doing{$name} < MAXRECURSION) { if (defined $content) { foreach my $fragment (@$content) { my @choices = split /\|/, $fragment; if ($choices[0] =~ m/^\#PCDATA/) { print $prefix, "text\n"; } else { # we have an alternation, a|b|c # choose a random path: $fragment = $choices[int(rand(scalar @choices))]; # if the fragment allows repeated objects, # maybe make more than one of them if ($fragment =~ m/^(.*)([?*+])$/) { my ($e, $howmany) = ($1, $2); if ($howmany eq "?") { # zero or one if (rand > 0.5) { makePossibleInstance($e, $prefix . " "); } } elsif ($howmany eq "*") { my $howmany = int(rand(STARQUANTITY + 1)); while ($howmany > 0) { makePossibleInstance($e, $prefix . " "); $howmany--; } } elsif ($howmany eq "+") { my $howmany = int(rand(PLUSQUANTITY + 1)); makePossibleInstance($e, $prefix . " "); while ($howmany > 0) { makePossibleInstance($e, $prefix . " "); $howmany--; } } else { die "unknown repeat $howmany in $fragment " . "for <$name> " . join(' | ', @$content) ; } } else { # single item makePossibleInstance($fragment, $prefix . " "); } } } } else { # unknown item, it's in a content model but # there is no definition for it print " \n"; } } else { # already open, MAXRECURSION exceeded print $prefix, "\n"; } if (!$isFake) { print "${prefix}\n"; } $doing{$name}--; } sub ElementDeclaration($$$) { my ($expat, $name, $model) = @_; # print "e $name $model\n"; if (exists($Elements{$name})) { die "duplicate element $name"; } $Elements{$name} = processContentModel($name, $model); } sub AttributeSpecificationList($$$$$) { my ($expat, $elementName, $attributeName, $type, $default, $fixed) = @_; # print "a $elementName $attributeName\n"; } sub ExternalEntity($$$$) { my ($expat, $base, $sysid, $pubid) = @_; # print "f $base $sysid\n"; return XML::Parser::lwp_ext_ent_handler($expat, $base, $sysid, $pubid); } sub ElementStart($$@) { my ($expat, $element, @atts) = @_; # print "( $element\n"; } sub ElementEnd($$) { my ($expat, $element) = @_; # print ") $element\n"; } sub processDTD($) { my ($dtdfile) = @_; my $parser = new XML::Parser( Style => 'Subs', ParseParamEnt => 1, Handlers => { Element => \&ElementDeclaration, Attlist => \&AttributeSpecificationList, # ExternEnt => \&ExternalEntity, Start => \&ElementStart, End => \&ElementEnd } ); $parser->parsefile($dtdfile, ErrorContext => 3); } processDTD $ARGV[0]; makePossibleInstance($ARGV[1] || "book", "");