#! /usr/local/bin/perl -w
#
# $Id: mkmodels,v 1.2 2001/04/13 20:28:34 lee Exp $
# mkmodel -- make an XML instance that matches a cotnent model read from
# a DTD. Elements and text are created, but no attribtues.
#
# Copyright Liam Quin, 2001
# You can use this program in any way yuo like, as long as you go
# barefoot for an hour when you first use it.
# Feel free to send me comments, suggestions, flames, pictures of your
# ankles, etc., I am liam@holoweb.net
#
#
# Usage:
# ./mkmodel instance.xml [element]
#
# For DocBook, you can put the following three lines in a file and use
# that as the instance (uncommented, and edit the path):
#
#
#
use strict;
use XML::Parser;
use constant MAXNESTINGDEPTH => 60; # max nesting depth, 0 means no limit
use constant MAXRECURSION => 2; # max nesting of same element type
use constant INDENT => 2; # indent nested elements by this many spaces each time
use constant STARQUANTITY => 2; # make 0 to STARQUANTITY erpeated items for *
use constant PLUSQUANTITY => 2; # make 0 to STARQUANTITY erpeated items for +
my %Elements;
my %ModelFragments;
sub processContentModel($$); # declare it to allow recursion
sub processContentModel($$)
{
my ($name, $model) = @_;
my $fragmentNumber = 0;
# we are called with a textual fully expanded content model
$model =~ s/\s+//g;
# ( m ) -> processContentModel(name.n, m)
#
while ($model =~ m{\(([^()]+)\)}) {
my $repl = $name . '$' . ++$fragmentNumber;
$ModelFragments{$repl} = processContentModel($repl, $1);
$model =~ s{\(([^()]+)\)}{$repl};
}
# a|b|c -> alternation
while ($model =~ m{([^,?*+|]+[?+*]? (?:\|[^,?*+|])+)}) {
my $repl = $name . '$' . ++$fragmentNumber;
$ModelFragments{$repl} = $1;
$model =~ s{[^,?*+|]+[?+*]? (\|[^,?*+|])+}{$repl};
}
# now we have a simple sequence
$model .= ','; # force it to be regular
my @result;
while ($model =~ m{([^,]+),}g) {
push @result, $1;
}
return \@result;
}
# Keep track of active elements, so we can limit recursion; another way
# to do this would be to manage a stack and search it; a lot slower, but
# maybe that doesn't matter...
my %doing;
sub makePossibleInstance($$); # declare to allow recursion
sub makePossibleInstance($$)
{
my ($name, $prefix) = @_;
my $isFake = ($name =~ m/\$/);
if (MAXNESTINGDEPTH > 0 && (length $prefix) / INDENT >= MAXNESTINGDEPTH) {
print $prefix, "\n";
return;
}
if ($name eq "#PCDATA") {
print $prefix, $name, "\n";
return;
}
if ($name eq "EMPTY") {
# in XML, is the same as
# strictly speaking, may be
# different, so probably I should handle EMPTY element better.
print $prefix, "\n";
return;
}
$doing{$name}++;
my $content = $Elements{$name} || $ModelFragments{$name};
if (!$isFake) {
print "${prefix}<$name>\n";
}
if (MAXRECURSION == 0 || $doing{$name} < MAXRECURSION) {
if (defined $content) {
foreach my $fragment (@$content) {
my @choices = split /\|/, $fragment;
if ($choices[0] =~ m/^\#PCDATA/) {
print $prefix, "text\n";
} else {
# we have an alternation, a|b|c
# choose a random path:
$fragment = $choices[int(rand(scalar @choices))];
# if the fragment allows repeated objects,
# maybe make more than one of them
if ($fragment =~ m/^(.*)([?*+])$/) {
my ($e, $howmany) = ($1, $2);
if ($howmany eq "?") {
# zero or one
if (rand > 0.5) {
makePossibleInstance($e, $prefix . " ");
}
} elsif ($howmany eq "*") {
my $howmany = int(rand(STARQUANTITY + 1));
while ($howmany > 0) {
makePossibleInstance($e, $prefix . " ");
$howmany--;
}
} elsif ($howmany eq "+") {
my $howmany = int(rand(PLUSQUANTITY + 1));
makePossibleInstance($e, $prefix . " ");
while ($howmany > 0) {
makePossibleInstance($e, $prefix . " ");
$howmany--;
}
} else {
die
"unknown repeat $howmany in $fragment " .
"for <$name> " .
join(' | ', @$content)
;
}
} else {
# single item
makePossibleInstance($fragment, $prefix . " ");
}
}
}
} else {
# unknown item, it's in a content model but
# there is no definition for it
print " \n";
}
} else {
# already open, MAXRECURSION exceeded
print $prefix, "\n";
}
if (!$isFake) {
print "${prefix}$name>\n";
}
$doing{$name}--;
}
sub ElementDeclaration($$$)
{
my ($expat, $name, $model) = @_;
# print "e $name $model\n";
if (exists($Elements{$name})) {
die "duplicate element $name";
}
$Elements{$name} = processContentModel($name, $model);
}
sub AttributeSpecificationList($$$$$)
{
my ($expat, $elementName, $attributeName, $type, $default, $fixed) = @_;
# print "a $elementName $attributeName\n";
}
sub ExternalEntity($$$$)
{
my ($expat, $base, $sysid, $pubid) = @_;
# print "f $base $sysid\n";
return XML::Parser::lwp_ext_ent_handler($expat, $base, $sysid, $pubid);
}
sub ElementStart($$@)
{
my ($expat, $element, @atts) = @_;
# print "( $element\n";
}
sub ElementEnd($$)
{
my ($expat, $element) = @_;
# print ") $element\n";
}
sub processDTD($)
{
my ($dtdfile) = @_;
my $parser = new XML::Parser(
Style => 'Subs',
ParseParamEnt => 1,
Handlers => {
Element => \&ElementDeclaration,
Attlist => \&AttributeSpecificationList,
# ExternEnt => \&ExternalEntity,
Start => \&ElementStart,
End => \&ElementEnd
}
);
$parser->parsefile($dtdfile, ErrorContext => 3);
}
processDTD $ARGV[0];
makePossibleInstance($ARGV[1] || "book", "");