微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

使用 Perl 将 XML 文件转换为 HTML、纯文本(txt)和 POD 格式

示例 XML 文件(example):

[root@dou xml]# cat example
<?xml version="1.0" encoding="UTF-8"?>
<README>
  <NAME>Test README File</NAME>

  <SYnopSIS>
     This is a summary of the file.
       It should appear in PRE tags
  </SYnopSIS>

  <DESCRIPTION>
     <TEXT>This is the full description of the file</TEXT>
     <SUBSECTION>
       <HEAD>Subsection Title</HEAD>
       <TEXT>Subsection text</TEXT>
     </SUBSECTION>
     <SUBSECTION>
       <HEAD>Another Subsection Title</HEAD>
       <TEXT>More Subsection text</TEXT>
       <LIST TYPE='bullet'>
         <ITEM>List item 1</ITEM>
         <ITEM>List item 2</ITEM>
       </LIST>
     </SUBSECTION>
  </DESCRIPTION>

  <AUTHOR>
     <ANAME>Dave Cross</ANAME>
     <EMAIL>dave@mag-sol.com</EMAIL>
  </AUTHOR>

  <SEE_ALSO>
     <LIST TYPE='bullet'>
       <ITEM>Something</ITEM>
       <ITEM>Something else</ITEM>
     </LIST>
  </SEE_ALSO>
</README>
[root@dou xml]#
 

perl转换脚本:

#!/usr/bin/perl -w

use strict;

use XML::Parser;
use Getopt::Std;
use Text::Wrap;

# Supported output formats, keyed by the letter passed to the -f switch.
my %formats = (
   h => {name => 'html'},
   p => {name => 'pod'},
   t => {name => 'text'},
);

my $usage = "usage: format_xml.pl -f h|p|t xml_file\n";

# Both a parsable option list and an input file name are required.
my %opts;
(getopts('f:',\%opts) && @ARGV) || die $usage;

# -f is mandatory. Without this check a missing switch would reach the
# lookup below as undef and produce "uninitialized value" warnings plus a
# misleading "Invalid format:" message.
die $usage unless defined $opts{f};

die "Invalid format: $opts{f}\n" unless exists $formats{$opts{f}};

# Diagnostic goes to STDERR so it never mixes with the converted document.
warn "Formatting file as $formats{$opts{f}}->{name}\n";

# With Style => 'Tree' the parser returns the document as nested
# [tag, content] pairs, which top() and process_node() walk.
my $p = XML::Parser->new(Style => 'Tree');
my $tree = $p->parsefile(shift);

# State shared with the formatting subs below:
#   $level - current nesting depth, maintained by process_node()
#   $ind   - indentation string derived from $level (text output)
#   $head  - current heading level, bumped inside SUBSECTION elements
my $level = 0;
my $ind = '';
my $head = 1;

top($tree);

process_node(@$tree);

bot();

# Walk one node of the XML::Parser 'Tree' structure and emit its output.
#
# Arguments:
#   $type    - element name; text nodes arrive with a tag of 0 and are
#              skipped here (their text is read by the enclosing element's
#              handler instead)
#   $content - reference to the element's content array: the attribute
#              hash first, followed by (tag, content) pairs
#
# Side effects: updates the file-level $ind, $level and $head, and consumes
# (empties) the content array it is given.
sub process_node {
   my ($type,$content) = @_;

   # Indentation used by the text formatter: one space per nesting level.
   $ind = ' ' x $level;

   return unless $type;

   local $_ = $type;

   # Remove the attribute hash so only (tag, content) pairs remain;
   # the handlers then find the first text child at $content->[1].
   my $attrs = shift @$content;

   /^NAME$/ && name($content);
   /^SYnopSIS$/ && synopsis($content);
   /^DESCRIPTION$/ && description();
   /^TEXT$/ && text($content);
   /^CODE$/ && code($content);
   /^HEAD$/ && head($content);
   # list() renders its own items, so the content is emptied afterwards to
   # keep the loop below from visiting them a second time.
   /^LIST$/ && do {list($attrs,$content); @$content = ()};
   /^AUTHOR$/ && author();
   /^ANAME$/ && aname($content);
   /^EMAIL$/ && email($content);
   /^SEE_ALSO$/ && see_also($content);

   # Consume the children one (tag, content) pair at a time. The explicit
   # offset and length are essential: "splice @$content,2" would remove
   # everything from index 2 onwards in a single step, so only one child
   # per element would ever be processed.
   while (my @node = splice @$content,0,2) {
     ++$level;
     ++$head if $type eq 'SUBSECTION';
     process_node(@node);
     --$head if $type eq 'SUBSECTION';
     --$level;
   }
}

# Print the document preamble for the selected output format.
#
# Argument: the parse tree returned by XML::Parser in 'Tree' style, i.e.
# [root_tag, [attrs, tag, content, tag, content, ...]].
#
# The title is the text of the root element's first NAME child. It is looked
# up by tag rather than read from a fixed index, so the result no longer
# depends on how much whitespace precedes <NAME> in the input. A document
# without a NAME element gets an empty title.
sub top {
   # Lexical copy: the original assigned to the file-level $tree here.
   my $doc = shift;

   my $title = '';
   my $children = $doc->[1];

   # Index 0 is the root's attribute hash; (tag, content) pairs follow.
   for (my $i = 1; $i < $#$children; $i += 2) {
     next unless $children->[$i] eq 'NAME';
     # Inside an element's content: [0] attrs, [1] tag 0, [2] first text.
     my $text = $children->[$i + 1]->[2];
     $title = $text if defined $text;
     last;
   }

   if ($opts{f} eq 'h') {
     print "<html>\n";
     print "<head>\n";
     print "<title>$title</title>\n";
     print "</head>\n<body>\n";
   } elsif ($opts{f} eq 'p') {
     print "=pod\n\n";
   } elsif ($opts{f} eq 't') {
     # Title underlined with a row of dashes of the same length.
     print "\n",$title,"\n";
     print '-' x length($title),"\n\n";
   }
}

# Print the document footer for the selected output format.
# The text format needs no closing markup, so it has no entry in the table.
sub bot {
   my %footer = (
     h => "</body>\n</html>\n",
     p => "=cut\n\n",
   );

   my $closing = $footer{$opts{f}};
   print $closing if defined $closing;
}

# Emit the NAME section.
#
# Argument: reference to the NAME element's content array with the attribute
# hash already removed by process_node(), so index 1 is the first text child.
sub name {
   my $content = shift;

   my $fmt = $opts{f};
   my $title = $content->[1];

   if ($fmt eq 'h') {
     print "<h1>NAME</h1>\n" . "<p>$title</p>\n";
     return;
   }

   if ($fmt eq 'p') {
     print "=head1 NAME\n\n" . "$title\n\n";
     return;
   }

   if ($fmt eq 't') {
     # Body text is indented by the current nesting level.
     print "NAME\n\n" . $ind . "$title\n\n";
     return;
   }
}

# Emit the synopsis section. The element text is printed untrimmed so its
# original line breaks and indentation survive in every format.
#
# Argument: reference to the element's content array (attribute hash already
# removed by process_node(), text at index 1).
sub synopsis {
   my $content = shift;

   my $body = $content->[1];

   my %rendered = (
     h => "<h1>SYnopSIS</h1>\n" . "<pre>$body</pre>\n",
     p => "=head1 SYnopSIS\n\n" . "$body\n",
     t => "SYnopSIS\n" . "$body\n",
   );

   print $rendered{$opts{f}} if exists $rendered{$opts{f}};
}
# Emit the DESCRIPTION section heading; the section body is produced by the
# handlers of the child elements.
sub description {
   my %heading = (
     h => "<h1>DESCRIPTION</h1>\n",
     p => "=head1 DESCRIPTION\n\n",
     t => "DESCRIPTION\n\n",
   );

   print $heading{$opts{f}} if exists $heading{$opts{f}};
}

# Emit one paragraph of body text.
#
# Argument: reference to the TEXT element's content array (attribute hash
# already removed by process_node(), text at index 1).
#
# HTML output uses the text as-is; pod and text output collapse it to a
# single trimmed line and re-wrap it with Text::Wrap.
sub text {
   my $content = shift;

   if ($opts{f} eq 'h') {
     print "<p>$content->[1]</p>\n"
   } elsif ($opts{f} eq 'p') {
     print wrap('','',trim($content->[1])),"\n\n";
   } elsif ($opts{f} eq 't') {
     # Same wrapping as pod, but indented by the current nesting level.
     # The text argument and closing parenthesis were missing here, which
     # made the whole script fail to compile.
     print wrap($ind,$ind,trim($content->[1])),"\n\n";
   }
}

# Emit a CODE element. HTML wraps the text in <pre>; pod and text output
# print it unchanged followed by a newline.
#
# Argument: reference to the element's content array (attribute hash already
# removed by process_node(), text at index 1).
sub code {
   my $content = shift;

   my $fmt = $opts{f};
   my $body = $content->[1];

   if ($fmt eq 'h') {
     print "<pre>$body</pre>\n";
   }
   elsif ($fmt eq 'p' or $fmt eq 't') {
     print "$body\n";
   }
}

# Emit a subsection heading at the current heading level ($head).
#
# Argument: reference to the HEAD element's content array (attribute hash
# already removed by process_node(), text at index 1).
sub head {
   my $content = shift;

   if ($opts{f} eq 'h') {
     print "<h$head>",trim($content->[1]),"</h$head>\n"
   } elsif ($opts{f} eq 'p') {
     # The heading text was missing here, leaving a bare "=headN" command.
     print "=head$head ",trim($content->[1]),"\n\n";
   } elsif ($opts{f} eq 't') {
     print trim($content->[1]),"\n\n";
   }
}

# Emit a LIST element with all of its ITEM children.
#
# Arguments:
#   $attrs   - the LIST element's attribute hash; TYPE selects the HTML tag
#              ('bullet' => ul, 'numbered' => ol)
#   $content - reference to the remaining content array of (tag, content)
#              pairs; it is consumed (left empty) by this sub
#
# Only the HTML output distinguishes list types; pod and text output always
# use "*" bullets.
sub list {
   my ($attrs,$content) = @_;

   my %list = (bullet => 'ul',numbered => 'ol');
   my $type = $attrs->{TYPE};

   # A missing or unknown TYPE would otherwise produce "<>" and "</>" tags
   # along with uninitialized-value warnings, so fall back to a bullet list.
   my $tag = (defined $type && exists $list{$type}) ? $list{$type} : 'ul';

   # Collect the item texts first. Pairs are taken two at a time; the
   # original "splice @$content,2" grabbed everything from index 2 onwards
   # in one step, so only the first ITEM was ever printed. ITEM content
   # arrays still carry their attribute hash, hence the text at index 2.
   my @items;
   while (my ($child,$body) = splice @$content,0,2) {
      push @items,$body->[2] if $child eq 'ITEM';
   }

    if ($opts{f} eq 'h') {
      print "<$tag>\n";
      print "<li>$_</li>\n" for @items;
      print "</$tag>\n";
    } elsif ($opts{f} eq 'p') {
      print "=over 4\n";
      print "=item *\n$_\n\n" for @items;
      print "=back\n\n";
    } elsif ($opts{f} eq 't') {
      print $ind,"* $_\n" for @items;
      print "\n";
    }
}

# Emit the AUTHOR section heading; the name and e-mail address are printed
# by the aname() and email() handlers.
sub author {
    my %heading = (
      h => "<h1>AUTHOR</h1>\n",
      p => "=head1 AUTHOR\n\n",
      t => "AUTHOR\n\n",
    );

    print $heading{$opts{f}} if exists $heading{$opts{f}};
}

# Emit the author's name. The line (and, in HTML, the <p> element) is left
# open on purpose: email() prints the address and closes it.
#
# Argument: reference to the ANAME element's content array (attribute hash
# already removed by process_node(), text at index 1).
sub aname {
    my $content = shift;

    if ($opts{f} eq 'h') {
      print "<p>$content->[1]\n"
    } elsif ($opts{f} eq 'p') {
      print trim($content->[1]),' ';
    } elsif ($opts{f} eq 't') {
      # The name itself was missing here; only the indent and the
      # separating space were printed.
      print $ind,trim($content->[1]),' ';
    }
}

# Emit the author's e-mail address in angle brackets, finishing the line
# (and, in HTML, the <p> element) that aname() started.
#
# Argument: reference to the EMAIL element's content array (attribute hash
# already removed by process_node(), text at index 1).
#
# The address was missing from every branch, so only empty brackets were
# printed.
sub email {
   my $content = shift;

   if ($opts{f} eq 'h') {
      # Brackets are written as entities so they are not parsed as a tag.
      print '&lt;',trim($content->[1]),"&gt;</p>\n"
   } elsif ($opts{f} eq 'p') {
      print '<',trim($content->[1]),">\n\n";
   } elsif ($opts{f} eq 't') {
      print '<',trim($content->[1]),">\n\n";
   }
}

# Emit the SEE ALSO section heading. Any argument passed by the caller is
# ignored; the section body comes from the child element handlers.
sub see_also {
   my %heading = (
      h => "<h1>SEE ALSO</h1>\n",
      p => "=head1 SEE ALSO\n\n",
      t => "SEE ALSO\n\n",
   );

   print $heading{$opts{f}} if exists $heading{$opts{f}};
}

# Return a copy of the given string with every newline replaced by a space
# and leading and trailing whitespace removed. The argument is not modified.
sub trim {
   my $str = shift;

   # Flatten to a single line first, so the anchors below only ever see the
   # real start and end of the string.
   $str =~ s/\n/ /g;
   $str =~ s/^\s+|\s+$//g;

   return $str;
}
 

 

执行结果:

[root@dou xml]# perl ex1.pl -f h example
Formatting file as html
<html>
<head>
<title>Test README File</title>
</head>
<body>
<h1>NAME</h1>
<p>Test README File</p>
<h1>SYnopSIS</h1>
<pre>
     This is a summary of the file.
       It should appear in PRE tags
  </pre>
<h1>DESCRIPTION</h1>
<p>This is the full description of the file</p>
<h2>Subsection Title</h2>
<p>Subsection text</p>
<h2>Another Subsection Title</h2>
<p>More Subsection text</p>
<ul>
<li>List item 1</li>
<li>List item 2</li>
</ul>
<h1>AUTHOR</h1>
<p>Dave Cross
&lt;dave@mag-sol.com&gt;</p>
<h1>SEE ALSO</h1>
<ul>
<li>Something</li>
<li>Something else</li>
</ul>
</body>
</html>
[root@dou xml]#
[root@dou xml]# perl ex1.pl -f t example
Formatting file as text

Test README File
----------------

NAME

 Test README File

SYnopSIS

     This is a summary of the file.
       It should appear in PRE tags

DESCRIPTION

  This is the full description of the file

Subsection Title

   Subsection text

Another Subsection Title

   More Subsection text

   * List item 1
   * List item 2

AUTHOR

  Dave Cross <dave@mag-sol.com>

SEE ALSO

  * Something
  * Something else

[root@dou xml]# perl ex1.pl -f p example
Formatting file as pod
=pod

=head1 NAME

Test README File

=head1 SYnopSIS


     This is a summary of the file.
       It should appear in PRE tags

=head1 DESCRIPTION

This is the full description of the file

=head2 Subsection Title

Subsection text

=head2 Another Subsection Title

More Subsection text

=over 4
=item *
List item 1

=item *
List item 2

=back

=head1 AUTHOR

Dave Cross <dave@mag-sol.com>

=head1 SEE ALSO

=over 4
=item *
Something

=item *
Something else

=back

=cut

[root@dou xml]#  

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐