#!/usr/bin/perl
#
# xml2json
#
# Copyright (C) 2008
# Paul E. Jones
#
# NOTE:   This is experimental code written for demonstrative purposes and
#         has not been fully tested.
#
# USAGE:  xml2json [file ...]
#         Reads an XML document from the named files (or STDIN) and prints
#         a JSON rendering of it on STDOUT.
#
# OUTPUT: Every element becomes a member "<name>" : { ... } of its parent's
#         object.  Attributes become "@<name>" string members and the
#         element's (whitespace-trimmed) text becomes a "$" string member.
#
# ISSUES: If the XML has multiple elements with the same name, it gets
#         converted as-is.  Unfortunately, this is useless for JSON, since
#         the JavaScript interpreter will essentially replace the previous
#         object with the next one.  In effect, you will only have
#         visibility to the last object processed.  An array would
#         be the right solution, but then how does one know if there
#         will be multiple elements?  Having the XML schema in hand
#         might address that issue, but there are other issues
#         with converting XML to JSON, including namespaces, arrays,
#         ordered elements, and so forth.  There are ways to
#         do a conversion that allows for any XML to be properly
#         converted to and from JSON, but the resulting JSON
#         is fat, ugly, and difficult to work with.  Perhaps
#         if conversion is necessary, the XML should be limited
#         to something simple.  If one needs something complex,
#         then forget about automating the conversion.
#         The whole idea behind JSON in the first place was to define
#         something simpler than XML.  If that cannot be done, then
#         there is no point using JSON.
#
#         Mixed content is only partially supported: text that appears
#         before a child element is discarded, because the text buffer is
#         reset whenever a new element starts.
#

use strict;
use warnings;

# Conversion state shared by the parser call-backs.  These are package
# variables (not file lexicals) so that code loading this file can still
# inspect the result in $json_doc.
our $json_doc;          # JSON text produced so far
our $json_level;        # current nesting depth, used for indentation
our @json_stack;        # one flag per open element: has it emitted a member?
our $need_comma;        # true if the next member must be preceded by a comma
our $element_string;    # character data collected for the current element

# Help do the pretty printing to indent text with spaces.
# Returns one space per nesting level in $json_level.
sub json_indent_string
{
    return " " x $json_level;
}

# Render one code point that has no short JSON escape as \uXXXX.
# Code points above U+FFFF do not fit into four hex digits, so JSON
# requires them to be written as a UTF-16 surrogate pair.
sub _json_unicode_escape
{
    my ($code) = @_;

    return sprintf("\\u%04X", $code) if $code <= 0xFFFF;

    $code -= 0x10000;
    return sprintf("\\u%04X\\u%04X",
                   0xD800 + ($code >> 10),
                   0xDC00 + ($code & 0x3FF));
}

# Escape strings as per json.org
#
# Takes the raw string and returns a copy that is safe to place between
# double quotes in a JSON document.  The result contains only printable
# ASCII characters.
sub json_char_escape
{
    my $string = shift;

    # The backslash must be handled first so that the backslashes added
    # by the following substitutions are not escaped a second time.
    $string =~ s/\\/\\\\/g;
    $string =~ s/"/\\"/g;
    # json.org's \b is the backspace character (U+0008), not Perl's
    # word-boundary assertion.  It is covered by the generic \uXXXX rule.
    $string =~ s/\f/\\f/g;      # Valid in XML 1.1
    $string =~ s/\n/\\n/g;
    $string =~ s/\r/\\r/g;
    $string =~ s/\t/\\t/g;
    # Everything left outside of printable ASCII becomes a \u escape.
    $string =~ s/([^ -~])/_json_unicode_escape(ord($1))/eg;

    return $string;
}

# Handle start of the document: reset all of the conversion state and
# open the top-level JSON object.
sub init_handler
{
    $json_doc       = "{\n";
    $json_level     = 1;
    @json_stack     = ();
    $need_comma     = 0;
    $element_string = "";
}

# Handle the end of the document: close the top-level JSON object.
# Returns the completed JSON text.
sub final_handler
{
    $json_doc .= "\n}\n";
    return $json_doc;
}

#
# XML Parsing call-back routines
#

# Start of an element.  Receives the parser object (unused), the element
# name and the attributes as a flat list of name/value pairs.  Opens the
# element's JSON object and writes one "@name" member per attribute.
sub start_handler
{
    my ($expat, $element, @attr_pairs) = @_;

    # Do we need a comma to separate?
    if ($need_comma)
    {
        $json_doc .= ",\n";
    }
    elsif ($json_level > 1)
    {
        $json_doc .= "\n";
    }

    # The enclosing element now has at least one member.  Its end handler
    # needs to know that in case text follows this child element.
    $json_stack[-1] = 1 if @json_stack;

    # Append the element block start
    $json_doc .= json_indent_string()
               . '"' . json_char_escape($element) . '" : {';
    $json_level++;

    # Attributes are emitted in the order in which they were received;
    # going through a hash would make the order vary from run to run.
    my @members;
    while (@attr_pairs)
    {
        my ($key, $value) = splice(@attr_pairs, 0, 2);
        push(@members,
             json_indent_string()
             . '"@' . json_char_escape($key)
             . '" : "' . json_char_escape($value) . '"');
    }
    $json_doc .= "\n" . join(",\n", @members) if @members;

    $need_comma     = @members ? 1 : 0;
    $element_string = "";

    push(@json_stack, $need_comma);
}

# Character data.  May be called several times for one run of text, so
# the pieces are only collected here and written out by end_handler.
sub char_handler
{
    my ($expat, $chardata) = @_;

    $element_string .= $chardata;
}

# End of an element.  Writes the collected text (if any) as the "$"
# member and closes the element's JSON object.
sub end_handler
{
    my ($expat, $element) = @_;

    # Did this element already emit a member (attribute or child)?
    my $has_members = pop(@json_stack);

    # Remove any leading or trailing whitespace on element string
    my $text = $element_string;
    $text =~ s/\A[ \t\r\n]+//;
    $text =~ s/[ \t\r\n]+\z//;

    # Escape certain characters
    $text = json_char_escape($text);

    if (length($text) > 0)
    {
        # If we need a comma, insert one
        $json_doc .= $has_members ? ",\n" : "\n";
        $json_doc .= json_indent_string() . '"$" : "' . $text . '"' . "\n";
    }
    else
    {
        $json_doc .= "\n";
    }

    # End of element
    $element_string = "";
    $json_level--;
    $json_doc .= json_indent_string() . "}";

    # Whatever follows is a sibling and needs to be separated from us
    $need_comma = 1;
}

#
# MAIN
#
# Reads the XML document, converts it and prints the JSON text.  Dies
# with a "Could not parse XML" message if the document cannot be parsed.
sub main
{
    # Loaded at run time so that the conversion routines above can be
    # loaded and exercised on their own, without the parser installed.
    require XML::Parser;

    # Read XML from the files named on the command line, or from STDIN
    my $xml_doc = join("", <>);

    # Parse the XML document
    my $parser = XML::Parser->new(ErrorContext => 2);
    $parser->setHandlers(Init  => \&init_handler,
                         Final => \&final_handler,
                         Start => \&start_handler,
                         End   => \&end_handler,
                         Char  => \&char_handler);

    eval { $parser->parse($xml_doc); 1 }
        or die "Could not parse XML: $@\n";

    print $json_doc;

    return;
}

# Run the conversion only when executed as a program, not when this file
# is pulled in by other code through require or do.
main() unless caller;

1;