package XML_DBI;
require 5.001;
##############################################################################
# $Id: XML_DBI.pm,v 1.1 2003/03/28 23:53:51 xwolf Exp $          #            
# 
# See the bottom of this file for the POD documentation.  Search for the
# string '=head'.
# You can run this file through either pod2man or pod2html to produce pretty
# documentation in manual or html file format (these utilities are part of the
# Perl 5 distribution).
#
# Copyright 1999-2000 Wolfgang Wiese.  All rights reserved.
# It may be used and modified freely, but I do request that this copyright
# notice remain attached to the file.  You may modify this module as you 
# wish, but if you redistribute a modified version, please attach a note
# listing the modifications you have made.
#
##############################################################################
# Last Modified on:	$Date: 2003/03/28 23:53:51 $
# By:			$Author: xwolf $
# Version:		$Revision: 1.1 $ 
##############################################################################
use strict;

BEGIN {
    use Exporter   ();
    use vars       qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

    # if using RCS/CVS, this may be preferred
    $XML_DBI::VERSION = do { my @r = (q$Revision: 1.1 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
    $XML_DBI::revision = '$Id: XML_DBI.pm,v 1.1 2003/03/28 23:53:51 xwolf Exp $';
    # The above must be all one line, for MakeMaker

    @ISA         = qw(Exporter);
    @EXPORT      = qw(&XML_to_Hash &XMLfile_to_Hash &Hash_to_XML &LineUp_Attributes);
    %EXPORT_TAGS = ( );     # eg: TAG => [ qw!name1 name2! ],

    # your exported package globals go here,
    # as well as any optionally exported functions
    @EXPORT_OK   = qw();
}
use vars      @EXPORT_OK;
##############################################################################
# Exported Subroutines
##############################################################################
sub Hash_to_XML {
  my $hashref = shift;  
  my %hash; 
  my $einrueck = shift;
  my $key;
  my $subkey;
  my $attribut;
  my $result;
  my $tree;

  if (not $hashref) {
    return;
  }
  %hash= %{$hashref};
  foreach $key (keys %hash) {
    if (ref($hash{$key}) eq 'HASH') {
      $tree =0;
      foreach $subkey (keys %{$hash{$key}}) {
       if ($subkey =~ /^\d+$/) {
          $tree =1;
          last;
        }
        if (($subkey eq 'VALUE') || ($subkey eq 'MODIFIER')) {
          $tree =2;
          last;
        }
      }
      if ($tree==1) {
        foreach $subkey (keys %{$hash{$key}}) {
          if (ref($hash{$key}{$subkey}{'MODIFIER'}) eq 'HASH') {
            $attribut = &LineUp_Attributes($hash{$key}{$subkey}{'MODIFIER'});
          } else {
            $attribut = "";
          }

          $result .= "$einrueck<$key$attribut>";
          if (($subkey =~ /^\d+/) && (ref($hash{$key}{$subkey}{'VALUE'}) eq 'HASH')) {
             $result .= "\n";
             $einrueck .= "\t";
             $result .= &Hash_to_XML($hash{$key}{$subkey}{'VALUE'},$einrueck);           
             chop($einrueck);         
             $result .= "$einrueck</$key>\n";
          } else {
            if ($hash{$key}{$subkey}{'VALUE'}) {
              if (ref($hash{$key}{$subkey}{'VALUE'}) eq 'HASH') { 
                $result .= "\n";
                $einrueck .= "\t";
                $result .= &Hash_to_XML($hash{$key}{$subkey}{'VALUE'},$einrueck);
                chop($einrueck);
                $result .= "$einrueck</$key>";
              } else {
                $result .= "$hash{$key}{$subkey}{'VALUE'}";
                $result .= "</$key>";
              }
            }         
            $result .= "\n";                    
          }
      }
    } elsif ($tree==2) {
          if (ref($hash{$key}{'MODIFIER'}) eq 'HASH') {
            $attribut = &LineUp_Attributes($hash{$key}{'MODIFIER'});
          } else {
            $attribut = "";
          }
          $result .= "$einrueck<$key$attribut>";
          if (ref($hash{$key}{'VALUE'}) eq 'HASH') {
             $result .= "\n";
             $einrueck .= "\t";
             $result .= &Hash_to_XML($hash{$key}{'VALUE'},$einrueck);           
             chop($einrueck);         
             $result .= "$einrueck</$key>\n";
          } 
          $result .= "\n";                    
    } else {
        $result .= "$einrueck<$key>\n";
        foreach $subkey (keys %{$hash{$key}}) {
          $einrueck .= "\t";
          $result .= "$einrueck<$subkey>$hash{$key}{$subkey}</$subkey>\n";
          chop($einrueck);         
        }          
        $result .= "$einrueck</$key>\n";
      }
    } else {
      $result .= "$einrueck<$key>$hash{$key}</$key>\n";
    }
  } 
  return $result;
}
##############################################################################
sub LineUp_Attributes {
  my $hashref = $_[0];
  my %attributes = %{$hashref};
  my $result;
  my $key;
  
  if (not %attributes) {
    return;
  }
  foreach $key (keys %attributes) {
    $result.= " $key=\"$attributes{$key}\"";
  }
  return $result; 
}
##############################################################################
sub XML_to_Hash {
  my $content = $_[0];
  my $origin_file = $_[1];
  my %workhash;
  my $dtd_file;
  my $dtd;
  
  if ($content =~ /<\?xml\s+(.*?)>/) {
     $content = $';
  }
  if ($content =~ /<!DOCTYPE\s+(.*?)\s+\[\s+(?:["\'\s]?)(.*?)(?:["\'\s]?)\s*\]>/i) {
    $dtd = $2;
    $content = $';
  } elsif ($content =~ /<!DOCTYPE\s+(.*?)\s+SYSTEM\s+(?:["\'\s]?)(.*?)(?:["\'\s]?)\s*>/i) {
    $dtd_file = $2;
    $content = $'; 
    if ($dtd_file =~ /^http/i) {
      use LWP::Simple;
      $dtd = get($dtd_file);
    } elsif (($dtd_file !~ /^\//) && ($origin_file =~ /\//)) {
      my $path = substr($origin_file,0,rindex($origin_file,"/")+1);
      $dtd_file = $path.$dtd_file;
    }
    if (-r $dtd_file) {
       open(XTH, "$dtd_file");
        while(<XTH>) {
          $dtd .= $_;
        }
       close XTH;
    }
  } 
  %workhash = &ParseContent($content);
  if ((not $workhash{'DTD'}) && ($dtd)) {
    $workhash{'DTD'} = $dtd;
  }
  return %workhash;
}
##############################################################################
sub XMLfile_to_Hash {
  my $file = $_[0];
  my $inhalt;
  
  if (-r $file) {
     open(XTH,"$file");
       while(<XTH>) {
         $inhalt .= $_;
       }
     close XTH;
    return &XML_to_Hash($inhalt, $file);
  } else {
    print STDERR "XMLfile_to_Hash: File \"$file\" not readable\n";
    return;
  }
}
##############################################################################
# Private Subroutines
##############################################################################
sub ParseContent {
  my $string = $_[0];
  my %result;
  my $inhalt;
  my $tagname;
  my $pre;
  my %modifier;
  my $key;
  my %work;
  my %namehash;
  
  $string =~ s/[\n\r]/ /g;
  while($string =~ /<.*>/) {
    $inhalt ="";
    $pre = "";
    $tagname ="";
    %work = ();
    %modifier = ();
    if ($string =~ m/<(.*?)\s*>/gi) {
      $tagname = $1;     
      $inhalt = $';  
      $pre = $`;    
      if ($tagname =~ /^(\w*)\s+(.*)/) {
        $tagname = $1;
        %modifier = &ParseTagKeys($2);
      } else {
        %modifier = ();
      }
    } 
    if ($inhalt =~ /<\/$tagname>/) {
      $inhalt = $`; 
      $string = $pre.$';
      if ($inhalt =~ /<.*>/) {
        %work = ParseContent($inhalt);
        $inhalt = "";   
      } else {
        %work = ();
      }
    } else {
      $string = $pre.$inhalt;
      $inhalt = "";
    }
    $namehash{$tagname}++;
    if ($namehash{$tagname}>1) {     
      if ($namehash{$tagname}==2) {
        my $lastwork;
        my $lastnum = $namehash{$tagname}-1;
        my %retter;
        if (ref($result{$tagname}) eq 'HASH') {
          if (not $result{$tagname}{'MODIFIER'}) {
            %retter = %{$result{$tagname}};
            delete $result{$tagname};
            %{$result{$tagname}{$lastnum}{'VALUE'}} = %retter;
          } else {          
            if (ref($result{$tagname}{'VALUE'}) eq 'HASH') {
              %{$result{$tagname}{$lastnum}{'VALUE'}} = %{$result{$tagname}{'VALUE'}};
              delete $result{$tagname}{'VALUE'};
            } else {
              $result{$tagname}{$lastnum}{'VALUE'} = $result{$tagname}{'VALUE'};
              delete $result{$tagname}{'VALUE'};
            }
          }
          if ($result{$tagname}{'MODIFIER'}) {  
            my %lastmodi =  %{$result{$tagname}{'MODIFIER'}};
            if (%{$result{$tagname}{'MODIFIER'}}) {        
              %{$result{$tagname}{$lastnum}{'MODIFIER'}} = %{$result{$tagname}{'MODIFIER'}};
              delete $result{$tagname}{'MODIFIER'};
            }      
          }
        } else {
          $lastwork = $result{$tagname};
          delete $result{$tagname};   
          $result{$tagname}{$lastnum}{'VALUE'}= $lastwork;   
          $result{$tagname}{$lastnum}{'MODIFIER'} = (); 
        }        
      }
      if (%modifier) {        
        %{$result{$tagname}{$namehash{$tagname}}{'MODIFIER'}} = %modifier;
        if (%work) {
          %{$result{$tagname}{$namehash{$tagname}}{'VALUE'}} = %work;
        } else {
          $result{$tagname}{$namehash{$tagname}}{'VALUE'} = $inhalt;
        }
      } else {
        if (%work) {
          %{$result{$tagname}{$namehash{$tagname}}{'VALUE'}} = %work;
        } else {
          $result{$tagname}{$namehash{$tagname}}{'VALUE'} = $inhalt;
        }      
      }     
    } else {
      if (%modifier) {        
        %{$result{$tagname}{'MODIFIER'}} = %modifier;      
        if (%work) {
          %{$result{$tagname}{'VALUE'}} = %work;
        } else {
          $result{$tagname}{'VALUE'} = $inhalt;
        }
      } else {
        if (%work) {
          %{$result{$tagname}} = %work;
        } else {
          $result{$tagname} = $inhalt;
        }      
      }
    }
   
  }
  return %result;
}
##############################################################################
sub ParseTagKeys {
  my $tagkeys = $_[0];
  my %field;
  my $name;
  my $value;
  
  while($tagkeys) {
    if ($tagkeys =~ /\s*(\w*)\s*=(?:["\'\s]?)([^"\']*)(?:["\'\s]?)/) {
      $name = $1;
      $value = $2;
      if ($name) {
        $field{$name} = $value;
      }
      $tagkeys = $`.$';
    } else {
      $tagkeys ="";
    }
  }
  return %field;
}
##############################################################################
# EOFunctions
##############################################################################
1;
__END__

=head1 NAME

XML_DBI - Functions to read in and write out wellformed XML-files
	  without needing the XML::Parser yet


=head1 SYNOPSIS

Example call:

	use XML_DBI;
	my %result = XMLfile_to_Hash("db/user.xml");

	use Data::Dumper;
	print Dumper(%result);


	exit;


=head1 ABSTRACT

Beneath the existing XML::Parser from Clark Cooper, this modul 
simply reads in a string or a file, parses it for XML-syntax and
pushes the result into a perlhash. It only requires  LWP::Simple
for getting access to DTD's, which are avaible over net only.

If there is a DTD avaible either in the XML-file itself or within
a DTD-file, which was in e.g. defined with

	<!DOCTYPE user SYSTEM "user.dtd">
	
it will be read and pushed into the hash too.

Also if the DTD will be read and saved, there won't be any
validy-check with the XML-file. A well-formed XML-file is assumed.
It's up to later versions or the XML::Parser to perform a check 
for valid document definitions.

=head1 DESCRIPTION

=head2 XMLfile_to_Hash

Reads in a file and returns a perlhash by invoking XML_to_Hash with
the content of the file.

=head2 XML_to_Hash

Reads in a string and parses its content into a perlhash, based
on the XML-syntax.
If a DTD is defined by <!DOCTYPE>, it will be read and added to
the hash too. Due to the fact, that the DTD won't be compared to
the real content of the XML-string, the content is treated as
wellformed (see http://www.w3.org/TR/REC-xml).


	

=head1 AUTHOR INFORMATION

Copyright 1999-2000 Wolfgang Wiese.  All rights reserved.
It may be used and modified freely, but I do request that this copyright
notice remain attached to the file.  You may modify this module as you 
wish, but if you redistribute a modified version, please attach a note
listing the modifications you have made.

Address bug reports and comments to:
xwolf@xwolf.com

=head1 CREDITS

Thanks very much to:

=over 4

=item Rolf Rost (rolfrost@yahoo.com)

=cut


