# NHSE Repository in a Box (RIB)
#
# The authors of this software are Paul McMahan and Jeff Horner.
# Copyright (c) 1997 by the University of Tennessee.
# Permission to use, copy, modify, and distribute this software for any
# purpose without fee is hereby granted, provided that this entire notice
# is included in all copies of any software which is or includes a copy
# or modification of this software and in all copies of the supporting
# documentation for such software.
# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR UNIVERSITY OF TENNESSEE
# MAKE ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
#
# $Id: BIDMParser.pm,v 1.1.1.1 1997/12/10 15:59:34 jhorner Exp $
#
# $Log: BIDMParser.pm,v $
# Revision 1.1.1.1  1997/12/10 15:59:34  jhorner
# RIB pre 1.0
#
# Revision 1.1  1997/05/06 19:01:21  jhorner
# Initial revision
#
package RIB::BIDMParser;
use RIB::Util;
require HTML::Parser;
@ISA = qw(HTML::Parser);

use HTML::Entities ();
use LWP::UserAgent ();
use HTTP::Request ();
use HTTP::Date ();
use Data::Dumper ();


use strict;
use vars qw( $VERSION $DEBUG $DONE );
$DEBUG = 1;
$DONE = 0;
$VERSION = 0.9;

sub new {
    my $class = shift;
    my $self = bless HTML::Parser->new,$class;
    $self->{'META'} = {}; # Hash of name=>content pair. Each key of the hash
			   # is a BIDM string. Each value is a list
			   # of one or more values for the BIDM string.
    $self->{'LINK'} = {}; # Hash of rel=>href and and rev=>href pairs.

    $self->{'_errormsg'} = [];

    return $self;
}

sub parse {
    my $self = shift;
    eval { $self->SUPER::parse(@_) };
    if ($@) {
        print $@ if $DEBUG;
	$self->{'_buf'} = '';  # flush rest of buffer
        push @{$self->{'_errormsg'}}, "HTML::Parser::Parse Failed!";
        return '';
    }
    return 1;
}

sub ErrorMsg {
    shift @{shift->{'_errormsg'}};
}

sub parse_file {
    my ($self, $file) = @_;
    unless (open(F, $file)){
        push @{$self->{'_errormsg'}}, "Can't open $file: $!";
        return 0;
    }
    my $chunk = '';
    while(read(F, $chunk, 2048)) {
        $self->parse($chunk);
    }
    close(F);
    $self->parse(undef); #EOF
    return 1;
}

sub parse_url {
    my ($self,$url,$lm) = @_;
    my $ua = LWP::UserAgent->new("RIBcatalog/1.0");
    my $req = HTTP::Request->new('GET',$url);
    my $res = $ua->request($req);
    if ($res->is_success){
	$self->parse($res->content);
       # been modified since last modified time $lm
	# if it has, return HTTP RESPONCE CODE 304
	# and update $lm else return 1
	if (defined $lm && $res->last_modified > HTTP::Date::str2time($$lm)){ 
	    $$lm = HTTP::Date::time2str($res->last_modified);
	    return 304,$res->content; 
	} else {
	    return 1,$res->content;
	}
    } else {
	my $buf = $res->code . " - " . $res->message;
	push @{$self->{'_errormsg'}}, $buf;
	return 0;
    }
}

sub error_msg {
    my $self = shift;
    shift @{$self->{'_errormsg'}};
}

sub start {
    my ($self,$tag,$attr) = @_;

    if ( $tag eq "meta"){ 
	if (exists $attr->{'name'}){
	    my $buf = pretty($attr->{'name'});
	    my $field;
	    if ($buf =~ /^(.*)\.(.*)\.(.*)$/){
		unless (exists $self->{'CLASS'}){
		    $self->{'CLASS'} = $2;
		}
		$field = $3;
	    } elsif ($buf =~ /^(.*)\.(.*)$/){
		unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $1;
                }
                $field = $2;
	    }
	    unless (exists $self->{'META'}{$field}){
		$self->{'META'}{$field} = [];
	    }
	    my $array = $self->{'META'}{$field};
	    push @$array, pretty($attr->{'content'});
	}
    } elsif ($tag eq "link"){
	if (exists $attr->{'rel'}){
	    my $buf = pretty($attr->{'rel'});
	    my $field;
	    if ($buf =~ /^(.*)\.(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $2;
                }
                $field = $3;
            } elsif ($buf =~ /^(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $1;
                }
                $field = $2;
            }
	    unless (exists $self->{'LINK'}{$field}){
		$self->{'LINK'}{$field} = [];
	    }
	    my $array = $self->{'LINK'}{$field};
	    push @$array, pretty($attr->{'href'});
	}
	if (exists $attr->{'rev'}){
	    my $buf = pretty($attr->{'rev'});
	    my $field;
	    if ($buf =~ /^(.*)\.(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $2;
                }
                $field = $3;
            } elsif ($buf =~ /^(.*)\.(.*)\.(.*)$/){
                unless (exists $self->{'CLASS'}){
                    $self->{'CLASS'} = $1;
                }
                $field = $2;
            }
	    unless (exists $self->{'LINK'}{$field}){
		$self->{'LINK'}{$field} = [];
	    }
	    my $array = $self->{'LINK'}{$field};
	    push @$array, pretty($attr->{'href'});
	}
    }
}

sub link {
    my $self = shift;
    return %{$self->{'LINK'}};
}

sub meta {
    my $self = shift;
    return %{$self->{'META'}};
}

sub pretty {
    my $buf = shift;
    HTML::Entities::decode($buf);
    $buf =~ s/^\s+//;
    $buf =~ s/\s+$//;
    $buf =~ s/\s+/ /g;

    return $buf;
}

sub valuesof {
    my ($self,$field) = @_;
    return @{$self->{'META'}->{$field}} if (exists $self->{'META'}->{$field});
    return @{$self->{'LINK'}->{$field}} if (exists $self->{'LINK'}->{$field});
    return ();
}

sub valueof {
    my ($self,$field) = @_;
    return () unless $self->valuesof($field);
    my @array = $self->valuesof($field);
    return $array[0];
}

1;
