# NHSE Repository in a Box (RIB)
#
# The authors of this software are Paul McMahan and Jeff Horner.
# Copyright (c) 1997 by the University of Tennessee.
# Permission to use, copy, modify, and distribute this software for any
# purpose without fee is hereby granted, provided that this entire notice
# is included in all copies of any software which is or includes a copy
# or modification of this software and in all copies of the supporting
# documentation for such software.
# THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR UNIVERSITY OF TENNESSEE
# MAKE ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE
# MERCHANTABILITY OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
#
# $Id: check_nonlocal_links.pl,v 1.1.1.1 1997/12/10 15:59:32 jhorner Exp $
#
# $Log: check_nonlocal_links.pl,v $
# Revision 1.1.1.1  1997/12/10 15:59:32  jhorner
# RIB pre 1.0
#
# Revision 1.2  1997/05/26 13:37:06  mcmahan
# removed extraneous reference to $cgipath - the script should have just
# used a relative link to the current directory rather than trying to figure
# out what the path to the cgi-bin was
#
# Revision 1.1  1997/05/06 18:15:29  jhorner
# Initial revision
#
use strict;
use RIB::Util ();
use LWP::UserAgent ();
use HTTP::Request ();
use HTML::Entities ();

$|=1; # unbuffer

my $util = RIB::Util->new();
$util->PrintHeader();

my $repository = $util->GetRepoName();
if (!$repository) {
  $util->ErrorMessage("repository was not specifed in your input.");
}

my $filepath = $util->GetRibDir() . "/repositories/" . $repository;
my $urlpath  = $util->GetRibUrl() . "/repositories/" . $repository;

my $back_to_top = $util->BackToTop($repository);

my $top = "<head><title>Check catalog links for  "
     . "$repository</title></head>\n"
     . "<body bgcolor=#FFFFF0>"
     . "<center>"
     . "<h1>Check catalog links for $repository</h1>\n"
     . "</center>"
     . "<p><hr><p>\n";

unless (-f "$filepath/catalog/.nonlocal" or -s "$filepath/catalog/.nonlocal") {
  print $top;
  print "This repository's catalog contains no links to foreign Assets.\n";
  print $back_to_top;
  print "</body>\n";
  exit;
}

unless ($util->InitNonLocal($repository)) {
   $util->ErrorMessage('Access to the links is currently locked.'
       .' Please try pressing the Reload button in your web client.'
       .' If this message persists, contact your RIB administrator<br>');
}

print $top;
print "As RIB checks each of the urls linked to by this repository's\n";
print "software catalog you will see the output of each attempt to\n";
print "check the document which that url points to.  When this process is\n";
print "finally completed (you'll know because your browser will stop\n";
print "loading) you can click <a href=#summary>here</a> to view a\n";
print "summary of the output.  Be sure to wait until the browser has\n";
print "stopped or the summary won't be printed yet.\n";
print "<p>\n";
print "Note that it is not always possible to check the modification time\n";
print "of a foreign Asset because ";
print "some HTTP servers do not always specify a value for the \n";
print "&quot;Last-Modified&quot; field in their\n";
print "HTTP response headers.\n";
print "If this is the case then RIB will report '<i>unknown</i>'\n";
print "in its output.<p><hr>\n";

my ($link,@known,@unknown,@broken,@updated,@triplet);

foreach $link ( $util->NonLocalLink ){
  my $url = $link->{URL};
  print "$url<br>\n<ul>\n";
  my $ua = LWP::UserAgent->new("RIB/1.0");
  my $req = HTTP::Request->new('GET',$url);
  if ($link->{'LM'} ne ''){
      $req->push_header('If-Modified-Since',$link->{'LM'});
  }
  my $res = $ua->request($req);
  if ($res->code == 304){
     # the url content has NOT been modified
     print "<li>Updated: no\n";
  } elsif (!$res->last_modified()) {
     print "<i>unknown</i>\n";
     push (@unknown, $url);
  } elsif ($res->code == 200) {
     # Update the catalog entry
     #$util->UpdateNonLocal($link,$res);
     print "<li>Updated: yes\n";
     push (@updated, $url);
  } elsif ($res->is_error){
    print "<li>Couldn't retrieve document : status code "
                     . $res->code. " : ". $res->message;
    push (@broken, [$url, $res->code, $res->message]);
  }  
  #if ($res->is_error){
  #  print "<li>Couldn't retrieve document : status code "
  #                   . $res->code. " : ". $res->message;
  #  push (@broken, [$url, $res->code, $res->message]);
  #}
  #else {
  #  print "<li>Updated: ";
  #  if (!$res->last_modified()) {
  #    print "<i>unknown</i>\n";
  #    push (@unknown, $url);
  #  } elsif ($last_mod_date < $res->last_modified()) {
  #    print "yes\n";
  #    push (@updated, $url);
  #  } else {
  #    print "no\n";
  #  }
  #}
  print "</ul>\n";
  print "<p>\n";
}
$util->CommitNonLocal();
print "<hr>\n";
print "<a name=summary></a>\n";
print "<strong><font size=+2>Summary</font></strong><p>\n";
print "Links that have been modified since last catalog creation:\n";
print "<ul>\n";
print "none\n" unless @updated;
my $url;
foreach $url (@updated) {
  print "<li><a href=$url>$url</a>\n<br>";
}
print "</ul>\n";

print "Links whose last modification times are unknown :\n";
print "<ul>\n";
print "none\n" unless @unknown;
foreach $url (@unknown) {
  print "<li><a href=$url>$url</a>\n<br>";
}
print "</ul>\n";

print "<p>\n";
print "Links that could not be checked:\n";
print "<ul>\n";
print "none\n" unless @broken;
my $triplet;
foreach $triplet (@broken) {
  my ($url,$code,$message) = @{$triplet};
  print "<li><a href=$url>$url</a>\n<br>";
  print "Reason: status code $code : $message<br>\n";
  print "<a href=delete_nonlocal_link.pl?url=$url>";
  print "(remove catalog's link to this Asset)</a>\n";
}
print "</ul>\n";

print $back_to_top;
print "</body>\n";
