cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski############################################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# David Wood (david.wood@talis.com)
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# January 2011
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Script to modify PURLs in a PURLz v1.x server. PURLs are described in
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# the same XML format used by sendbatchpurls.pl.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Copyright 2011 Talis Inc. Licensed under the Apache License,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Version 2.0 (the "License"); you may not use this file except in compliance
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# with the License. You may obtain a copy of the License at
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# or agreed to in writing, software distributed under the License is distributed
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# express or implied. See the License for the specific language governing
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# permissions and limitations under the License.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski###########################################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski###########################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Complete/modify these variables
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $server = 'localhost:8080'; # Hostname:Port
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski###########################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $authurl = "http://$server/admin/login/login-submit.bsh";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# set up user agent with a cookie jar
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "no old cookie jar file, so asking for new authorization\n"
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # try the authorization call, saving the cookie
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print STDERR "auth cookies retrieved:\n", $cookie_jar->as_string(), "\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "load cookie jar from the file\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "cookies loaded for update: \n", $cookie_jar->as_string(), "\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Open log file.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiopen LOG, ">purl_modifications.log" or die "Couldn't open log file: $!\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Get the contents of each file in the directory.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy @files = <$directory/*> or die "ERROR: Can't open directory $directory for reading: $!\n";;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # Get content from file
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski local $/=undef;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski open FILE, $file or die "Couldn't open file $file: $!\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # parse PURLs from the file;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my ($purlid, $type, $maintainers, $target, $seelaso);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $purl =~ /<purl\s*id=\"(.*)\"\s*type=\"(.*)\">/s ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $purl =~ /<maintainers>(.*)<\/maintainers>/s ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $purl =~ /<target\s*url=\"(.*)\"\/>/ ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "target=" . uri_escape($1) . "&";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski } elsif ( $purl =~ /<seealso\s*url=\"(.*)\"\/>/ ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # Regexp failed to match. Either a bad XML entry or the tail end of the file.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $debug ) { print STDERR "make update request\n"; }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $url = $workurl . $purlid . '?' . $params;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print $response->error_as_HTML unless $response->is_success;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # Report results.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print "$purlid OK\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print "\nERROR: $purlid: $report\n\n";