cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski#!/usr/bin/perl -w
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski#use strict;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski############################################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# modifybatchpurls.pl
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski#
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# David Wood (david.wood@talis.com)
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# January 2011
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski#
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Script to modify PURLs in a PURLz v1.x server. PURLs are described in
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# the same XML format used by sendbatchpurls.pl.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski#
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Copyright 2011 Talis Inc. Licensed under the Apache License,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Version 2.0 (the "License"); you may not use this file except in compliance
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# with the License. You may obtain a copy of the License at
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# or agreed to in writing, software distributed under the License is distributed
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# express or implied. See the License for the specific language governing
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# permissions and limitations under the License.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski#
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski###########################################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskipackage PURLz;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiuse LWP::UserAgent;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiuse HTTP::Cookies;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiuse HTTP::Request::Common qw(GET PUT POST);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiuse URI::Escape;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski###########################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Complete/modify these variables
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $server = 'localhost:8080'; # Hostname:Port
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $userid = 'admin';
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $passwd = 'password';
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $directory = "batch_files";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski###########################################
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $authurl = "http://$server/admin/login/login-submit.bsh";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $workurl = "http://$server/admin/purl";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $cookiefile = 'lwpcookies.txt';
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $useoldfile = 0;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $debug = 0;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# set up user agent with a cookie jar
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $res;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $req;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $ua = LWP::UserAgent->new();
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy $cookie_jar = HTTP::Cookies->new(
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski file => $cookiefile,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski autosave => 1,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski ignore_discard => 1,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski$ua->cookie_jar($cookie_jar);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "no old cookie jar file, so asking for new authorization\n"
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ($useoldfile && ! -e $cookiefile);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "set up cookie jar\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiunless ($useoldfile && -e $cookiefile) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # try the authorization call, saving the cookie
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $req = POST($authurl, [id => $userid,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski passwd => $passwd,
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski referrer => '/docs/index.html',
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski ]);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print STDERR "make auth request\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $res = $ua->request($req);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print STDERR "save cookie jar file\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $cookie_jar->save();
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print STDERR "auth cookies retrieved:\n", $cookie_jar->as_string(), "\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski}
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "load cookie jar from the file\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski$cookie_jar->load();
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski$ua->cookie_jar($cookie_jar);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiprint STDERR "cookies loaded for update: \n", $cookie_jar->as_string(), "\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Open log file.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiopen LOG, ">purl_modifications.log" or die "Couldn't open log file: $!\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski# Get the contents of each file in the directory.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskimy @files = <$directory/*> or die "ERROR: Can't open directory $directory for reading: $!\n";;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiforeach my $file (@files) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # Get content from file
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski local $/=undef;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski open FILE, $file or die "Couldn't open file $file: $!\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski binmode FILE;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # parse PURLs from the file;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $xml = <FILE>;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my @purls = split(/<\/purl>/, $xml);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski foreach my $purl (@purls) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # DBG
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print "$purl\n\n" if $debug;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $params;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my ($purlid, $type, $maintainers, $target, $seelaso);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $purl =~ /<purl\s*id=\"(.*)\"\s*type=\"(.*)\">/s ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $purlid = $1;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "type=$2&maintainers=";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $purl =~ /<maintainers>(.*)<\/maintainers>/s ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $maints = $1;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski while ($maints =~ /<uid>(.*?)<\/uid>/g) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "$1,";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski while ($maints =~ /<gid>(.*?)<\/gid>/g) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "$1,";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "&";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $purl =~ /<target\s*url=\"(.*)\"\/>/ ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "target=" . uri_escape($1) . "&";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski } elsif ( $purl =~ /<seealso\s*url=\"(.*)\"\/>/ ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params .= "seealso=" . uri_escape($1);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params =~ s/,&/&/;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski $params =~ s/&$//;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski } else {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # Regexp failed to match. Either a bad XML entry or the tail end of the file.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski next;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # DBG
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $debug ) { print STDERR "make update request\n"; }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $url = $workurl . $purlid . '?' . $params;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $response = $ua->request(PUT $url);
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $debug ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print $response->error_as_HTML unless $response->is_success;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski # Report results.
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski my $report = $response->as_string;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski if ( $report =~ m/Updated resource/ ) {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print "$purlid OK\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print LOG "$purlid OK\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski } else {
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print "\nERROR: $purlid: $report\n\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski print LOG "\nERROR: $purlid: $report\n\n";
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski }
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski close FILE;
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski}
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowski
cbc7f7ea90538f481b528959e9b6cf837b0dd785Till Mossakowskiclose(LOG);