#!/bin/perl
# ----------------------------------------------
# author(s) : Edward Corrado, Eric Thul
# title : rss.pl
# date : 2005.09.02
# version : 0.1
#
# this script generates rss version 2.0 files
# from "lst" files generated from an SQL query
# there is a corresponding "ini" file which
# stores the channel information and points to
# the actual lst file which contains the items
# for the rss document.
#
# to run this script, just pass it the path to
# the configuration file which should be
# formatted as (rss.ini):
# channel-title="title"
# channel-description="description"
# channel-itemfile="/path/to/items.lst"
#
# note, separate each channel with a blank
# line.
#
# an example of usage would be:
# > ./rss.pl rss.ini
# which will generate the rss file named:
# items.xml, located in the current directory
# ----------------------------------------------
# use lib '/home/dadt/www/lib';
use warnings;
use strict;
use XML::RSS;
use CGI qw{ escapeHTML };
use URI::Escape;
# ----------------------------------------------
# entry point: the first command-line argument is the path to the ini file
main($ARGV[0]);
# ----------------------------------------------
# replaces any smart quotes with regular quotes
# and uses the cgi's escape-html to replace '<' or '>'
sub escape_chars {
    # Cleans one piece of text for inclusion in the feed.
    #   argument: the text to clean
    #   returns : whatever CGI::escapeHTML produces for the cleaned text
    my $text = shift;

    # Bytes \221 and \222 (octal) become an apostrophe, \223 and \224 a
    # double quote; these are the "smart quote" bytes named above.
    $text =~ tr/\221\222\223\224/''""/;

    return CGI::escapeHTML($text);
}
# removes trailing whitespace on a string
sub remspace {
    # Works on a copy of the first argument, so the caller's value is
    # left untouched; returns the copy with trailing whitespace removed.
    (my $trimmed = $_[0]) =~ s/\s+$//;
    return $trimmed;
}
# returns the isbn from a string
sub format_isbn {
    # Extracts the ISBN from a raw field that may carry other text
    # around the number.
    #   $isbn   : raw field text (may be undef)
    #   returns : the first run of digits, including a directly attached
    #             X check digit (valid as the last character of an
    #             ISBN-10); '' when the text contains no digits
    my ($isbn) = @_;
    return '' unless defined $isbn;

    # Only use the capture when the match succeeded: after a failed match
    # $1 still holds whatever an earlier, unrelated match captured.
    return $isbn =~ m#(\d+[Xx]?)# ? $1 : '';
}
# returns the proper rss formatted date
sub format_nowdate {
    # Formats a point in time the way RSS 2.0 expects dates (RFC 822),
    # expressed in the local time zone with a numeric UTC offset, e.g.
    # "Fri, 02 Sep 2005 09:05:03 -0400".
    #   $when   : optional epoch seconds; defaults to the current time
    #   returns : the formatted date string
    my ($when) = @_;
    my $epoch = defined $when ? int($when) : time;

    # Fixed English abbreviations, independent of the process locale.
    my @day_names   = qw(Sun Mon Tue Wed Thu Fri Sat);
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = localtime($epoch);

    # Reading the local broken-down time as if it were UTC and subtracting
    # the real epoch gives the local offset in seconds, so the zone is
    # right in both daylight-saving and standard time.
    require Time::Local;
    my $offset = Time::Local::timegm($sec, $min, $hour, $mday, $mon, $year + 1900)
               - $epoch;
    my $sign           = $offset < 0 ? '-' : '+';
    my $offset_minutes = int(abs($offset) / 60);

    # Every numeric field is zero-padded to the width RFC 822 requires.
    return sprintf('%s, %02d %s %04d %02d:%02d:%02d %s%02d%02d',
        $day_names[$wday], $mday, $month_names[$mon], $year + 1900,
        $hour, $min, $sec,
        $sign, int($offset_minutes / 60), $offset_minutes % 60);
}
# sets the link to either the isbn
# or the title of the item
sub format_link {
    # Builds the URL stored as an item's link.
    #   $isbn   : raw isbn field; when true the link searches by isbn
    #   $title  : item title; used for the search when $isbn is false
    #   returns : the complete URL as a string
    my ($isbn, $title) = @_;

    my $search_base = 'http://libcat.tcnj.edu/cgi-bin/Pwebrecon.cgi?DB=local&Search_Arg=';

    # An isbn search sends the extracted number wrapped in %22 (an encoded
    # double quote); a title search sends the trimmed, URI-escaped title.
    my $query = $isbn
        ? 'ISBN+%22' . format_isbn($isbn) . '%22&SL=None&Search_Code=CMD&CNT=10'
        : uri_escape(remspace($title)) . '&Search_Code=TALL&CNT=10';

    return $search_base . $query;
}
# returns the description formatted as:
# LongTitle. Location: Loc. Temporarily Shelved at: temp. Call Number: #
sub format_descr {
    # Assembles the item description from four fields.
    #   $tlong  : long title
    #   $loc    : location
    #   $temp   : temporary location; its sentence is left out when false
    #   $call   : call number; a fixed fallback text is used when the
    #             trimmed value is false
    #   returns : the description string
    my ($tlong, $loc, $temp, $call) = @_;

    # Trims a field and closes it with a period unless the field already
    # ends in one (optionally followed by whitespace).
    my $as_sentence = sub {
        my ($field) = @_;
        return remspace($field) . ($field =~ m#\.\s*$# ? '' : '.');
    };

    my $text = '';
    $text .= $as_sentence->($tlong);
    $text .= ' Location: ' . $as_sentence->($loc);
    $text .= ' Temporarily Shelved at: ' . $as_sentence->($temp) if $temp;
    $text .= ' Call Number: ';
    $text .= remspace($call) || 'Ask at Reference Desk';

    return $text;
}
# iterates through all the items in the lst file
# and builds an array of hashes, one for each item,
# then returns a reference to the array
sub fetch_items {
    # Reads a pipe-delimited lst file and turns every non-blank line
    # into one item hash.
    #   $itemfile : path of the lst file
    #   returns   : reference to an array of hash references with the keys
    #               pubDate, link, title, description and author
    #   dies      : when no path is given or the file cannot be opened
    #
    # Column use, by position: 0 pubDate, 1 isbn, 2 title, 3 long title,
    # 4 location, 5 temporary location, 6 call number.
    my ($itemfile) = @_;
    defined $itemfile && length $itemfile
        or die "cannot read from item file: no path given\n";

    open(my $in, '<', $itemfile)
        or die "cannot read from $itemfile: $!\n";

    my @items;
    while (my $line = <$in>) {
        # strip the line ending and any other trailing whitespace first,
        # so whitespace-only lines are skipped like empty ones
        $line =~ s#\s+$##;
        next if $line eq '';

        # break up the line 'x|y|z'; the -1 limit keeps trailing empty
        # columns, and short lines are padded with '' so the helpers
        # never receive undef
        my @fields = split /\|/, $line, -1;
        my ($pubdate, $isbn, $title, $tlong, $loc, $temp, $call) =
            map { defined $_ ? $_ : '' } @fields[0 .. 6];

        my %item = (author => 'corrado@tcnj.edu');
        $item{pubDate}     = remspace($pubdate);
        $item{link}        = format_link($isbn, $title);
        $item{title}       = remspace($title);
        $item{description} = format_descr($tlong, $loc, $temp, $call);
        push @items, \%item;
    }
    close($in);

    return \@items;
}
# reads the config (rss.ini) file and
# creates an array of hashes where each
# element in the array represents a channel
# returns a reference to the array of channels
sub parse_config {
    # Parses the ini file into channels.
    #   $ini    : path of the ini file
    #   returns : reference to an array of hash references, one per
    #             channel; each hash maps the upper-cased part of the key
    #             after "channel-" (TITLE, DESCRIPTION, ITEMFILE, ...) to
    #             the quoted value
    #   dies    : when no path is given or the file cannot be opened
    #
    # Lines that are neither blank nor a channel-name="value" pair are
    # ignored.
    my ($ini) = @_;
    defined $ini && length $ini
        or die "cannot read from config file: no path given\n";

    open(my $in, '<', $ini)
        or die "cannot read from $ini: $!\n";

    my @channels;
    my %current;
    while (my $line = <$in>) {
        # drop the line ending (LF or CRLF) and trailing blanks so both
        # tests below see the bare content
        $line =~ s/\s+$//;

        if ($line eq '') {
            # a blank line closes the channel collected so far; leading
            # or repeated blank lines must not create empty channels
            if (%current) {
                push @channels, {%current};
                %current = ();
            }
        }
        elsif ($line =~ m#^channel-(\w+)="([^"]+)"$#i) {
            # any character except a double quote is allowed in the
            # value, so paths such as /path/to/items.lst are accepted
            $current{uc $1} = $2;
        }
    }
    close($in);

    # the last channel is usually not followed by a blank line
    push @channels, {%current} if %current;

    return \@channels;
}
# uses the XML::RSS module to generate
# an rss version 2.0 document
sub gen_rss {
    # Builds the RSS 2.0 document for one channel.
    #   $channel : hash reference; the TITLE, DESCRIPTION and ITEMFILE
    #              keys are read here
    #   $cgi     : accepted from the caller but not used in this sub
    #   returns  : the document as produced by XML::RSS's as_string
    my ($channel, $cgi) = @_;

    # array reference of hash references, one per item; for example
    # $items->[0]->{title} is the first item's title
    my $items = fetch_items($channel->{ITEMFILE});

    # broken-down current time, see perldoc -f localtime; only the year
    # (index 5) is used, for the copyright line
    my @now = localtime(time());

    # the first item's pubDate doubles as lastBuildDate; it stays
    # undefined when there are no items
    my $last_build = $items->[0] ? $items->[0]->{pubDate} : undef;

    # today's date/time formatted for rss
    my $pub_date = format_nowdate();

    my $rss = XML::RSS->new(version => '2.0', encoding => 'UTF-8');

    # channel header
    $rss->channel(
        title         => escape_chars($channel->{TITLE}),
        link          => 'http://www.tcnj.edu/',
        description   => escape_chars($channel->{DESCRIPTION}),
        copyright     => 'Copyright ' . ($now[5] + 1900) .
                         ', The College of New Jersey',
        pubDate       => $pub_date,
        lastBuildDate => $last_build,
        language      => 'en-us',
        generator     => 'The "Webalerts" News & Events System',
        webMaster     => 'dadt@tcnj.edu',
        docs          => 'http://blogs.law.harvard.edu/tech/rss',
    );

    # channel image
    $rss->image(
        title       => 'TCNJ Logo',
        url         => 'http://www.tcnj.edu/media/tcnjlogosmall.gif',
        link        => 'http://www.tcnj.edu',
        description => 'TCNJ Logo',
    );

    # one rss item per entry, in file order
    for my $entry (@{$items}) {
        $rss->add_item(
            title       => escape_chars($entry->{title}),
            link        => $entry->{link},
            description => escape_chars($entry->{description}),
            pubDate     => $entry->{pubDate},
            author      => $entry->{author},
        );
    }

    return $rss->as_string();
}
# ----------------------------------------------
sub main {
    # Generates one rss file per channel listed in the ini file.
    #   $ini : path of the ini file; when missing, a usage message is
    #          printed to STDERR and nothing else happens
    #   dies : when an output file cannot be written (errors from the
    #          subs called here propagate as well)
    my ($ini) = @_;

    unless ($ini) {
        print STDERR "error: usage: rss.pl /path/to/ini\n";
        return;
    }

    my $cgi = CGI->new();

    # iterate through the channels to build each one
    for my $channel (@{ parse_config($ini) }) {
        # create the rss document based on the channel
        my $doc = gen_rss($channel, $cgi);

        # the output goes to the current directory, named after the
        # item file with its extension replaced by 'xml'
        my $outfile = _rss_output_name($channel->{ITEMFILE});

        open(my $out, '>', $outfile)
            or die "cannot write rss file: $outfile: $!\n";
        print {$out} $doc;
        # buffered write errors only surface when the handle is closed
        close($out)
            or die "cannot write rss file: $outfile: $!\n";
    }

    return;
}

# Derives the output file name for an item file: the directory part is
# dropped and only the final extension is replaced by .xml. A name that
# has no extension gets .xml appended, so the output can never be the
# item file itself.
sub _rss_output_name {
    my ($itemfile) = @_;

    require File::Basename;
    my $name = File::Basename::basename($itemfile);

    # anchored at the end, so 'items.2005.lst' becomes 'items.2005.xml'
    $name .= '.xml' unless $name =~ s#\.\w+$#.xml#;

    return $name;
}
# ----------------------------------------------