#!/bin/perl
# ----------------------------------------------
# author(s) : Edward Corrado, Eric Thul
# title     : rss.pl
# date      : 2005.09.02
# version   : 0.1
#
# this script generates rss version 2.0 files
# from "lst" files generated from an SQL query.
# there is a corresponding "ini" file which
# stores the channel information and points to
# the actual lst file which contains the items
# for the rss document.
#
# to run this script, just pass it the path to
# the configuration file which should be
# formatted as (rss.ini):
#   channel-title="title"
#   channel-description="description"
#   channel-itemfile="/path/to/items.lst"
#
# note, separate each channel with a blank
# line.
#
# each line of an lst file holds one item as
# pipe-separated fields:
#   pubDate|isbn|title|long title|location|temp location|call number
#
# an example of usage would be:
#   > ./rss.pl rss.ini
# which will generate the rss file named
# items.xml, located in the current directory
# ----------------------------------------------
#
use lib '/home/dadt/www/lib';
use warnings;
use strict;
use XML::RSS;
use CGI qw{ escapeHTML };
use URI::Escape;
use File::Basename qw{ basename };

# ----------------------------------------------
# replaces any (Windows-1252) smart quotes with regular quotes
# and uses the cgi's escape-html to replace '<', '>', '&' and '"'.
# an undefined argument yields the empty string.
sub escape_chars {
    my ($str) = @_;
    return '' unless defined $str;

    # map smart quotes to regular quotes
    $str =~ tr/\221\222\223\224/''""/;
    return CGI::escapeHTML($str);
}

# removes trailing whitespace on a string
# (an undefined argument is treated as the empty string)
sub remspace {
    my ($str) = @_;
    $str = '' unless defined $str;
    $str =~ s#\s+$##;
    return $str;
}

# returns the isbn (first run of digits, plus the optional 'X'
# check digit of an ISBN-10) from a string, or the empty string
# when the argument contains no digits.
sub format_isbn {
    my ($isbn) = @_;
    return '' unless defined $isbn;

    # test the match explicitly: after a failed match $1 still holds
    # whatever an earlier, unrelated match captured
    if ($isbn =~ m#(\d+[Xx]?)#) {
        return $1;
    }
    return '';
}

# returns the current time as an rss (RFC 822) formatted date,
# for example: "Fri, 02 Sep 2005 14:05:09 GMT".
# the time is reported in GMT so that the zone label is correct
# all year round.
sub format_nowdate {
    my @days   = qw{ Sun Mon Tue Wed Thu Fri Sat };
    my @months = qw{ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec };

    my ($sec, $min, $hour, $mday, $mon, $year, $wday) = gmtime(time);

    return sprintf(
        '%s, %02d %s %04d %02d:%02d:%02d GMT',
        $days[$wday], $mday, $months[$mon], $year + 1900,
        $hour, $min, $sec,
    );
}

# builds the catalog link for an item: an isbn search when the
# isbn field contains a usable number, otherwise a title search.
sub format_link {
    my ($isbn, $title) = @_;
    my $linkurl = 'http://libcat.tcnj.edu/cgi-bin/Pwebrecon.cgi?DB=local&Search_Arg=';

    my $number = format_isbn($isbn);
    if ($number ne '') {
        return $linkurl . 'ISBN+%22' . $number
             . '%22&SL=None&Search_Code=CMD&CNT=10';
    }

    return $linkurl . uri_escape(remspace($title))
         . '&Search_Code=TALL&CNT=10';
}

# returns the description formatted as:
# LongTitle. Location: Loc. Temporarily Shelved at: temp. Call Number: #.
# the "Temporarily Shelved at" part is left out when $temp is empty and
# a missing call number is replaced by 'Ask at Reference Desk'.
sub format_descr {
    my ($tlong, $loc, $temp, $call) = @_;
    $tlong = '' unless defined $tlong;
    $loc   = '' unless defined $loc;

    my $descr = '';

    # only add a closing '.' when the field does not already end in one
    $descr .= remspace($tlong) . ($tlong !~ m#\.\s*$# ? '.' : '');
    $descr .= ' Location: ' . remspace($loc) . ($loc !~ m#\.\s*$# ? '.' : '');
    if ($temp) {
        $descr .= ' Temporarily Shelved at: ' . remspace($temp)
                . ($temp !~ m#\.\s*$# ? '.' : '');
    }
    $descr .= ' Call Number: ';
    $descr .= remspace($call) || 'Ask at Reference Desk';
    return $descr;
}

# iterates through all the items in the lst file
# and builds an array of hashes, one for each item,
# then returns a reference to that array.
# dies when the file cannot be read.
sub fetch_items {
    my ($itemfile) = @_;
    die "no item file given\n"
        unless defined $itemfile && length $itemfile;

    my @items;
    open(my $in, '<', $itemfile)
        or die "cannot read from $itemfile: $!\n";
    while (my $line = <$in>) {
        # strip trailing whitespace (including the line ending) first,
        # so that whitespace-only lines are skipped as blank too
        $line =~ s#\s+$##;
        next if $line eq '';

        # break up the line 'x|y|z' into an array; the -1 keeps empty
        # trailing fields and the padding guarantees all seven exist
        my @fields = split(/\|/, $line, -1);
        push @fields, '' while @fields < 7;

        push @items, {
            pubDate     => remspace($fields[0]),
            link        => format_link(@fields[1, 2]),
            title       => remspace($fields[2]),
            description => format_descr(@fields[3, 4, 5, 6]),
            author      => 'corrado@tcnj.edu',
        };
    }
    close($in);
    return \@items;
}

# reads the config (rss.ini) file and
# creates an array of hashes where each
# element in the array represents a channel.
# hash keys are the upper-cased names following "channel-"
# (TITLE, DESCRIPTION, ITEMFILE). channels are separated by one
# or more blank lines; lines that are not name="value" pairs are
# ignored. returns a reference to the array of channels and dies
# when the file cannot be read.
sub parse_config {
    my ($ini) = @_;
    my @channels;
    my $current;    # channel being filled, undef between channels

    open(my $in, '<', $ini) or die "cannot read from $ini: $!\n";
    while (my $line = <$in>) {
        # drop the line ending (LF or CRLF) and trailing blanks
        $line =~ s#\s+$##;

        # a blank line closes the current channel; the next pair
        # starts a new one, so repeated blank lines leave no holes
        if ($line eq '') {
            undef $current;
            next;
        }

        # the value may hold anything but a double quote, which
        # allows paths such as "/path/to/items.lst"
        if ($line =~ m#^channel-(\w+)="([^"]+)"$#i) {
            my ($name, $value) = (uc($1), $2);
            if (!$current) {
                $current = {};
                push @channels, $current;
            }
            $current->{$name} = $value;
        }
    }
    close($in);
    return \@channels;
}

# uses the XML::RSS module to generate the
# version 2.0 rss document for one channel.
# $channel is a hash reference as built by parse_config;
# the second argument is accepted for backward compatibility
# and is not used. returns the document as a string.
sub gen_rss {
    my ($channel, $cgi) = @_;

    # returns an array reference of hash-references
    # for example: $items->[0]->{title} is the
    # first item's title
    my $items = fetch_items($channel->{ITEMFILE});

    # current year for the copyright line, see perldoc -f localtime
    my $year = (localtime(time))[5] + 1900;

    my %channel_info = (
        title       => escape_chars($channel->{TITLE}),
        link        => 'http://www.tcnj.edu/',
        description => escape_chars($channel->{DESCRIPTION}),
        copyright   => 'Copyright ' . $year . ', The College of New Jersey',
        # today's date/time formatted to rss spec
        pubDate     => format_nowdate(),
        language    => 'en-us',
        generator   => 'The "Webalerts" News & Events System',
        webMaster   => 'dadt@tcnj.edu',
        docs        => 'http://blogs.law.harvard.edu/tech/rss',
    );

    # the first item's date doubles as the last build date;
    # leave the element out when there is no such date
    if (@{$items} && length $items->[0]->{pubDate}) {
        $channel_info{lastBuildDate} = $items->[0]->{pubDate};
    }

    # begin rss creation
    my $rss = XML::RSS->new(version => '2.0', encoding => 'UTF-8');

    # create the channel
    $rss->channel(%channel_info);

    # set the image
    $rss->image(
        title       => 'TCNJ Logo',
        url         => 'http://www.tcnj.edu/media/tcnjlogosmall.gif',
        link        => 'http://www.tcnj.edu',
        description => 'TCNJ Logo',
    );

    # iterate through the items
    # and add each item to the channel
    for my $item (@{$items}) {
        $rss->add_item(
            title       => escape_chars($item->{title}),
            link        => $item->{link},
            description => escape_chars($item->{description}),
            pubDate     => $item->{pubDate},
            author      => $item->{author},
        );
    }

    return $rss->as_string();
}

# ----------------------------------------------
# entry point: builds one rss file per channel found in the
# configuration file $ini. each file is written to the current
# directory and named after the channel's item file with its
# extension replaced by 'xml'.
sub main {
    my ($ini) = @_;

    if (!$ini) {
        print STDERR "error: usage: rss.pl /path/to/ini\n";
        return;
    }

    # retrieve an array of all the channels
    my $channels = parse_config($ini);

    # iterate through the channels to build each one
    for my $channel (@{$channels}) {
        # create the rss document based on the channel
        my $doc = gen_rss($channel);

        # replace the 'lst' extension with 'xml' for the output
        # file; when there is no extension append one, so that the
        # item file itself is never overwritten
        my $outfile = basename($channel->{ITEMFILE});
        $outfile =~ s#\.\w+\z#.xml# or $outfile .= '.xml';

        # write the xml file to the current directory
        open(my $out, '>', $outfile)
            or die "cannot write rss file: $outfile: $!\n";
        print {$out} $doc;
        close($out)
            or die "cannot write rss file: $outfile: $!\n";
    }
    return;
}

# ----------------------------------------------
# run only when executed as a script, so the subs above can be
# loaded (e.g. for testing) without side effects
main($ARGV[0]) unless caller;
# ----------------------------------------------