#!/usr/bin/perl -w

# sinsyn: SINfest SYNdicator
# An RSS feed for the webcomic Sinfest ( https://sinfest.xyz/ )
# Programmer: Triskaideka ( https://triskaideka.net/ )
# License: MIT ( https://en.wikipedia.org/wiki/MIT_License )
#   (that's the license for this script, not the comic)
#
# What the script does, in order:
#   1. Builds the list of the last $num_entries calendar dates.
#   2. Loads the existing RSS file, or starts a new feed if it can't be loaded.
#   3. For each date that has no item in the feed yet, sends a HEAD request for that date's comic page.
#   4. Adds an item for every date whose request succeeded.
#   5. Sorts the items by link, trims the feed to $num_entries items, and saves it.
#
# Version history:
#   1.3.0, 2021 May:
#     Change Sinfest's domain name from "www.sinfest.net" to "sinfest.xyz"
#   1.2.1, 2020 Nov:
#     Update HTTP URLs to HTTPS
#   1.2.0, 2016 May:
#     Re-sort the feed into date order before saving the RSS file (thanks Shaun)
#     In titles, print the date in a more human-readable format
#     Changed TTL to one day minus one minute, just in case
#     Improved some comments
#   1.1.0, 2015 Aug:
#     Put sinsyn's web address in the user-agent string, plus some minor refactoring
#   1.0.0, 2015 Jun:
#     Initial release
#
# Also see these other Sinfest feeds/generators:
#   https://github.com/jarek/sinfest-rss
#   https://github.com/xtaran/sinfest-gen-rss


### Pragmas and modules

use warnings;
use strict;

use DateTime;        # https://metacpan.org/pod/DateTime
use LWP::UserAgent;  # https://metacpan.org/pod/LWP::UserAgent
use Try::Tiny;       # https://metacpan.org/pod/Try::Tiny
use XML::RSS;        # https://metacpan.org/pod/XML::RSS


### Settings and variables

# Settings that you might want to change if you're reusing this code elsewhere.  Also see the calls to
# $rss->channel() later on.
my $script_name = 'sinsyn';
my $version     = '1.3.0';
my $email       = 'octopus@triskaideka.net';  # Is this your e-mail address?  No?  Then change it!
my $web_addr    = 'https://triskaideka.net/sinsyn/';
my $timeout     = 60;   # seconds before the UserAgent will give up on its request
my $num_entries = 20;   # number of published comics to keep in the RSS feed
my $rssfile     = 'sinfest.rss';
my $stable_url_portion = 'https://sinfest.xyz/view.php?date=';
#my $stable_url_portion = 'http://localhost/sinsyn/view.php?date=';  # for local testing

# Other variable declarations
my %dates;  # keys will contain the dates we're going to include in the feed
my $date = DateTime->now( time_zone => 'local' )->set_time_zone('floating');


### Build the list of dates that we're concerned about, i.e. the last $num_entries days

while ( scalar(keys %dates) < $num_entries ) {
    $dates{ $date->ymd } = 0;  # create the key but set its value to something false for now
    $date->subtract( days => 1 );
}


### Open or create the RSS file

my $rss = XML::RSS->new( version => '2.0' );

# Assume that the RSS file already exists and try to load it:
try {
    $rss->parsefile($rssfile);
}
# If something went wrong, like the file doesn't exist or couldn't be parsed, start a new one:
catch {
    my $error = $_;

    # A missing file is the normal first-run case and stays silent.  A file that exists but can't be
    # loaded means the old feed contents are about to be replaced, so say so.
    if ( -e $rssfile ) {
        warn "$script_name: could not load existing $rssfile, starting a new feed: $error";
    }

    # Start from a fresh object so that nothing from a partially parsed file leaks into the new feed.
    $rss = XML::RSS->new( version => '2.0' );

    $rss->channel(
        title       => 'Sinfest',
        link        => 'https://sinfest.xyz/',
        language    => 'en-us',
        description => 'Sinfest is a long-running webcomic written and drawn by Tatsuya Ishida.  This is an unofficial RSS feed for it.',
        webMaster   => $email,
        docs        => 'https://www.rssboard.org/rss-specification',

        # "Time To Live" -- the feed is safe to cache for this many minutes.
        # XML::RSS doesn't document this but seems to support it.
        # This feature is not well understood.  For discussion, see:
        #   https://rssweblog.com/?guid=20070529130637
        #   https://www.rssboard.org/rss-profile#element-channel-ttl
        ttl         => (60 * 24) - 1,
    );
};

# These channel elements might change, so we re-set them every time
$rss->channel(
    copyright => 'Sinfest is copyright (c) 2000-' . DateTime->now->year . ' by Tatsuya Ishida/Museworks',
    generator => "$script_name v$version, $web_addr",

    # There is a DateTime::Format::RSS module in CPAN, but when I checked I found that it had not been updated since 2008
    # and does not support the RFC 822 date-time string required for RSS 2.0.  The DateTime.pm documentation specifically
    # mentions the following format string as being compliant with RFC 822, which appears correct to me (see
    # http://www.faqs.org/rfcs/rfc822.html).
    lastBuildDate => DateTime->now->strftime("%a, %d %b %Y %H:%M:%S %z"),
);


### Create the user agent

my $ua = LWP::UserAgent->new(
    # The trailing space is deliberate: per the LWP::UserAgent documentation, an agent string that ends
    # with a space gets the default "libwww-perl/#.###" string appended to it.
    agent                 => "$script_name/$version (+$web_addr) ",
    from                  => $email,
    protocols_allowed     => [ 'http', 'https' ],
    requests_redirectable => [],  # follow no redirects, so only a direct 2xx answer counts as success below
    timeout               => $timeout,
);


### Make the requests

# Index the links that are already in the feed, so that each day needs only one hash lookup.
# Items without a permaLink are skipped; they can never match one of our URLs.
my %already_in_feed;
foreach my $item ( @{ $rss->{'items'} } ) {
    next unless defined $item->{'permaLink'};
    $already_in_feed{ $item->{'permaLink'} } = 1;
}

foreach my $day (keys %dates) {
    my $url = $stable_url_portion . $day;

    # check that we don't already have a positive entry for this day (which in most cases we should)
    next if $already_in_feed{$url};

    # if we haven't recorded a link for this day yet, then ask the server for it and see what the response is
    my $response = $ua->head($url);
    if ( $response->is_success() ) {  # specifically, if it's a 2xx
        $dates{$day}++;  # change its value from false to true
    }
}


### Add entries to the feed

foreach my $day (sort keys %dates) {
    next unless $dates{$day};  # only days whose value was set to true earlier

    # separate out the different parts of the date so we can give them to DateTime->new()
    my ($y, $m, $d) = split '-', $day;

    $rss->add_item(
        title     => "Sinfest comic for " . DateTime->new(year=>$y, month=>$m, day=>$d)->strftime("%A, %e %B %Y"),
        permaLink => $stable_url_portion . $day,
    );
}


### Sort the feed in date order, in case it's gotten out of order somehow.

# This isn't just cosmetic; it's important that the earliest entries are at the front of the list so that they're the
# ones that get shifted off the list first.
# Sorting by permaLink works because the URLs are all the same except for the date, which, thanks to the sensible
# format, is naturally in alphabetical order.  Sorting by title *wouldn't* work now that day names are spelled out in
# the titles.
# An item with no permaLink sorts as the empty string, i.e. to the front, without triggering an
# "uninitialized value" warning.
my $link_of = sub {
    my ($item) = @_;
    return defined $item->{'permaLink'} ? $item->{'permaLink'} : '';
};
@{ $rss->{'items'} } = sort { $link_of->($a) cmp $link_of->($b) } @{ $rss->{'items'} };


### Remove older entries from the beginning of the feed

my $excess = @{ $rss->{'items'} } - $num_entries;
if ( $excess > 0 ) {
    splice @{ $rss->{'items'} }, 0, $excess;
}


### Write the file to disk

$rss->save($rssfile);