#!/usr/bin/perl

# Copyright 2007 Gérald Sédrati-Dinet
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

# Fetches the wiki page of a politician from wiki.ffii.fr.
#
# Positional arguments:
#   1. the wiki name of the politician (appended to the page URL as-is);
#   2. the container name (candidates, mps, meps or ministers).
#
# Options:
#   --path2dbenv, -p   directory of the DB XML database (default: $Bin/dbxml)
#   --help, -h         print the usage message and exit

use strict;
use warnings;

use Sleepycat::DbXml 'simple';
use XML::Twig;
use LWP::UserAgent;
use Getopt::Long;

# Some files will be saved under a tree hierarchy based on the executable
# location
use FindBin qw($Bin);
use lib "$Bin";

# Default values
my $path2DbEnv = "$Bin/dbxml";

# Parse command line options
Getopt::Long::Configure("bundling");
GetOptions(
    'path2dbenv|p=s' => \$path2DbEnv,
    'help|h'         => sub {
        # NOTE(review): the heredoc opener and the <...> placeholders were
        # lost from this copy of the file; the first usage line and the
        # placeholder names below are reconstructed -- confirm against the
        # upstream script.
        print STDERR <<USAGE;
Usage: $0 [<options>] <wiki_name> <container>
where <wiki_name> is the wiki name of the politician and <container> can be
candidates, mps, meps or ministers (for convenience the final "s" can be
omitted, also ".dbxml" can be appended)
Options can be:
  --path2dbenv, -p : path to directory of DB XML database
                     default: $Bin/dbxml
  --help, -h: print this message
USAGE
        exit 0;
    }
);

# Both positional arguments are mandatory
my $wiki_name = shift
    or die "You should provide the wiki name of the politician, see $0 -h\n";
my $theContainer = shift
    or die "You should provide the name of the container, see $0 -h\n";

# Normalise the container name: drop an optional ".dbxml" suffix and make
# sure the name ends with exactly one final "s" (mp, mps and mps.dbxml all
# become "mps")
$theContainer =~ s/s?(?:\.dbxml)?$/s/;

# Create a user agent object
my $ua = LWP::UserAgent->new;
$ua->agent("$0 (gibus perl script to fetch information from wiki.ffii.fr)");
my $headers = HTTP::Headers->new('Accept-Language' => 'fr');

# Fetch the wiki page (the request is built from $wiki_url and $headers)
my $wiki_url = "http://wiki.ffii.fr/spikini/MemoirePolitique$wiki_name";
my $wiki_req =
HTTP::Request->new(GET => $wiki_url, $headers); my $wiki_res = $ua->request($wiki_req); unless ($wiki_res->is_success) { die "Error fetching wiki page: $wiki_url: ", $wiki_res->status_line, "\n"; } my $wiki_html = $wiki_res->content; die "Unknown wiki page for $wiki_name\n" if $wiki_html =~ /Vous n'êtes pas autorisé à lire cette page/o; # Extract opinions (my $opinions_str) = ($wiki_html =~ /
  • Google<\/a><\/li><\/ul>(.+?)
    \s*<\/div>\s*