#!/usr/bin/perl
# Copyright 2007 Gérald Sédrati-Dinet
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

# Fetches the XML document of one Member of the Parliament (identified by its
# wiki name, given as the single positional argument) from a DB XML container
# and writes the matching spikini page to <output_dir>/<wiki_name>.spikini.

use strict;
use warnings;

use Sleepycat::DbXml 'simple';
use XML::Twig;
use Getopt::Long;
use Tie::IxHash;

# Some files will be saved under a tree hierarchy based on the executable location
use FindBin qw($Bin);
use lib "$Bin";

# Default values
my $path2DbEnv   = "$Bin/dbxml";
my $theContainer = 'mps.dbxml';
my $output_dir   = "$Bin/spikini/mps";

# Parse command line options
Getopt::Long::Configure("bundling");
GetOptions(
    'path2dbenv|p=s' => \$path2DbEnv,
    'container|c=s'  => \$theContainer,
    'output|o=s'     => \$output_dir,
    'help|h'         => sub {
        print STDERR <<USAGE;
Usage: $0 [-p <path>] [-c <container>] [-o <dir>] <wiki_name>

Options can be:
  --path2dbenv, -p <path>: path to directory of DB XML database
      default: $Bin/dbxml
  --container, -c <container>: DB XML container is typically mps
      (for convenience the final "s" can be omitted, also ".dbxml" can be appended)
      default: mps.dbxml
  --output, -o <dir>: output directory of spikini pages
      default: $Bin/spikini/mps
  --help, -h: print this message
USAGE
        exit 0;
    },
) or die "Invalid command line options, see $0 -h\n";

# Normalise the container name: drop an optional ".dbxml" suffix and make sure
# the base name ends with exactly one "s" (the suffix is re-appended on open).
$theContainer =~ s/s?(?:\.dbxml)?$/s/;

my $mp_wiki = shift
    or die "You should provide the wiki name of the Member of the Parliament, see $0 -h\n";

# Open a container in the db environment
my $env = DbEnv->new(0);
$env->set_cachesize(0, 64 * 1024, 1);
$env->open($path2DbEnv,
           Db::DB_INIT_MPOOL|Db::DB_CREATE|Db::DB_INIT_LOCK|Db::DB_INIT_LOG);
my $theMgr    = XmlManager->new($env);
my $container = $theMgr->openContainer("$theContainer.dbxml");

# Get the document, build the page and write it; any failure is reported below.
eval {
    my $theDocument = $container->getDocument($mp_wiki);
    my $docString   = $theDocument->getContent();

    # Build xml tree
    my $xml  = XML::Twig->new(output_encoding => 'ISO-8859-1')->parse($docString);
    my $root = $xml->root;

    # Extract infos
    my $infos      = $root->first_child('infos');
    my $name       = $infos->first_child('name');
    my $gender     = $name->first_child_text('gender');
    my $role       = 'député' . (($gender eq 'M.') ? '' : 'e');
    my $first_name = $name->first_child_text('first');
    my $last_name  = $name->first_child_text('last');
    my $wiki_name  = $name->first_child_text('wiki');

    my $birth       = $infos->first_child('birth');
    my $birth_date  = $birth->first_child('date');
    my $birth_place = $birth->first_child('place');
    my $birth_str   = sprintf "{%s le} %02d %s %04d {à} %s (%s)",
        $birth->first_child_text('gender'),
        $birth_date->first_child_text('day'),
        $birth_date->first_child_text('month'),
        $birth_date->first_child_text('year'),
        $birth_place->first_child_text('city'),
        $birth_place->first_child_text('department');

    my $constituency     = $infos->first_child('constituency');
    my $constituency_dpt = $constituency->first_child('department');
    my $constituency_dep = sprintf "%s (%s)",
        $constituency_dpt->first_child_text('name'),
        $constituency_dpt->first_child_text('number');
    my $constituency_nb      = $constituency->first_child_text('number');
    my $constituency_cantons = join ', ',
        $constituency->first_child('cantons')->children_text('name');
    my $constituency_wiki = desaccent($constituency_dpt->first_child_text('name'));

    # Political group: optional status prefix (feminised when needed) and
    # optional role suffix around the group name.
    my $grp     = $infos->first_child('group');
    my $grp_str = $grp->first_child_text('name');
    my $grp_ap  = $grp->att('status');
    $grp_ap .= 'e' if $grp_ap and $gender ne 'M.';
    $grp_str = "$grp_ap $grp_str" if $grp_ap;
    $grp_str .= ' (' . $grp->att('role') . ')' if $grp->att('role');
    my $party = $grp->first_child_text('abbreviation');
    my $grp_wiki;
    if ($grp->first_child_text('name') eq "Députés n'appartenant à aucun groupe") {
        $grp_wiki = 'AucunGroupe';
    }
    else {
        ($grp_wiki = $party) =~ s/^Ap\. //;
        $grp_wiki = ucfirst(lc($grp_wiki));
    }

    my $prof = $infos->first_child_text('profession');

    # Contact details
    my $contact = $root->first_child('contact');
    my $address = "-* "
        . join("\n-* ", map { _format_address($_) } $contact->children('address'))
        . "\n";
    # Hack for telephone/fax without address: such an entry starts with the
    # " - " separator, so drop it on every affected line.
    $address =~ s/^-\* +- /-* /mg;

    my $mail = $contact->first_child_text('email');
    my $mail_str;
    if ($mail) {
        ($mail_str = $mail) =~ s/\@/(à)/;
    }

    # Each entry is [full url, url without the http:// prefix, link type]
    my @webs;
    foreach my $web ($contact->children('web')) {
        my $url = $web->text;
        (my $web_str = $url) =~ s#^http://##;
        my $type = $web->att('type') ? ucfirst($web->att('type')) : 'Site web';
        push @webs, [$url, $web_str, $type];
    }

    # Group functions by type
    my $functions = $root->first_child('functions');
    my %function;
    foreach my $func ($functions->children('function')) {
        my $type      = $func->first_child_text('type');
        my $label     = $func->first_child_text('label');
        my $func_role = $func->first_child_text('role');
        my $mission   = $func->first_child_text('mission');

        if ($type eq 'Commission') {
            $label = "[$label -> MemoirePolitiqueDeputes" . wikify_com($label) . "]";
        }

        my $func_val;
        if ($mission) {
            $func_val = "$func_role au nom de la $label ($mission)";
        }
        else {
            $func_val = ($label ? $label : '') . ($func_role ? " ($func_role)" : '');
        }

        if (exists $function{$type}) {
            $function{$type} .= ", $func_val";
        }
        elsif ($type eq 'Mandat') {
            # Functions of type 'Mandat' are listed on their own, without a type label
            $function{$func_val} = '';
        }
        else {
            $function{$type} = $func_val;
        }
    }

    # Group mandates by type and by institution or role
    my $mandates = $root->first_child('mandates');
    my %mandate_groups;
    foreach my $mandate ($mandates->children('mandate')) {
        my $type = $mandate->first_child_text('type');
        if (($mandate->att('current') || '') eq 'true') {
            $type .= ' en cours' if $type ne 'Organismes extra-parlementaires';
            # Hack for "Mandats et fonctions à l'Assemblée nationale"
            $type =~ s/Mandats et fonctions à l'Assemblée nationale en cours/Mandats et fonctions en cours à l'Assemblée nationale/;
        }
        else {
            $type =~ s/^Mandats/Anciens mandats/;
            $type =~ s/^Fonctions/Anciennes fonctions/;
        }

        # 'null' is the second-level key of mandates with neither institution nor role
        my $institution_or_role = 'null';
        my $mandate_str;
        if ($mandate->first_child('institution')) {
            $institution_or_role = $mandate->first_child_text('institution');
            $mandate_str = _election_str($mandate) . _term_str($mandate);
            $mandate_str .= sprintf(" : %s", $mandate->first_child_text('role'))
                if $mandate->first_child('role');
        }
        elsif ($mandate->first_child('role')) {
            $institution_or_role = $mandate->first_child_text('role');
            $mandate_str = _term_str($mandate);
        }
        else {
            $mandate_str = _election_str($mandate) . _term_str($mandate);
        }

        unless (exists $mandate_groups{$type}) {
            # Second level is an ordered hash to keep chronology
            my %ordered_hash;
            tie %ordered_hash, "Tie::IxHash";
            $mandate_groups{$type} = \%ordered_hash;
        }
        my $by_key = $mandate_groups{$type};
        if (exists $by_key->{$institution_or_role}) {
            push @{ $by_key->{$institution_or_role} }, $mandate_str;
        }
        else {
            $by_key->{$institution_or_role} = [$mandate_str];
        }
    }

    # Curriculum vitae (optional)
    my @positions;
    if (my $cv = $root->first_child('cv')) {
        @positions = map { $_->text } $cv->children('position');
    }

    my $activities       = $root->first_child('activities');
    my $questions_url    = $activities->first_child_text('questions');
    my $propositions_url = $activities->first_child_text('propositions');
    my $reports_url      = $activities->first_child_text('reports');
    my $speeches_url     = $activities->first_child_text('speeches');

    # Opinions (optional)
    my @opinions;
    if (my $opinions_elt = $root->first_child('opinions')) {
        foreach my $opinion ($opinions_elt->children('opinion')) {
            push @opinions,
                "- " . $opinion->first_child_text('date')
                . " [" . $opinion->first_child_text('title')
                . "->" . $opinion->first_child_text('url') . "]\n\n"
                . $opinion->first_child_text('content') . "\n\n";
        }
    }

    # Build spikini page
    # NOTE(review): the blank lines between sections follow the
    # "\n- {{...}}\n\n" pattern used for the sections appended below --
    # confirm against a previously generated page.
    my $spikini_page = <<"SPIKINI_PAGE";
{{{GroupeMemoirePolitique : $first_name $last_name, $role}}}

-----

- {{Informations générales}}

-* $birth_str
-* {Circonscription d'élection} : [$constituency_dep -> MemoirePolitiqueDeputes$constituency_wiki], $constituency_nb circonscription
_ {Cantons de} $constituency_cantons
-* {Groupe politique} : [$grp_str -> MemoirePolitiqueDeputes$grp_wiki]
_ {Parti} : $party
SPIKINI_PAGE

    $spikini_page .= "-* {Profession} : $prof\n" if $prof;

    $spikini_page .= "\n- {{Contact}}\n\n$address\n";
    $spikini_page .= "-* {Courriel} : [$mail_str->mailto:$mail]\n" if $mail;
    foreach my $web (@webs) {
        $spikini_page .= "-* {$web->[2]} : [$web->[1] -> $web->[0]]\n";
    }

    $spikini_page .= "\n- {{Fonctions à l'Assemblée nationale}}\n\n";
    foreach my $function_type (sort keys %function) {
        if ($function{$function_type}) {
            $spikini_page .= "-* {$function_type} : $function{$function_type}\n";
        }
        else {
            $spikini_page .= "-* $function_type\n";
        }
    }

    $spikini_page .= "\n- {{Mandats}}\n\n";
    foreach my $mandate_type (sort sort_mandate_type keys %mandate_groups) {
        $spikini_page .= "-* {$mandate_type}\n";
        foreach my $second_level (keys %{ $mandate_groups{$mandate_type} }) {
            my $entries = $mandate_groups{$mandate_type}{$second_level};
            if ($second_level eq 'null') {
                $spikini_page .= "-** " . join("\n-** ", @$entries) . "\n";
            }
            else {
                $spikini_page .= "-** $second_level\n";
                if ($entries->[0]) {
                    $spikini_page .= "-*** " . join("\n-*** ", @$entries) . "\n";
                }
            }
        }
    }

    if (scalar @positions) {
        $spikini_page .= "\n- {{Curriculum Vitae}}\n\n";
        foreach my $pos (@positions) {
            $spikini_page .= "-* $pos\n";
        }
    }

    $spikini_page .= <<"SPIKINI";

-----

{{{Prises de positions}}}

{Merci d'enrichir cette partie en y rapportant les prises de positions de $first_name $last_name concernant les brevets (consultez la page MemoirePolitiqueAide pour savoir comment faire).}

- {{Sources d'informations}}

-* [Questions posées à l'Assemblée nationale->$questions_url]
-* [Propositions de loi->$propositions_url]
-* [Rapports parlementaires->$reports_url]
-* [Interventions en séance à l'Assemblée nationale->$speeches_url]
-* [Wikipédia->http://fr.wikipedia.org/wiki/${first_name}_${last_name}]
-* [Google->http://www.google.fr/search?q=$first_name+$last_name+brevet]

SPIKINI

    foreach my $opinion (@opinions) {
        $spikini_page .= "$opinion\n";
    }

    # ISO-8859-1 doesn't include some characters
    $spikini_page =~ s/\x{153}/œ/g;
    $spikini_page =~ s/\x{2019}/'/g;

    # Output spikini page
    my $spikini_location = "$output_dir/$wiki_name.spikini";
    open my $spikini_fh, '>', $spikini_location
        or die "Cannot write in file $spikini_location: $!\n";
    print {$spikini_fh} $spikini_page;
    close $spikini_fh
        or die "Error when closing $spikini_location: $!\n";
    warn "$spikini_location done\n";
};
# "catch std::exception" is kept verbatim: it is the exception-test idiom of
# the DbXml Perl binding.  Anything else that died inside the eval is in $@.
if (my $e = catch std::exception) {
    warn "getDocument failed\n";
    warn $e->what() . "\n";
    exit( -1 );
}
elsif ($@) {
    warn "getDocument failed\n";
    warn $@;
    exit( -1 );
}

# Sort comparator (used as "sort sort_mandate_type LIST") ordering mandate
# types by their position in the reference list below.  A type that does not
# appear in the list gets index -1 and therefore sorts first; ties are broken
# alphabetically so the output order is deterministic.
sub sort_mandate_type {
    my $ordered_types = "
Mandats et fonctions en cours à l'Assemblée nationale
Anciens mandats et fonctions à l'Assemblée nationale
Organismes extra-parlementaires
Anciens mandats de sénateur
Anciens mandats nationaux ou fonctions ministérielles
Mandats locaux en cours
Mandats intercommunaux en cours
Anciens mandats locaux
Anciens mandats intercommunaux
Fonctions dans les instances internationales ou judiciaires en cours
Anciennes fonctions dans les instances internationales ou judiciaires
Anciens mandats européens
";
    return (index($ordered_types, $a) <=> index($ordered_types, $b))
        || ($a cmp $b);
}

# Turn a commission label into the suffix of its wiki page name: drop filler
# words, capitalise each remaining word, then strip accents, spaces,
# apostrophes and hyphens (see desaccent).
sub wikify_com {
    my $str = shift;
    $str =~ s/chargée de|de la|des//g;
    $str =~ s/l'application de l'|ier et d'apurer les//g;
    $str =~ s/(\w+)/\u\L$1/g;
    return desaccent($str);
}

# Return a copy of the given string with the accented letters listed below
# replaced by their unaccented counterpart and with spaces, apostrophes and
# hyphens removed.
sub desaccent {
    my $str = shift;
    my @plain_for = (
        A => [qw(À Â Ä)],
        E => [qw(É È Ê Ë)],
        I => [qw(Í Î Ï)],
        O => [qw(Ó Ô Ö)],
        U => [qw(Ù Û Ü)],
        C => [qw(Ç)],
        a => [qw(à â ä)],
        e => [qw(é è ê ë)],
        i => [qw(í î ï)],
        o => [qw(ó ô ö)],
        u => [qw(ù û ü)],
        c => [qw(ç)],
    );
    while (my ($plain, $accented) = splice(@plain_for, 0, 2)) {
        # Alternation of literals (not a character class) so that the match
        # does not depend on how the accented literals are encoded.
        my $any_accented = join '|', map { quotemeta } @$accented;
        $str =~ s/(?:$any_accented)/$plain/g;
    }
    $str =~ s/[ '-]//g;
    return $str;
}

# Return the day, month and year texts of an element holding a date.
sub _dmy {
    my $elt = shift;
    return map { $elt->first_child_text($_) } qw(day month year);
}

# Return the "Élections du dd/mm/yyyy - " prefix of a mandate, or '' when the
# mandate has no election_date child.
sub _election_str {
    my $mandate  = shift;
    my $election = $mandate->first_child('election_date')
        or return '';
    return sprintf("Élections du %02d/%02d/%04d - ", _dmy($election));
}

# Return the "Mandat du ... au ..." part of a mandate, built from its optional
# begin_term and end_term children, each with an optional reason.
sub _term_str {
    my $mandate = shift;
    my $str     = '';
    if (my $begin = $mandate->first_child('begin_term')) {
        $str .= sprintf("Mandat du %02d/%02d/%04d ", _dmy($begin));
        $str .= sprintf("(%s) ", $begin->first_child_text('reason'))
            if $begin->first_child('reason');
    }
    if (my $end = $mandate->first_child('end_term')) {
        $str .= sprintf("au %02d/%02d/%04d", _dmy($end));
        $str .= sprintf(" (%s)", $end->first_child_text('reason'))
            if $end->first_child('reason');
    }
    return $str;
}

# Return the one-line rendering of an address element: street, postcode and
# city followed by every phone and fax number, each part being optional.
sub _format_address {
    my $addr = shift;
    my $line = '';
    $line .= $addr->first_child_text('street') . ','
        if $addr->first_child('street');
    $line .= ' ' . $addr->first_child_text('postcode')
        if $addr->first_child('postcode');
    $line .= ' ' . $addr->first_child_text('city')
        if $addr->first_child('city');
    $line .= ' - Tél. : ' . join(' - Tél. : ', $addr->children_text('phone'))
        if $addr->first_child('phone');
    $line .= ' - Fax : ' . join(' - Fax : ', $addr->children_text('fax'))
        if $addr->first_child('fax');
    return $line;
}