diff options
author | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
commit | 126bb8cb6b93240bb4d3a2b816b74c286c3d422b (patch) | |
tree | e66e1dfe77d53a52539489765c88d23e4423ae27 /lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm |
Imported Upstream version 1.7.0 [upstream/1.7.0]
Diffstat (limited to 'lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm')
-rw-r--r-- | lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm | 403 |
1 files changed, 403 insertions, 0 deletions
# Recovered from a flattened diff:
#   diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm
#   new file mode 100644, index 0000000..db37774, hunk @@ -0,0 +1,403 @@

package GCPlugins::GCfilms::GCAllocine;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCfilms::GCfilmsCommon;

{

    package GCPlugins::GCfilms::GCPluginAllocine;

    use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);

    # Opening-tag handler (presumably driven by the base class's HTML
    # parser -- confirm against GCfilmsPluginsBase).
    #
    # Parameters:
    #   $tagname  - name of the tag being opened
    #   $attr     - hash reference of the tag's attributes
    #   $attrseq, $origtext - accepted but unused here
    #
    # When $self->{parsingList} is true, advances the isMovie / isInfo
    # state counters used to collect search results into
    # $self->{itemsList}. Otherwise sets the inside* flags that tell
    # text() which field of $self->{curInfo} the next text belongs to.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($self->{insideResults} == 1)
            {
                if (   ($tagname eq "a")
                    && ($attr->{href} =~ /^\/film\/fichefilm_gen_cfilm=/)
                    && ($self->{isMovie} == 0))
                {
                    # A link to a movie page starts a new result entry.
                    my $url = $attr->{href};
                    $self->{isMovie} = 1;
                    $self->{isInfo}  = 0;
                    $self->{itemIdx}++;
                    $self->{itemsList}[ $self->{itemIdx} ]->{url} = $url;
                }
                elsif (($tagname eq "td") && ($self->{isMovie} == 1))
                {
                    $self->{isMovie} = 2;
                }
                elsif (($tagname eq "a") && ($self->{isMovie} == 2))
                {
                    # From here text() appends to the entry's title.
                    $self->{isMovie} = 3;
                }
                elsif (($tagname eq "br") && ($self->{isMovie} == 3))
                {
                    # Title is complete: trim it and collapse inner whitespace.
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/^\s*//;
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/\s*$//;
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =~ s/\s+/ /g;
                    $self->{isMovie} = 4;
                }
                elsif (($tagname eq "span")
                    && ($attr->{class} eq "fs11")
                    && ($self->{isMovie} == 4))
                {
                    # Info block: isInfo 1 = date, 2 = director, 3 = actors,
                    # each step separated by a <br> (see text()).
                    $self->{isInfo}  = 1;
                    $self->{isMovie} = 0;
                }
                elsif (($tagname eq "br") && ($self->{isInfo} == 1))
                {
                    $self->{isInfo} = 2;
                }
                elsif (($tagname eq "br") && ($self->{isInfo} == 2))
                {
                    $self->{isInfo} = 3;
                }
            }
        }
        else
        {
            if (($tagname eq "div") && ($attr->{class} eq "poster"))
            {
                $self->{insidePicture} = 1;
            }
            elsif (($tagname eq "img") && ($self->{insidePicture} == 1))
            {
                my $src = $attr->{src};

                # Only the first image seen decides the poster value.
                if (!$self->{curInfo}->{image})
                {
                    if ($src =~ /r_160_240/)
                    {
                        $self->{curInfo}->{image} = $src;
                    }
                    else
                    {
                        $self->{curInfo}->{image} = "empty";
                    }
                }
            }
            elsif ($tagname eq "h1")
            {
                $self->{insideTitle} = 1;
            }
            elsif (($tagname eq "span") && ($self->{insideDate} == 1))
            {
                $self->{insideDate} = 2;
            }
            elsif (($tagname eq "span") && ($attr->{itemprop} eq "duration"))
            {
                $self->{insideTime} = 1;
            }
            elsif (($tagname eq "span") && ($self->{insideDirector} == 1))
            {
                $self->{insideDirector} = 2;
            }
            elsif (($tagname eq "a") && ($self->{insideActor} == 1))
            {
                $self->{insideActor} = 2;
            }
            elsif (($tagname eq "span") && ($self->{insideGenre} == 1))
            {
                $self->{insideGenre} = 2;
            }
            elsif (($tagname eq "span") && ($self->{insideCountry} == 1))
            {
                $self->{insideCountry} = 2;
            }
            elsif (($tagname eq "span")
                && ($attr->{class} eq "note")
                && ($self->{insidePressRating} == 1))
            {
                $self->{insidePressRating} = 2;
            }
            elsif (($tagname eq "div") && ($attr->{class} eq "breaker"))
            {
                $self->{insidePressRating} = 0;
            }
            elsif (($tagname eq "p") && ($attr->{itemprop} eq "description"))
            {
                $self->{insideSynopsis} = 1;
            }
            elsif (($tagname eq "td") && ($self->{insideOriginal} == 1))
            {
                $self->{insideOriginal} = 2;
            }
        }
    }

    # Closing-tag handler: decrements the per-tag nesting counter and
    # clears the field flags whose scope ends with the given tag.
    sub end
    {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;

        if ($tagname eq "li")
        {
            $self->{insideDirector} = 0;
            $self->{insideActor}    = 0;
            $self->{insideGenre}    = 0;
        }
        elsif ($tagname eq "div")
        {
            $self->{insideCountry}  = 0;
            $self->{insideSynopsis} = 0;
            $self->{insideActor}    = 0;
        }
        elsif ($tagname eq "th")
        {
            $self->{insideSynopsis} = 0;
        }
        elsif ($tagname eq "table")
        {
            $self->{insideResults} = 0;
        }
    }

    # Converts a duration string such as "1h 45min", "2h" or "45min" to a
    # number of minutes. Returns nothing (undef in scalar context) when no
    # duration can be recognised, so the caller never reads stale regex
    # captures from an earlier match.
    sub _durationToMinutes
    {
        my ($text) = @_;

        return if !defined $text;

        if ($text =~ /(\d+)\s*h\s*(\d+)\s*m/)
        {
            return ($1 * 60) + $2;
        }
        if ($text =~ /(\d+)\s*h/)
        {
            return $1 * 60;
        }
        if ($text =~ /(\d+)\s*m/)
        {
            return $1 + 0;
        }
        return;
    }

    # Text handler.
    #
    # In list mode ($self->{parsingList} true) it detects the results
    # header, accumulates the current entry's title and extracts the
    # date / director / actors of the entry according to isInfo.
    #
    # In item mode it trims the text and either stores it in the
    # $self->{curInfo} field selected by the inside* flags, or recognises
    # a label ("Date de sortie", "Avec", "Genre", ...) that arms the flag
    # for the following value.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            # Accented letters are matched with "..?" (presumably to cope
            # with one- or two-byte encodings of the page -- TODO confirm).
            if (($origtext =~ m/(\d+) r..?sultats? trouv..?s? dans les titres de films/) && ($1 > 0))
            {
                $self->{insideResults} = 1;
            }
            if ($self->{isMovie} == 3)
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext;
            }
            if ($self->{isInfo} == 1)
            {
                if ($origtext =~ /\s*([0-9]{4})/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
                }
            }
            elsif ($self->{isInfo} == 2)
            {
                if ($origtext =~ /^\s*de (.*)/)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1;
                }
            }
            elsif ($self->{isInfo} == 3)
            {
                if (   ($origtext =~ m/^\s*avec (.*)/)
                    && (!$self->{itemsList}[ $self->{itemIdx} ]->{actors}))
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1;
                }
                $self->{isInfo} = 0;
            }
        }
        else
        {
            # Normalise: drop line breaks, trim both ends.
            $origtext =~ s/[\r\n]//g;
            $origtext =~ s/^\s*//;
            $origtext =~ s/\s*$//;

            if ($self->{insideTitle} == 1)
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{insideTitle} = 0;
            }
            elsif (($self->{insideDate} == 2) && (length($origtext) > 1))
            {
                $self->{curInfo}->{date} = $origtext
                  if !($origtext =~ /inconnu/);
                $self->{insideDate} = 0;
            }
            elsif (($origtext =~ /^Date de sortie/)
                && (!$self->{curInfo}->{date}))
            {
                $self->{insideDate} = 1;
            }
            elsif (($origtext =~ /^Date de reprise/)
                && (!$self->{curInfo}->{date}))
            {
                $self->{insideDate} = 1;
            }
            elsif ($self->{insideTime} == 1)
            {
                # Only store a duration when one was actually recognised;
                # the flag is cleared either way, as before.
                my $minutes = _durationToMinutes($origtext);
                $self->{curInfo}->{time} = $minutes . " m."
                  if defined $minutes;
                $self->{insideTime} = 0;
            }
            elsif ($self->{insideDirector} == 2)
            {
                $origtext = ", " if $origtext =~ m/^,/;
                $self->{curInfo}->{director} .= $origtext;
            }
            elsif ($origtext =~ /^R..?alis..? par/)
            {
                $self->{insideDirector} = 1;
            }
            elsif ($self->{insideActor} == 2)
            {
                $origtext =~ s/plus//;
                $origtext = "," if $origtext =~ m/^,/;
                $self->{curInfo}->{actors} .= $origtext;
            }
            elsif ($origtext =~ /^Avec/)
            {
                $self->{insideActor} = 1;
            }
            elsif ($self->{insideGenre} == 2)
            {
                $origtext = "," if $origtext =~ m/^,/;
                $self->{curInfo}->{genre} .= $origtext;
            }
            elsif ($origtext =~ /^[\s\n]*Genre/)
            {
                $self->{insideGenre} = 1;
            }
            elsif ($self->{insideCountry} == 2)
            {
                $origtext = "," if $origtext =~ m/^,/;
                $self->{curInfo}->{country} .= $origtext;
            }
            elsif ($origtext =~ /Nationalité/)
            {
                $self->{insideCountry} = 1;
            }
            elsif ($origtext =~ /^Presse$/)
            {
                $self->{insidePressRating} = 1;
            }
            elsif ($self->{insidePressRating} == 2)
            {
                # Accept only a numeric node (decimal comma or point) and
                # store it once. Appending every text node, including empty
                # ones, used to glue extra "0" digits onto the rating.
                if ($origtext =~ /^\d+(?:[.,]\d+)?$/)
                {
                    (my $rating = $origtext) =~ s/,/./;
                    $self->{curInfo}->{ratingpress} = $rating * 2;
                    $self->{insidePressRating} = 0;
                }
            }
            elsif ($origtext =~ /^Interdit aux moins de (\d+) ans/)
            {
                $self->{curInfo}->{age} = $1;
            }
            elsif ($self->{insideSynopsis} == 1)
            {
                $self->{curInfo}->{synopsis} .= $origtext;
            }
            elsif ($self->{insideOriginal} == 2)
            {
                $self->{curInfo}->{original} = $origtext;
                $self->{insideOriginal} = 0;
            }
            elsif ($origtext =~ /^Titre original/)
            {
                $self->{insideOriginal} = 1;
            }
        }
    }

    # Constructor: builds the object through the parent class, declares
    # which fields the result list provides (hasField) and resets the
    # list-parsing state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        $self->{hasField} = {
            title    => 1,
            date     => 1,
            director => 1,
            actors   => 1,
        };

        $self->{isInfo}        = 0;
        $self->{isMovie}       = 0;
        $self->{insideResults} = 0;
        $self->{curName}       = undef;
        $self->{curUrl}        = undef;
        $self->{actorsCounter} = 0;

        bless($self, $class);
        return $self;
    }

    # Hook for rewriting the downloaded page before parsing; this plugin
    # returns the HTML unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;

        return $html;
    }

    # Builds the search URL for $word. $word is interpolated as-is
    # (presumably already URL-encoded by the caller -- TODO confirm).
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        # f=3 ?
        # return "http://www.allocine.fr/recherche/?q=$word&f=3&rub=1";
        return "http://www.allocine.fr/recherche/1/?q=$word";
    }

    # Character set used for the search query.
    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    # Turns a site-relative result URL into an absolute one.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return "http://www.allocine.fr" . $url;
    }

    # Display name of the plugin.
    sub getName
    {
        return "Allocine.fr";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'Tian';
    }

    # Language code of the site.
    sub getLang
    {
        return 'FR';
    }

    # Character set reported for the fetched pages.
    sub getCharset
    {
        # return "UTF-8"; # For 1.5.0 Win32
        return "ISO-8859-1"; # For 1.5.0 Win32 with /lib/gcstar/GCPlugins/ ver.1.5.9svn
    }
}

1;