summary refs log tree commit diff
path: root/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm')
-rw-r--r-- lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm 403
1 files changed, 403 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm
new file mode 100644
index 0000000..db37774
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCfilms/GCAllocine.pm
@@ -0,0 +1,403 @@
+package GCPlugins::GCfilms::GCAllocine;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+
+ package GCPlugins::GCfilms::GCPluginAllocine;
+
+ use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+ # Start-tag callback: advances the state flags that text() later reads.
+ #
+ #   $tagname  - name of the tag being opened
+ #   $attr     - hash ref of the tag's attributes
+ #   $attrseq, $origtext - accepted but not used here
+ #
+ # Presumably registered as the HTML parser's start handler by the plugin
+ # base class -- confirm there.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingList})
+     {
+         # Search-results page: every tag is ignored until text() has
+         # flagged the results section.
+         return unless $self->{insideResults} eq 1;
+
+         if ($tagname eq "a")
+         {
+             if (   ($attr->{href} =~ /^\/film\/fichefilm_gen_cfilm=/)
+                 && ($self->{isMovie} eq 0))
+             {
+                 # Link to a movie page: open a new entry in the result list
+                 $self->{isMovie} = 1;
+                 $self->{isInfo}  = 0;
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
+             }
+             elsif ($self->{isMovie} eq 2)
+             {
+                 # Second link of the entry: text() collects the title from now on
+                 $self->{isMovie} = 3;
+             }
+         }
+         elsif ($tagname eq "td")
+         {
+             $self->{isMovie} = 2 if $self->{isMovie} eq 1;
+         }
+         elsif ($tagname eq "br")
+         {
+             if ($self->{isMovie} eq 3)
+             {
+                 # Title is complete: trim it and collapse inner whitespace
+                 for my $title ($self->{itemsList}[ $self->{itemIdx} ]->{title})
+                 {
+                     $title =~ s/^\s*//;
+                     $title =~ s/\s*$//;
+                     $title =~ s/\s+/ /g;
+                 }
+                 $self->{isMovie} = 4;
+             }
+             elsif ($self->{isInfo} eq 1)
+             {
+                 # Each line break moves on to the next info line
+                 # (text() reads date, then director, then actors)
+                 $self->{isInfo} = 2;
+             }
+             elsif ($self->{isInfo} eq 2)
+             {
+                 $self->{isInfo} = 3;
+             }
+         }
+         elsif ($tagname eq "span")
+         {
+             if (($attr->{class} eq "fs11") && ($self->{isMovie} eq 4))
+             {
+                 $self->{isInfo}  = 1;
+                 $self->{isMovie} = 0;
+             }
+         }
+         return;
+     }
+
+     # Movie page
+     if ($tagname eq "div")
+     {
+         if ($attr->{class} eq "poster")
+         {
+             $self->{insidePicture} = 1;
+         }
+         elsif ($attr->{class} eq "breaker")
+         {
+             $self->{insidePressRating} = 0;
+         }
+     }
+     elsif ($tagname eq "img")
+     {
+         if ($self->{insidePicture} eq 1)
+         {
+             my $src = $attr->{src};
+
+             # Only the first image seen decides; anything that is not the
+             # 160x240 rendition is recorded as "empty"
+             if (!$self->{curInfo}->{image})
+             {
+                 $self->{curInfo}->{image} = ($src =~ /r_160_240/) ? $src : "empty";
+             }
+         }
+     }
+     elsif ($tagname eq "h1")
+     {
+         $self->{insideTitle} = 1;
+     }
+     elsif ($tagname eq "span")
+     {
+         # The order of these tests matters: the first one that applies wins
+         if ($self->{insideDate} eq 1)
+         {
+             $self->{insideDate} = 2;
+         }
+         elsif ($attr->{itemprop} eq "duration")
+         {
+             $self->{insideTime} = 1;
+         }
+         elsif ($self->{insideDirector} eq 1)
+         {
+             $self->{insideDirector} = 2;
+         }
+         elsif ($self->{insideGenre} eq 1)
+         {
+             $self->{insideGenre} = 2;
+         }
+         elsif ($self->{insideCountry} eq 1)
+         {
+             $self->{insideCountry} = 2;
+         }
+         elsif (($attr->{class} eq "note") && ($self->{insidePressRating} eq 1))
+         {
+             $self->{insidePressRating} = 2;
+         }
+     }
+     elsif ($tagname eq "a")
+     {
+         $self->{insideActor} = 2 if $self->{insideActor} eq 1;
+     }
+     elsif ($tagname eq "p")
+     {
+         $self->{insideSynopsis} = 1 if $attr->{itemprop} eq "description";
+     }
+     elsif ($tagname eq "td")
+     {
+         $self->{insideOriginal} = 2 if $self->{insideOriginal} eq 1;
+     }
+     return;
+ }
+
+ # End-tag callback: decrements the open-tag counter for $tagname and
+ # clears the state flags that are scoped to the element being closed.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+     $self->{inside}->{$tagname}--;
+
+     # Closing tag => flags reset to 0; every other tag leaves the flags alone
+     my %flagsClearedBy = (
+         li    => [qw(insideDirector insideActor insideGenre)],
+         div   => [qw(insideCountry insideSynopsis insideActor)],
+         th    => [qw(insideSynopsis)],
+         table => [qw(insideResults)],
+     );
+
+     my $flags = $flagsClearedBy{$tagname} || [];
+     $self->{$_} = 0 for @$flags;
+ }
+
+ # Text callback: stores character data into the result list (search page)
+ # or into $self->{curInfo} (movie page), driven by the flags that start()
+ # and end() maintain.
+ #
+ #   $origtext - the text chunk; used as-is on the search page, trimmed of
+ #               line breaks and surrounding whitespace on the movie page.
+ #
+ # Presumably registered as the HTML parser's text handler by the plugin
+ # base class -- confirm there.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         # The results banner with a non-zero count opens the results section
+         if (($origtext =~ m/(\d+) r..?sultats? trouv..?s? dans les titres de films/) && ($1 > 0))
+         {
+             $self->{insideResults} = 1;
+         }
+         if ($self->{isMovie} eq 3)
+         {
+             $self->{itemsList}[ $self->{itemIdx} ]->{title} .= $origtext;
+         }
+         if ($self->{isInfo} eq 1)
+         {
+             if ($origtext =~ /\s*([0-9]{4})/)
+             {
+                 $self->{itemsList}[ $self->{itemIdx} ]->{date} = $1;
+             }
+         }
+         elsif ($self->{isInfo} eq 2)
+         {
+             if ($origtext =~ /^\s*de (.*)/)
+             {
+                 $self->{itemsList}[ $self->{itemIdx} ]->{director} = $1;
+             }
+         }
+         elsif ($self->{isInfo} eq 3)
+         {
+             if (   ($origtext =~ m/^\s*avec (.*)/)
+                 && (!$self->{itemsList}[ $self->{itemIdx} ]->{actors}))
+             {
+                 $self->{itemsList}[ $self->{itemIdx} ]->{actors} = $1;
+             }
+             $self->{isInfo} = 0;
+         }
+     }
+     else
+     {
+         $origtext =~ s/[\r\n]//g;
+         $origtext =~ s/^\s*//;
+         $origtext =~ s/\s*$//;
+
+         if ($self->{insideTitle} eq 1)
+         {
+             $self->{curInfo}->{title} = $origtext;
+             $self->{insideTitle} = 0;
+         }
+         elsif (($self->{insideDate} eq 2) && (length($origtext) > 1))
+         {
+             $self->{curInfo}->{date} = $origtext
+               if !($origtext =~ /inconnu/);
+             $self->{insideDate} = 0;
+         }
+         elsif (($origtext =~ /^Date de sortie/)
+             && (!$self->{curInfo}->{date}))
+         {
+             $self->{insideDate} = 1;
+         }
+         elsif (($origtext =~ /^Date de reprise/)
+             && (!$self->{curInfo}->{date}))
+         {
+             $self->{insideDate} = 1;
+         }
+         elsif ($self->{insideTime} eq 1)
+         {
+             # Capture variables keep their previous values when a match
+             # fails, so the duration is only computed from a successful
+             # match. Hours-only and minutes-only texts are accepted too.
+             my $minutes;
+             if ($origtext =~ /(\d+)\s*h\s*(\d+)\s*m/)
+             {
+                 $minutes = ($1 * 60) + $2;
+             }
+             elsif ($origtext =~ /(\d+)\s*h/)
+             {
+                 $minutes = $1 * 60;
+             }
+             elsif ($origtext =~ /(\d+)\s*m/)
+             {
+                 $minutes = $1 + 0;
+             }
+             $self->{curInfo}->{time} = $minutes . " m."
+               if defined $minutes;
+             $self->{insideTime} = 0;
+         }
+         elsif ($self->{insideDirector} eq 2)
+         {
+             $origtext = ", " if $origtext =~ m/^,/;
+             $self->{curInfo}->{director} .= $origtext;
+         }
+         elsif ($origtext =~ /^R..?alis..? par/)
+         {
+             $self->{insideDirector} = 1;
+         }
+         elsif ($self->{insideActor} eq 2)
+         {
+             $origtext =~ s/plus//;
+             $origtext = "," if $origtext =~ m/^,/;
+             $self->{curInfo}->{actors} .= $origtext;
+         }
+         elsif ($origtext =~ /^Avec/)
+         {
+             $self->{insideActor} = 1;
+         }
+         elsif ($self->{insideGenre} eq 2)
+         {
+             $origtext = "," if $origtext =~ m/^,/;
+             $self->{curInfo}->{genre} .= $origtext;
+         }
+         elsif ($origtext =~ /^[\s\n]*Genre/)
+         {
+             $self->{insideGenre} = 1;
+         }
+         elsif ($self->{insideCountry} eq 2)
+         {
+             $origtext = "," if $origtext =~ m/^,/;
+             $self->{curInfo}->{country} .= $origtext;
+         }
+         elsif ($origtext =~ /Nationalité/)
+         {
+             $self->{insideCountry} = 1;
+         }
+         elsif ($origtext =~ /^Presse$/)
+         {
+             $self->{insidePressRating} = 1;
+         }
+         elsif ($self->{insidePressRating} eq 2)
+         {
+             # The rating uses a decimal comma and is doubled when stored.
+             # Only a numeric chunk is taken, and the flag is dropped once
+             # it has been read (as the title/date/time flags are), so that
+             # whitespace or other text nodes cannot append a stray "0".
+             $origtext =~ s/,/./;
+             if ($origtext =~ /^\d+(?:\.\d+)?$/)
+             {
+                 $self->{curInfo}->{ratingpress} = $origtext * 2;
+                 $self->{insidePressRating} = 0;
+             }
+         }
+         elsif ($origtext =~ /^Interdit aux moins de (\d+) ans/)
+         {
+             $self->{curInfo}->{age} = $1;
+         }
+         elsif ($self->{insideSynopsis} eq 1)
+         {
+             $self->{curInfo}->{synopsis} .= $origtext;
+         }
+         elsif ($self->{insideOriginal} eq 2)
+         {
+             $self->{curInfo}->{original} = $origtext;
+             $self->{insideOriginal} = 0;
+         }
+         elsif ($origtext =~ /^Titre original/)
+         {
+             $self->{insideOriginal} = 1;
+         }
+     }
+ }
+
+ # Constructor: builds the object through the parent class, then declares
+ # which fields the result list provides and initialises the parser state.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self  = $class->SUPER::new();
+
+     # Fields available for each entry of the search-result list
+     $self->{hasField} = { map { $_ => 1 } qw(title date director actors) };
+
+     # Parser state: counters start at 0, current name/url start undefined
+     $self->{$_} = 0     for qw(isInfo isMovie insideResults actorsCounter);
+     $self->{$_} = undef for qw(curName curUrl);
+
+     return bless($self, $class);
+ }
+
+ # Hook applied to the raw page before parsing; this plugin passes the
+ # HTML through untouched.
+ sub preProcess
+ {
+     my $self = shift;
+     my ($html) = @_;
+
+     return $html;
+ }
+
+ # Builds the search URL for $word. The word is appended as-is; any URL
+ # encoding is assumed to be done by the caller -- TODO confirm.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     # f=3 ?
+     # return "http://www.allocine.fr/recherche/?q=$word&f=3&rub=1";
+     my $searchBase = "http://www.allocine.fr/recherche/1/?q=";
+     return $searchBase . $word;
+ }
+
+ # Returns the charset name reported for search queries.
+ sub getSearchCharset
+ {
+     # Need urls to be double character encoded
+     my $charset = "utf8";
+     return $charset;
+ }
+
+ # Turns a site-relative $url (as stored by start() for each result)
+ # into an absolute URL on www.allocine.fr.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return "http://www.allocine.fr${url}";
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = 'Allocine.fr';
+     return $pluginName;
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $pluginAuthor = "Tian";
+     return $pluginAuthor;
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     my $langCode = "FR";
+     return $langCode;
+ }
+
+ # Charset name reported by this plugin for the site's pages.
+ sub getCharset
+ {
+     # return "UTF-8"; # For 1.5.0 Win32
+     my $charset = "ISO-8859-1"; # For 1.5.0 Win32 with /lib/gcstar/GCPlugins/ ver.1.5.9svn
+     return $charset;
+ }
+}
+
+1;