summaryrefslogtreecommitdiff
path: root/lib/gcstar/GCPlugins/GCfilms
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gcstar/GCPlugins/GCfilms')
-rw-r--r--lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm435
-rw-r--r--lib/gcstar/GCPlugins/GCfilms/GCImdb.pm888
2 files changed, 449 insertions, 874 deletions
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm
deleted file mode 100644
index 75c6854..0000000
--- a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm
+++ /dev/null
@@ -1,435 +0,0 @@
-package GCPlugins::GCfilms::GCAlpacineES;
-
-###################################################
-#
-# Copyright 2005-2010 Christian Jodar
-#
-# This file is part of GCstar.
-#
-# GCstar is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# GCstar is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCstar; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
-#
-###################################################
-
-use strict;
-use utf8;
-
-use GCPlugins::GCfilms::GCfilmsCommon;
-
-{
- package GCPlugins::GCfilms::GCPluginAlpacineES;
-
- use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
-
-
- # text
- # Called each time some plain text (between tags) is processed.
- # $origtext is the read text.
- sub text
- {
- my ($self, $origtext) = @_;
-
- return if length($origtext) < 2;
-
- # Código para procesar el resultado de la busqueda
- if ($self->{parsingList}){
- # Guardamos la fecha.
- if ($self->{inside}->{li} && $self->{insideInfos}){
- $origtext =~ /. \(([0-9]{4})\)/;
- $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext;
- }
- # Guardamos el título
- if ($self->{inside}->{a} && $self->{insideInfos}){
- $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
- }
- }
-
- else{
- # Eliminamos espacios iniciales, espacios dobles y espacios finales del texto
- $origtext =~ s/^\s*|\s{2,}|\s*$//g;
- # Estamos procesando el titulo
- if ($self->{insideTitle})
- {
- # Obtenemos titulo y fecha
- $origtext =~ /(.*) \(([0-9]{4})\)/;
- $self->{curInfo}->{title} = $1;
- $self->{curInfo}->{date} = $2;
- $self->{insideTitle} = 0;
- return;
- }
-
- # Si existe el hipervinculo "Ampliar" cambiamos la imagen por la ampliada
- if ($self->{inside}->{a} && $origtext eq "Ampliar"){
- $self->{curInfo}->{image} =~ /(http:\/\/img.alpacine.com\/carteles\/.*)-[0-9]*(\.jpg)/;
- $self->{curInfo}->{image} = $1 . $2;
- return;
- }
- # Estamos en la puntuación real
- if($self->{insideRating}){
- $self->{curInfo}->{ratingpress} = int( $origtext + 0.5 );
- $self->{insideRating} = 0;
- }
- # No hay puntuación real, asignamos 0 por defecto
- if($self->{inside}->{div}){
- if($origtext =~ /Esperando \d votos/){
- $self->{curInfo}->{ratingpress} = 0;
- }
- }
- # Procesamos el titulo original
- if ($self->{isOrigTit} eq 1) {
- $self->{isOrigTit} = 0;
- $self->{curInfo}->{original} = $origtext;
- return;
- }
- # Procesamos los generos (gen, gen, gen, gen...)
- if ($self->{isGenres} eq 1) {
- if($origtext ne ""){
- # hacemos uso de sus propias comas
- $self->{curInfo}->{genre} .= $origtext;
- }
- else{
- $self->{isGenres} = 0;
- }
- return;
- }
- # Procesamos el país
- if ($self->{isCountry} eq 1) {
- $self->{isCountry} = 0;
- $self->{curInfo}->{country} = $origtext;
- return;
- }
- # Procesamos la duración
- if ($self->{isTime} eq 1) {
- $self->{isTime} = 0;
- $self->{curInfo}->{time} = $origtext;
- return;
- }
- # Procesamos los directores
- if ($self->{isDirector} eq 1) {
- if($origtext ne ""){
- if($self->{curInfo}->{director} eq ""){
- $self->{curInfo}->{director} .= $origtext;
- }
- else{
- $self->{curInfo}->{director} .= ", $origtext";
- }
- }
- else{
- $self->{isDirector} = 0;
- }
- return;
- }
- # Actores
- if ($self->{isActors} eq 1) {
- if($origtext ne ""){
- if($self->{curInfo}->{actors} eq ""){
- $self->{curInfo}->{actors} .= $origtext;
- }
- else{
- $self->{curInfo}->{actors} .= ", $origtext";
- }
- }
- else{
- $self->{isActors} = 0;
- }
- return;
- }
- # Procesamos la Sinopsis
- if ($self->{isSynopsis} eq 1) {
- $self->{isSynopsis} = 0;
- $self->{curInfo}->{synopsis} = $origtext;
- return;
- }
- # Procesamos los premios
- if ($self->{isAwards} eq 1) {
- $self->{isAwards} = 0;
- $self->{curInfo}->{synopsis} = $self->{curInfo}->{synopsis}. "\n\nPremios:\n\t".$origtext;
- $self->{insideInfos} = 0;
- return;
- }
-
- # Condiciones para procesar los campos en el siguiente ciclo
- if($self->{insideInfos}){
- $self->{isOrigTit} = 1 if $origtext eq "Título original:";
- $self->{isGenres} = 1 if $origtext eq "Género:";
- $self->{isCountry} = 1 if $origtext eq "País:";
- $self->{isTime} = 1 if $origtext eq "Duración:";
- $self->{isDirector} = 1 if $origtext eq "Dirección:";
- $self->{isActors} = 1 if $origtext eq "Interpretación:";
- $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:";
- $self->{isAwards} = 1 if $origtext eq "Premios:";
- }
- }
- }
-
-
- # end
- # Called each time a HTML tag ends.
- # $tagname is the tag name.
- sub end
- {
- my ($self, $tagname) = @_;
- $self->{inside}->{$tagname}--;
-
- # Código para procesar el resultado de la busqueda
- #if ($self->{parsingList}){
- #}
- # Código para procesar la información de la pelicula seleccionada
- #else {
- #}
- }
-
- # In processing functions below, self->{parsingList} can be used.
- # If true, we are processing a search results page
- # If false, we are processing a item information page.
-
- # $self->{inside}->{tagname} (with correct value for tagname) can be used to test
- # if we are in the corresponding tag.
-
- # You have a counter $self->{itemIdx} that have to be used when processing search results.
- # It is your responsability to increment it!
-
- # When processing search results, you have to fill the available fields for results
- #
- # $self->{itemsList}[$self->{movieIdx}]->{field_name}
- #
- # When processing a movie page, you need to fill the fields (if available)
- # in $self->{curInfo}.
- #
- # $self->{curInfo}->{field_name}
-
- # start
- # Called each time a new HTML tag begins.
- # $tagname is the tag name.
- # $attr is reference to an associative array of tag attributes.
- # $attrseq is an array reference containing all the attributes name.
- # $origtext is the tag text as found in source file
- # Returns nothing
- sub start
- {
- my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
- $self->{inside}->{$tagname}++;
-
- # Código para procesar el resultado de la busqueda para generar el listado
- if ($self->{parsingList})
- {
- # Comprobamos si estamos dentro de un título utilizando el atributo class
- if( ($tagname eq "li" ) && ($attr->{class} ne "mas" )){
- $self->{itemIdx}++;
- $self->{insideInfos} = 1 ;
- return;
- }
- if( ($tagname eq "li" ) && ($attr->{class} eq "mas" )){
- $self->{insideInfos} = 0;
- return;
- }
- # Si estamos en un título y encontramos una tag a, es un enlace a ficha
- if ($tagname eq "a" && $self->{insideInfos}){
- $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.alpacine.com".$attr->{href};
- return;
- }
- }
- # Código para procesar la información de la pelicula seleccionada
- else {
- if ($tagname eq "h1"){
- $self->{insideTitle} = 1;
- return;
- }
- # Si estamos dentro de una imagen y el src es el del thumb lo asignamos como imagen
- if ($tagname eq "img")
- {
- # Extraemos la dirección de la imagen thumb
- if ($attr->{src} =~ /http:\/\/img.alpacine.com\/carteles\/.*\.jpg/)
- {
- $self->{curInfo}->{image} = $attr->{src};
- }
- return;
- }
-
- if ($tagname eq "div" && $attr->{class} eq "voto"){
- $self->{insideRating} = 1;
- return;
- }
-
- if( $tagname eq "div" && $attr->{class} eq "datos" ){
- $self->{insideInfos} = 1 ;
- return;
- }
- }
- }
-
- # preProcess
- # Called before each page is processed. You can use it to do some substitutions.
- # $html is the page content.
- # Returns modified version of page content.
- sub preProcess
- {
- my ($self, $html) = @_;
-
- # Anulamos el html si coincide con el patron de no resultados
- if($html =~ /^.*No hay resultados para.*$/s){
- $html = "";
- return $html;
- }
-
- # Recorta el código del listado de resultados, quedandose solo con la parte que nos interesa del html
- # el modificador s/.../$1/s trata el flujo como una sola cadena y reemplaza todo el cuerpo con la parte que nos interesa
- if($html =~ s/^.*<div class="titulo">Pel.culas <span class="resultados">\([0-9]* resultado[s]?\)<\/span><\/div><ul>(<li><a.*<\/a> \([0-9]*\)<\/li>).*$/$1/s){
- return $html;
- }
-
- # Recorta el código de la ficha, quedandose solo con la parte que nos interesa del html
- # Comprobamos si la pelicula contiene o no premios y nos quedamos con lo que corresponda
- if($html =~ /^.*<div class="titulo">Premios:.*más\.\.\.<\/a><\/div><\/div>.*$/s){
- $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>.*\n.*<div class="datox"><div class="titulo">Premios:.*)más\.\.\.<\/a><\/div><\/div>.*$/$1/s;
- }
- else{
- $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>)\n\n\t\t\t\t\n\n\t\t\t\t<hr \/>.*$/$1/s;
- }
- return $html;
- }
-
- # changeUrl
- # Can be used to change URL if item URL and the one used to
- # extract information are different.
- # Return the modified URL.
- #sub changeUrl
- #{
- # my ($self, $url) = @_;
- # return $url;
- #}
-
- # getExtra
- # Used if the plugin wants an extra column to be displayed in search results
- # Return the column title or empty string to hide the column.
- #sub getExtra
- #{
- # return 'Extra';
- #}
-
-
- # getLang
- # Used to fill in plugin list with user language plugins
- # Return the language used for this site (2 letters code).
- sub getLang
- {
- return "ES";
- }
-
-
- # getAuthor
- # Used to display the plugin author in GUI.
- # Returns the plugin author name.
- sub getAuthor
- {
- return "DoVerMan";
- }
-
-
- # getName
- # Used to display plugin name in GUI.
- # Returns the plugin name.
- sub getName
- {
- return 'Alpacine';
- }
-
-
- # getCharset
- # Used to convert charset in web pages.
- # Returns the charset as specified in pages.
- #sub getCharset
- #{
- # my $self = shift;
- # # Charset de la web
- # return "UTF-8";
- #}
-
-
- # getItemUrl
- # Used to get the full URL of an item page.
- # Useful when url on results pages are relative.
- # $url is the URL as found with a search.
- # Returns the absolute URL.
- sub getItemUrl
- {
- my ($self, $url) = @_;
- return $url;
- }
-
-
- # getSearchUrl
- # Used to get the URL that to be used to perform searches.
- # $word is the query
- # Returns the full URL.
- sub getSearchUrl
- {
- my ($self, $word) = @_;
- # Hack para evitar problemas con acentos
- $word =~ s/%E1/a/g;
- $word =~ s/%E9/e/g;
- $word =~ s/%ED/i/g;
- $word =~ s/%F3/o/g;
- $word =~ s/%FA/u/g;
- $word =~ s/%C1/A/g;
- $word =~ s/%C9/E/g;
- $word =~ s/%CD/I/g;
- $word =~ s/%D3/O/g;
- $word =~ s/%DA/U/g;
-
- return "http://www.alpacine.com/buscar/?buscar=" . $word;
-
- }
-
-
- # Constructor
- sub new
- {
- # Inicialización
- my $proto = shift;
- my $class = ref($proto) || $proto;
- my $self = $class->SUPER::new();
- bless ($self, $class);
-
- # Campos que devuelve el plugin (1 si, 0 no). Son los que apareceran
- # en el listado de resultados
- $self->{hasField} = {
- title => 1,
- date => 1,
- director => 0,
- actors => 0,
- };
-
- # Indica si estamos procesando información útil
- $self->{insideInfos} = 0;
-
- # Indican el estado del procesado del listado de resultados
- $self->{insideRating} = 0;
- $self->{insideTitle} = 0;
-
- $self->{isOrigTit} = 0;
- $self->{isGenres} = 0;
- $self->{isCountry} = 0;
- $self->{isTime} = 0;
- $self->{isDirector} = 0;
- $self->{isActors} = 0;
- $self->{isSynopsis} = 0;
- $self->{isAwards} = 0;
-
- return $self;
- }
-
-}
-
-1;
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm
index 70af804..0d12717 100644
--- a/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm
+++ b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm
@@ -1,439 +1,449 @@
-package GCPlugins::GCfilms::GCImdb;
-
-###################################################
-#
-# Copyright 2010 groms
-#
-# Features:
-# + Multiple directors separated by comma
-# + Multiple countries separated by comma
-# + Correct URL in case of redirection
-# + Fetches Original Title
-#
-# This file is part of GCstar.
-#
-# GCstar is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# GCstar is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCstar; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-###################################################
-
-use strict;
-
-use GCPlugins::GCfilms::GCfilmsCommon;
-
-{
- package GCPlugins::GCfilms::GCPluginImdb;
-
- use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
-
- sub start
- {
- my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
-
- $self->{inside}->{$tagname}++;
-
- if ($self->{parsingEnded})
- {
- return;
- }
-
- if ($self->{parsingList})
- {
- if ($tagname eq "a")
- {
- my $url = $attr->{href};
- if (($url =~ /^\/title\//) && (!$self->{alreadyListed}->{$url}))
- {
- $self->{isMovie} = 1;
- $self->{isInfo} = 1;
- $self->{itemIdx}++;
- $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
- $self->{alreadyListed}->{$url} = 1;
- }
- }
- }
- else
- {
-
- if ($tagname eq "link")
- {
- if ($attr->{rel} eq "canonical")
- {
- $self->{curInfo}->{webPage} = $attr->{href};
- }
- }
- elsif ($tagname eq "h1")
- {
- if ($attr->{class} eq "header")
- {
- $self->{insideHeader} = 1;
- }
- }
- elsif ($tagname eq "div")
- {
- if ($attr->{class} eq "infobar")
- {
- $self->{insideInfobar} = 1;
- }
- }
- elsif ($tagname eq "table")
- {
- if ($attr->{class} eq "cast_list")
- {
- $self->{insideCastList} = 1;
- }
- }
- elsif ($tagname eq "span")
- {
- if ($attr->{itemprop} eq "ratingValue")
- {
- $self->{insideRating} = 1;
- }
- elsif ($attr->{class} eq "title-extra")
- {
- $self->{insideOriginalTitle} = 1;
- }
- }
- elsif ($tagname eq "img")
- {
- if ($self->{insidePrimaryImage})
- {
- if (!($attr->{src} =~ m/nopicture/))
- {
- ($self->{curInfo}->{image} = $attr->{src}) =~ s/_V1\._.+\./_V1\._SX1000_SY1000_\./;
- }
- }
- elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|)
- {
- my $cert = $attr->{title};
- $self->{curInfo}->{age} = 1 if ($cert eq 'Unrated') || ($cert eq 'Open');
- $self->{curInfo}->{age} = 2 if ($cert eq 'G') || ($cert eq 'Approved');
- $self->{curInfo}->{age} = 5 if ($cert eq 'PG') || ($cert eq 'M') || ($cert eq 'GP');
- $self->{curInfo}->{age} = 13 if $cert eq 'PG_13';
- $self->{curInfo}->{age} = 17 if $cert eq 'R';
- $self->{curInfo}->{age} = 18 if ($cert eq 'NC_17') || ($cert eq 'X');
- }
- }
- elsif ($tagname eq "a")
- {
- if ($self->{insideHeader} && $attr->{href} =~ m/year/)
- {
- $self->{insideYear} = 1;
- }
- elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/)
- {
- $self->{insideGenre} = 1;
- }
- }
- elsif ($tagname eq 'td')
- {
- if ($self->{insideCastList})
- {
- if ($attr->{class} eq 'name')
- {
- $self->{insideActor} = 1;
- }
- elsif ($attr->{class} eq 'character')
- {
- $self->{insideRole} = 1;
- }
- }
- elsif ($attr->{id} eq "img_primary") {
- $self->{insidePrimaryImage} = 1;
- }
- }
- }
- }
-
- sub end
- {
- my ($self, $tagname) = @_;
-
- $self->{inside}->{$tagname}--;
- if ($self->{parsingList})
- {
- if ($self->{isMovie} && ($tagname eq 'a'))
- {
- $self->{isMovie} = 0;
- my $url = $self->{itemsList}[$self->{itemIdx}]->{url};
- if (!$self->{itemsList}[$self->{itemIdx}]->{title})
- {
- $self->{alreadyListed}->{$url} = 0;
- $self->{itemIdx}--;
- }
- }
- } else {
- if ($tagname eq "h1")
- {
- $self->{insideHeader} = 0;
- }
- elsif ($tagname eq "a")
- {
- $self->{insideYear} = 0;
- $self->{insideGenre} = 0;
- $self->{insideActor} = 0;
- $self->{insideRole} = 0;
- }
- elsif ($tagname eq "div")
- {
- $self->{insideInfobar} = 0;
- $self->{insideNat} = 0;
- $self->{insideDirector} = 0;
- $self->{insideStoryline} = 0;
- $self->{insideReleaseDate} = 0;
- }
- elsif ($tagname eq "span")
- {
- $self->{insideRating} = 0;
- $self->{insideOriginalTitle} = 0;
- }
- elsif ($tagname eq "table")
- {
- $self->{insideCastList} = 0;
- }
- elsif ($tagname eq "td")
- {
- $self->{insidePrimaryImage} = 0;
- }
- elsif ($self->{insideCastList})
- {
- if ($self->{actor} && $self->{role})
- {
- $self->{actor} =~ s/^\s+|\s+$//g;
- $self->{actor} =~ s/\s{2,}/ /g;
- push @{$self->{curInfo}->{actors}}, [$self->{actor}];
- $self->{role} =~ s/^\s+|\s+$//g;
- $self->{role} =~ s/\s{2,}/ /g;
- push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}]}, $self->{role};
- $self->{actorsCounter}++;
- }
- $self->{actor} = "";
- $self->{role} = "";
- }
- }
- }
-
- sub text
- {
- my ($self, $origtext) = @_;
-
- return if length($origtext) < 2;
-
- $origtext =~ s/^\s+|\s+$//g;
-
- return if ($self->{parsingEnded});
-
- if ($self->{parsingList})
- {
- if ($self->{inside}->{h1} && $origtext !~ m/IMDb\s*Title\s*Search/i)
- {
- $self->{parsingEnded} = 1;
- $self->{itemIdx} = 0;
- $self->{itemsList}[0]->{url} = $self->{loadedUrl};
- }
- if ($self->{isMovie})
- {
- $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
- $self->{isMovie} = 0;
- $self->{isInfo} = 1;
- return;
- }
- if ($self->{isInfo})
- {
- $self->{itemsList}[$self->{itemIdx}]->{date} = $1 if $origtext =~ m|\(([0-9]*)(/I+)?\)|;
- $self->{isInfo} = 0;
- }
- }
- else
- {
- if ($self->{insideHeader})
- {
- if ($self->{insideYear})
- {
- $self->{curInfo}->{date} = $origtext;
- }
- elsif (!$self->{curInfo}->{title})
- {
- $self->{curInfo}->{title} = $origtext;
- if (!$self->{curInfo}->{original})
- {
- $self->{curInfo}->{original} = $origtext;
- }
- }
- elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i})
- {
- $self->{curInfo}->{original} = $origtext;
- }
- }
- elsif ($self->{insideInfobar})
- {
- if ($self->{insideGenre})
- {
- if ($self->{curInfo}->{genre})
- {
- $self->{curInfo}->{genre} .= ",";
- }
- $self->{curInfo}->{genre} .= $origtext;
- }
- elsif ($origtext =~ m/([0-9]+ min)/)
- {
- $self->{curInfo}->{time} = $1;
- }
- }
- elsif ($self->{insideRating} && $origtext =~ m/[0-9]\.[0-9]/)
- {
- $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
- }
- elsif ($self->{insideSynopsis})
- {
- $self->{curInfo}->{synopsis} .= $origtext;
- }
- elsif ($self->{insideNat})
- {
- if ($origtext =~ m/[^\s].+/)
- {
- if ($self->{curInfo}->{country} =~ m/.+/)
- {
- $self->{curInfo}->{country} .= ", ".$origtext;
- }
- else
- {
- $self->{curInfo}->{country} = $origtext;
- }
- }
- }
- elsif ($self->{insideCastList})
- {
- if ($self->{insideActor})
- {
- $self->{actor} .= $origtext;
- }
- elsif ($self->{insideRole})
- {
- $self->{role} .= $origtext;
- }
- }
- elsif ($self->{insideStoryline} && $self->{inside}{p})
- {
- $self->{curInfo}->{synopsis} = $origtext;
- $self->{insideStoryline} = 0;
- }
- elsif ($self->{insideDirector} && $self->{inside}->{div})
- {
- $origtext =~ s/,/, /;
- $self->{curInfo}->{director} .= $origtext;
- }
- elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date}) {
- if ($origtext =~ m/([0-9]{4})/)
- {
- $self->{curInfo}->{date} = $1;
- $self->{insideReleaseDate} = 0;
- }
- }
-
- if ($self->{inside}->{h2})
- {
- $self->{insideStoryline} = 1 if ($origtext eq "Storyline");
- }
- elsif ($self->{inside}->{h4})
- {
- $self->{insideDirector} = 1 if $origtext =~ m/Directors?:/;
- $self->{insideTime} = 1 if $origtext =~ m/Runtime:/;
- $self->{insideNat} = 1 if $origtext =~ m/Country:/;
- $self->{insideReleaseDate} = 1 if $origtext =~ m/Release Date:/;
- }
- }
- }
-
- sub new
- {
- my $proto = shift;
- my $class = ref($proto) || $proto;
- my $self = $class->SUPER::new();
- bless ($self, $class);
-
- $self->{hasField} = {
- title => 1,
- date => 1,
- director => 0,
- actors => 0,
- };
-
- $self->{isInfo} = 0;
- $self->{isMovie} = 0;
- $self->{curName} = undef;
- $self->{curUrl} = undef;
-
- return $self;
- }
-
- sub preProcess
- {
- my ($self, $html) = @_;
-
- $self->{parsingEnded} = 0;
-
- if ($self->{parsingList})
- {
- $self->{alreadyListed} = {};
- }
- else
- {
- #$html =~ s|<a href="synopsis">[^<]*</a>||gi;
- #$html =~ s|<a href="/name/.*?"[^>]*>([^<]*)</a>|$1|gi;
- #$html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi;
- #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi;
-
- # Commented out this line, causes bug #14420 when importing from named lists
- #$self->{curInfo}->{actors} = [];
- }
-
-
- return $html;
- }
-
- sub getSearchUrl
- {
- my ($self, $word) = @_;
-
- return "http://www.imdb.com/find?s=tt&q=$word";
- }
-
- sub getItemUrl
- {
- my ($self, $url) = @_;
-
- return "http://www.imdb.com" if $url eq "";
- return $url if $url =~ /^http:/;
- return "http://www.imdb.com".$url;
- }
-
- sub getName
- {
- return "IMDb";
- }
-
- sub getAuthor
- {
- return 'groms';
- }
-
- sub getLang
- {
- return 'EN';
- }
-
-}
-
-1;
+package GCPlugins::GCfilms::GCImdb;
+
+###################################################
+#
+# Features:
+# + Multiple directors separated by comma
+# + Multiple countries separated by comma
+# + Correct URL in case of redirection
+# + Fetches Original Title
+#
+###################################################
+#
+# Copyright 2005-2014 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+ package GCPlugins::GCfilms::GCPluginImdb;
+
+ use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+ sub start
+ {
+ my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+ $self->{inside}->{$tagname}++;
+
+ if ($self->{parsingEnded})
+ {
+ return;
+ }
+
+ if ($self->{parsingList})
+ {
+ if ($tagname eq "a")
+ {
+ my $url = $attr->{href};
+ if (($url =~ /^\/title\//) && (!$self->{alreadyListed}->{$url}))
+ {
+ $self->{isMovie} = 1;
+ $self->{isInfo} = 1;
+ $self->{itemIdx}++;
+ $self->{itemsList}[$self->{itemIdx}]->{url} = $url;
+ $self->{alreadyListed}->{$url} = 1;
+ }
+ }
+ }
+ else
+ {
+
+ if ($tagname eq "link")
+ {
+ if ($attr->{rel} eq "canonical")
+ {
+ $self->{curInfo}->{webPage} = $attr->{href};
+ }
+ }
+ elsif ($tagname eq "h1")
+ {
+ if ($attr->{class} eq "header")
+ {
+ $self->{insideHeader} = 1;
+ }
+ }
+ elsif ($tagname eq "div")
+ {
+ if ($attr->{class} eq "infobar")
+ {
+ $self->{insideInfobar} = 1;
+ }
+ }
+ elsif ($tagname eq "table")
+ {
+ if ($attr->{class} eq "cast_list")
+ {
+ $self->{insideCastList} = 1;
+ }
+ }
+ elsif ($tagname eq "span")
+ {
+ if ($attr->{itemprop} eq "ratingValue")
+ {
+ $self->{insideRating} = 1;
+ }
+ elsif ($attr->{class} eq "title-extra")
+ {
+ $self->{insideOriginalTitle} = 1;
+ }
+ elsif ($self->{insideCastList})
+ {
+ if ($attr->{itemprop} eq 'name')
+ {
+ $self->{insideActor} = 1;
+ }
+ }
+ }
+ elsif ($tagname eq "img")
+ {
+ if ($self->{insidePrimaryImage})
+ {
+ if (!($attr->{src} =~ m/nopicture/))
+ {
+ ($self->{curInfo}->{image} = $attr->{src}) =~ s/_V1\._.+\./_V1\._SX1000_SY1000_\./;
+ }
+ }
+ elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|)
+ {
+ my $cert = $attr->{title};
+ $self->{curInfo}->{age} = 1 if ($cert eq 'Unrated') || ($cert eq 'Open');
+ $self->{curInfo}->{age} = 2 if ($cert eq 'G') || ($cert eq 'Approved');
+ $self->{curInfo}->{age} = 5 if ($cert eq 'PG') || ($cert eq 'M') || ($cert eq 'GP');
+ $self->{curInfo}->{age} = 13 if $cert eq 'PG_13';
+ $self->{curInfo}->{age} = 17 if $cert eq 'R';
+ $self->{curInfo}->{age} = 18 if ($cert eq 'NC_17') || ($cert eq 'X');
+ }
+ }
+ elsif ($tagname eq "a")
+ {
+ if ($self->{insideHeader} && $attr->{href} =~ m/year/)
+ {
+ $self->{insideYear} = 1;
+ }
+ elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/)
+ {
+ $self->{insideGenre} = 1;
+ }
+ }
+ elsif ($tagname eq 'td')
+ {
+ if ($self->{insideCastList})
+ {
+ #if ($attr->{class} eq 'name')
+ #{
+ #$self->{insideActor} = 1;
+ #}
+ if ($attr->{class} eq 'character')
+ {
+ $self->{insideRole} = 1;
+ }
+ }
+ elsif ($attr->{id} eq "img_primary") {
+ $self->{insidePrimaryImage} = 1;
+ }
+ }
+ }
+ }
+
+ sub end
+ {
+ my ($self, $tagname) = @_;
+
+ $self->{inside}->{$tagname}--;
+ if ($self->{parsingList})
+ {
+ if ($self->{isMovie} && ($tagname eq 'a'))
+ {
+ $self->{isMovie} = 0;
+ my $url = $self->{itemsList}[$self->{itemIdx}]->{url};
+ if (!$self->{itemsList}[$self->{itemIdx}]->{title})
+ {
+ $self->{alreadyListed}->{$url} = 0;
+ $self->{itemIdx}--;
+ }
+ }
+ } else {
+ if ($tagname eq "h1")
+ {
+ $self->{insideHeader} = 0;
+ }
+ elsif ($tagname eq "a")
+ {
+ $self->{insideYear} = 0;
+ $self->{insideGenre} = 0;
+ $self->{insideActor} = 0;
+ $self->{insideRole} = 0;
+ }
+ elsif ($tagname eq "div")
+ {
+ $self->{insideInfobar} = 0;
+ $self->{insideNat} = 0;
+ $self->{insideDirector} = 0;
+ $self->{insideStoryline} = 0;
+ $self->{insideReleaseDate} = 0;
+ }
+ elsif ($tagname eq "span")
+ {
+ $self->{insideRating} = 0;
+ $self->{insideOriginalTitle} = 0;
+ }
+ elsif ($tagname eq "table")
+ {
+ $self->{insideCastList} = 0;
+ }
+ elsif ($tagname eq "td")
+ {
+ $self->{insidePrimaryImage} = 0;
+ }
+ elsif ($self->{insideCastList})
+ {
+ if ($self->{actor} && $self->{role})
+ {
+ $self->{actor} =~ s/^\s+|\s+$//g;
+ $self->{actor} =~ s/\s{2,}/ /g;
+ push @{$self->{curInfo}->{actors}}, [$self->{actor}];
+ $self->{role} =~ s/^\s+|\s+$//g;
+ $self->{role} =~ s/\s{2,}/ /g;
+ push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}]}, $self->{role};
+ $self->{actorsCounter}++;
+ }
+ $self->{actor} = "";
+ $self->{role} = "";
+ }
+ }
+ }
+
+ sub text
+ {
+ my ($self, $origtext) = @_;
+
+ return if length($origtext) < 2;
+
+ $origtext =~ s/^\s+|\s+$//g;
+
+ return if ($self->{parsingEnded});
+
+ if ($self->{parsingList})
+ {
+ #if ($self->{inside}->{h1} && $origtext !~ m/IMDb\s*Title\s*Search/i)
+ if ($self->{inside}->{title} && $origtext !~ m/Find\s-\sIMDb/i)
+ {
+ $self->{parsingEnded} = 1;
+ $self->{itemIdx} = 0;
+ $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+ }
+ if ($self->{isMovie})
+ {
+ $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+ $self->{isMovie} = 0;
+ $self->{isInfo} = 1;
+ return;
+ }
+ if ($self->{isInfo})
+ {
+ $self->{itemsList}[$self->{itemIdx}]->{date} = $1 if $origtext =~ m|\(([0-9]*)(/I+)?\)|;
+ $self->{isInfo} = 0;
+ }
+ }
+ else
+ {
+ if ($self->{insideHeader})
+ {
+ if ($self->{insideYear})
+ {
+ $self->{curInfo}->{date} = $origtext;
+ }
+ elsif (!$self->{curInfo}->{title})
+ {
+ $self->{curInfo}->{title} = $origtext;
+ if (!$self->{curInfo}->{original})
+ {
+ $self->{curInfo}->{original} = $origtext;
+ }
+ }
+ elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i})
+ {
+ $self->{curInfo}->{original} = $origtext;
+ }
+ }
+ elsif ($self->{insideInfobar})
+ {
+ if ($self->{insideGenre})
+ {
+ if ($self->{curInfo}->{genre})
+ {
+ $self->{curInfo}->{genre} .= ",";
+ }
+ $self->{curInfo}->{genre} .= $origtext;
+ }
+ elsif ($origtext =~ m/([0-9]+ min)/)
+ {
+ $self->{curInfo}->{time} = $1;
+ }
+ }
+ elsif ($self->{insideRating} && $origtext =~ m/[0-9]\.[0-9]/)
+ {
+ $self->{curInfo}->{ratingpress} = int($origtext + 0.5);
+ }
+ elsif ($self->{insideSynopsis})
+ {
+ $self->{curInfo}->{synopsis} .= $origtext;
+ }
+ elsif ($self->{insideNat})
+ {
+ if ($origtext =~ m/[^\s].+/)
+ {
+ if ($self->{curInfo}->{country} =~ m/.+/)
+ {
+ $self->{curInfo}->{country} .= ", ".$origtext;
+ }
+ else
+ {
+ $self->{curInfo}->{country} = $origtext;
+ }
+ }
+ }
+ elsif ($self->{insideCastList})
+ {
+ if ($self->{insideActor})
+ {
+ $self->{actor} .= $origtext;
+ }
+ elsif ($self->{insideRole})
+ {
+ $self->{role} .= $origtext;
+ }
+ }
+ elsif ($self->{insideStoryline} && $self->{inside}{p})
+ {
+ $self->{curInfo}->{synopsis} = $origtext;
+ $self->{insideStoryline} = 0;
+ }
+ elsif ($self->{insideDirector} && $self->{inside}->{div})
+ {
+ $origtext =~ s/,/, /;
+ $self->{curInfo}->{director} .= $origtext;
+ }
+ elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date}) {
+ if ($origtext =~ m/([0-9]{4})/)
+ {
+ $self->{curInfo}->{date} = $1;
+ $self->{insideReleaseDate} = 0;
+ }
+ }
+
+ if ($self->{inside}->{h2})
+ {
+ $self->{insideStoryline} = 1 if ($origtext eq "Storyline");
+ }
+ elsif ($self->{inside}->{h4})
+ {
+ $self->{insideDirector} = 1 if $origtext =~ m/Directors?:/;
+ $self->{insideTime} = 1 if $origtext =~ m/Runtime:/;
+ $self->{insideNat} = 1 if $origtext =~ m/Country:/;
+ $self->{insideReleaseDate} = 1 if $origtext =~ m/Release Date:/;
+ }
+ }
+ }
+
+ sub new
+ {
+ my $proto = shift;
+ my $class = ref($proto) || $proto;
+ my $self = $class->SUPER::new();
+ bless ($self, $class);
+
+ $self->{hasField} = {
+ title => 1,
+ date => 1,
+ director => 0,
+ actors => 0,
+ };
+
+ $self->{isInfo} = 0;
+ $self->{isMovie} = 0;
+ $self->{curName} = undef;
+ $self->{curUrl} = undef;
+
+ return $self;
+ }
+
+ sub preProcess
+ {
+ my ($self, $html) = @_;
+
+ $self->{parsingEnded} = 0;
+
+ if ($self->{parsingList})
+ {
+ $self->{alreadyListed} = {};
+ }
+ else
+ {
+ #$html =~ s|<a href="synopsis">[^<]*</a>||gi;
+ #$html =~ s|<a href="/name/.*?"[^>]*>([^<]*)</a>|$1|gi;
+ #$html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi;
+ #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi;
+
+ # Commented out this line, causes bug #14420 when importing from named lists
+ #$self->{curInfo}->{actors} = [];
+ }
+
+
+ return $html;
+ }
+
+ sub getSearchUrl
+ {
+ my ($self, $word) = @_;
+
+ return "http://www.imdb.com/find?s=tt&q=$word";
+ }
+
+ sub getItemUrl
+ {
+ my ($self, $url) = @_;
+
+ return "http://www.imdb.com" if $url eq "";
+ return $url if $url =~ /^http:/;
+ return "http://www.imdb.com".$url;
+ }
+
+ sub getName
+ {
+ return "IMDb";
+ }
+
+ sub getAuthor
+ {
+ return 'groms';
+ }
+
+ sub getLang
+ {
+ return 'EN';
+ }
+
+}
+
+1;