diff options
Diffstat (limited to 'lib/gcstar/GCPlugins/GCfilms')
-rw-r--r-- | lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm | 435 | ||||
-rw-r--r-- | lib/gcstar/GCPlugins/GCfilms/GCImdb.pm | 888 |
2 files changed, 449 insertions, 874 deletions
diff --git a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm b/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm deleted file mode 100644 index 75c6854..0000000 --- a/lib/gcstar/GCPlugins/GCfilms/GCAlpacineES.pm +++ /dev/null @@ -1,435 +0,0 @@ -package GCPlugins::GCfilms::GCAlpacineES; - -################################################### -# -# Copyright 2005-2010 Christian Jodar -# -# This file is part of GCstar. -# -# GCstar is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GCstar is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCstar; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA -# -################################################### - -use strict; -use utf8; - -use GCPlugins::GCfilms::GCfilmsCommon; - -{ - package GCPlugins::GCfilms::GCPluginAlpacineES; - - use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); - - - # text - # Called each time some plain text (between tags) is processed. - # $origtext is the read text. - sub text - { - my ($self, $origtext) = @_; - - return if length($origtext) < 2; - - # Código para procesar el resultado de la busqueda - if ($self->{parsingList}){ - # Guardamos la fecha. - if ($self->{inside}->{li} && $self->{insideInfos}){ - $origtext =~ /. 
\(([0-9]{4})\)/; - $self->{itemsList}[$self->{itemIdx}]->{date} = $origtext; - } - # Guardamos el título - if ($self->{inside}->{a} && $self->{insideInfos}){ - $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; - } - } - - else{ - # Eliminamos espacios iniciales, espacios dobles y espacios finales del texto - $origtext =~ s/^\s*|\s{2,}|\s*$//g; - # Estamos procesando el titulo - if ($self->{insideTitle}) - { - # Obtenemos titulo y fecha - $origtext =~ /(.*) \(([0-9]{4})\)/; - $self->{curInfo}->{title} = $1; - $self->{curInfo}->{date} = $2; - $self->{insideTitle} = 0; - return; - } - - # Si existe el hipervinculo "Ampliar" cambiamos la imagen por la ampliada - if ($self->{inside}->{a} && $origtext eq "Ampliar"){ - $self->{curInfo}->{image} =~ /(http:\/\/img.alpacine.com\/carteles\/.*)-[0-9]*(\.jpg)/; - $self->{curInfo}->{image} = $1 . $2; - return; - } - # Estamos en la puntuación real - if($self->{insideRating}){ - $self->{curInfo}->{ratingpress} = int( $origtext + 0.5 ); - $self->{insideRating} = 0; - } - # No hay puntuación real, asignamos 0 por defecto - if($self->{inside}->{div}){ - if($origtext =~ /Esperando \d votos/){ - $self->{curInfo}->{ratingpress} = 0; - } - } - # Procesamos el titulo original - if ($self->{isOrigTit} eq 1) { - $self->{isOrigTit} = 0; - $self->{curInfo}->{original} = $origtext; - return; - } - # Procesamos los generos (gen, gen, gen, gen...) 
- if ($self->{isGenres} eq 1) { - if($origtext ne ""){ - # hacemos uso de sus propias comas - $self->{curInfo}->{genre} .= $origtext; - } - else{ - $self->{isGenres} = 0; - } - return; - } - # Procesamos el país - if ($self->{isCountry} eq 1) { - $self->{isCountry} = 0; - $self->{curInfo}->{country} = $origtext; - return; - } - # Procesamos la duración - if ($self->{isTime} eq 1) { - $self->{isTime} = 0; - $self->{curInfo}->{time} = $origtext; - return; - } - # Procesamos los directores - if ($self->{isDirector} eq 1) { - if($origtext ne ""){ - if($self->{curInfo}->{director} eq ""){ - $self->{curInfo}->{director} .= $origtext; - } - else{ - $self->{curInfo}->{director} .= ", $origtext"; - } - } - else{ - $self->{isDirector} = 0; - } - return; - } - # Actores - if ($self->{isActors} eq 1) { - if($origtext ne ""){ - if($self->{curInfo}->{actors} eq ""){ - $self->{curInfo}->{actors} .= $origtext; - } - else{ - $self->{curInfo}->{actors} .= ", $origtext"; - } - } - else{ - $self->{isActors} = 0; - } - return; - } - # Procesamos la Sinopsis - if ($self->{isSynopsis} eq 1) { - $self->{isSynopsis} = 0; - $self->{curInfo}->{synopsis} = $origtext; - return; - } - # Procesamos los premios - if ($self->{isAwards} eq 1) { - $self->{isAwards} = 0; - $self->{curInfo}->{synopsis} = $self->{curInfo}->{synopsis}. "\n\nPremios:\n\t".$origtext; - $self->{insideInfos} = 0; - return; - } - - # Condiciones para procesar los campos en el siguiente ciclo - if($self->{insideInfos}){ - $self->{isOrigTit} = 1 if $origtext eq "Título original:"; - $self->{isGenres} = 1 if $origtext eq "Género:"; - $self->{isCountry} = 1 if $origtext eq "País:"; - $self->{isTime} = 1 if $origtext eq "Duración:"; - $self->{isDirector} = 1 if $origtext eq "Dirección:"; - $self->{isActors} = 1 if $origtext eq "Interpretación:"; - $self->{isSynopsis} = 1 if $origtext eq "Sinopsis:"; - $self->{isAwards} = 1 if $origtext eq "Premios:"; - } - } - } - - - # end - # Called each time a HTML tag ends. 
- # $tagname is the tag name. - sub end - { - my ($self, $tagname) = @_; - $self->{inside}->{$tagname}--; - - # Código para procesar el resultado de la busqueda - #if ($self->{parsingList}){ - #} - # Código para procesar la información de la pelicula seleccionada - #else { - #} - } - - # In processing functions below, self->{parsingList} can be used. - # If true, we are processing a search results page - # If false, we are processing a item information page. - - # $self->{inside}->{tagname} (with correct value for tagname) can be used to test - # if we are in the corresponding tag. - - # You have a counter $self->{itemIdx} that have to be used when processing search results. - # It is your responsability to increment it! - - # When processing search results, you have to fill the available fields for results - # - # $self->{itemsList}[$self->{movieIdx}]->{field_name} - # - # When processing a movie page, you need to fill the fields (if available) - # in $self->{curInfo}. - # - # $self->{curInfo}->{field_name} - - # start - # Called each time a new HTML tag begins. - # $tagname is the tag name. - # $attr is reference to an associative array of tag attributes. - # $attrseq is an array reference containing all the attributes name. 
- # $origtext is the tag text as found in source file - # Returns nothing - sub start - { - my ($self, $tagname, $attr, $attrseq, $origtext) = @_; - $self->{inside}->{$tagname}++; - - # Código para procesar el resultado de la busqueda para generar el listado - if ($self->{parsingList}) - { - # Comprobamos si estamos dentro de un título utilizando el atributo class - if( ($tagname eq "li" ) && ($attr->{class} ne "mas" )){ - $self->{itemIdx}++; - $self->{insideInfos} = 1 ; - return; - } - if( ($tagname eq "li" ) && ($attr->{class} eq "mas" )){ - $self->{insideInfos} = 0; - return; - } - # Si estamos en un título y encontramos una tag a, es un enlace a ficha - if ($tagname eq "a" && $self->{insideInfos}){ - $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.alpacine.com".$attr->{href}; - return; - } - } - # Código para procesar la información de la pelicula seleccionada - else { - if ($tagname eq "h1"){ - $self->{insideTitle} = 1; - return; - } - # Si estamos dentro de una imagen y el src es el del thumb lo asignamos como imagen - if ($tagname eq "img") - { - # Extraemos la dirección de la imagen thumb - if ($attr->{src} =~ /http:\/\/img.alpacine.com\/carteles\/.*\.jpg/) - { - $self->{curInfo}->{image} = $attr->{src}; - } - return; - } - - if ($tagname eq "div" && $attr->{class} eq "voto"){ - $self->{insideRating} = 1; - return; - } - - if( $tagname eq "div" && $attr->{class} eq "datos" ){ - $self->{insideInfos} = 1 ; - return; - } - } - } - - # preProcess - # Called before each page is processed. You can use it to do some substitutions. - # $html is the page content. - # Returns modified version of page content. 
- sub preProcess - { - my ($self, $html) = @_; - - # Anulamos el html si coincide con el patron de no resultados - if($html =~ /^.*No hay resultados para.*$/s){ - $html = ""; - return $html; - } - - # Recorta el código del listado de resultados, quedandose solo con la parte que nos interesa del html - # el modificador s/.../$1/s trata el flujo como una sola cadena y reemplaza todo el cuerpo con la parte que nos interesa - if($html =~ s/^.*<div class="titulo">Pel.culas <span class="resultados">\([0-9]* resultado[s]?\)<\/span><\/div><ul>(<li><a.*<\/a> \([0-9]*\)<\/li>).*$/$1/s){ - return $html; - } - - # Recorta el código de la ficha, quedandose solo con la parte que nos interesa del html - # Comprobamos si la pelicula contiene o no premios y nos quedamos con lo que corresponda - if($html =~ /^.*<div class="titulo">Premios:.*más\.\.\.<\/a><\/div><\/div>.*$/s){ - $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>.*\n.*<div class="datox"><div class="titulo">Premios:.*)más\.\.\.<\/a><\/div><\/div>.*$/$1/s; - } - else{ - $html =~ s/^.*<div id="titulo">(.*<\/div><\/div>)\n\n\t\t\t\t\n\n\t\t\t\t<hr \/>.*$/$1/s; - } - return $html; - } - - # changeUrl - # Can be used to change URL if item URL and the one used to - # extract information are different. - # Return the modified URL. - #sub changeUrl - #{ - # my ($self, $url) = @_; - # return $url; - #} - - # getExtra - # Used if the plugin wants an extra column to be displayed in search results - # Return the column title or empty string to hide the column. - #sub getExtra - #{ - # return 'Extra'; - #} - - - # getLang - # Used to fill in plugin list with user language plugins - # Return the language used for this site (2 letters code). - sub getLang - { - return "ES"; - } - - - # getAuthor - # Used to display the plugin author in GUI. - # Returns the plugin author name. - sub getAuthor - { - return "DoVerMan"; - } - - - # getName - # Used to display plugin name in GUI. - # Returns the plugin name. 
- sub getName - { - return 'Alpacine'; - } - - - # getCharset - # Used to convert charset in web pages. - # Returns the charset as specified in pages. - #sub getCharset - #{ - # my $self = shift; - # # Charset de la web - # return "UTF-8"; - #} - - - # getItemUrl - # Used to get the full URL of an item page. - # Useful when url on results pages are relative. - # $url is the URL as found with a search. - # Returns the absolute URL. - sub getItemUrl - { - my ($self, $url) = @_; - return $url; - } - - - # getSearchUrl - # Used to get the URL that to be used to perform searches. - # $word is the query - # Returns the full URL. - sub getSearchUrl - { - my ($self, $word) = @_; - # Hack para evitar problemas con acentos - $word =~ s/%E1/a/g; - $word =~ s/%E9/e/g; - $word =~ s/%ED/i/g; - $word =~ s/%F3/o/g; - $word =~ s/%FA/u/g; - $word =~ s/%C1/A/g; - $word =~ s/%C9/E/g; - $word =~ s/%CD/I/g; - $word =~ s/%D3/O/g; - $word =~ s/%DA/U/g; - - return "http://www.alpacine.com/buscar/?buscar=" . $word; - - } - - - # Constructor - sub new - { - # Inicialización - my $proto = shift; - my $class = ref($proto) || $proto; - my $self = $class->SUPER::new(); - bless ($self, $class); - - # Campos que devuelve el plugin (1 si, 0 no). 
Son los que apareceran - # en el listado de resultados - $self->{hasField} = { - title => 1, - date => 1, - director => 0, - actors => 0, - }; - - # Indica si estamos procesando información útil - $self->{insideInfos} = 0; - - # Indican el estado del procesado del listado de resultados - $self->{insideRating} = 0; - $self->{insideTitle} = 0; - - $self->{isOrigTit} = 0; - $self->{isGenres} = 0; - $self->{isCountry} = 0; - $self->{isTime} = 0; - $self->{isDirector} = 0; - $self->{isActors} = 0; - $self->{isSynopsis} = 0; - $self->{isAwards} = 0; - - return $self; - } - -} - -1; diff --git a/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm index 70af804..0d12717 100644 --- a/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm +++ b/lib/gcstar/GCPlugins/GCfilms/GCImdb.pm @@ -1,439 +1,449 @@ -package GCPlugins::GCfilms::GCImdb; - -################################################### -# -# Copyright 2010 groms -# -# Features: -# + Multiple directors separated by comma -# + Multiple countries separated by comma -# + Correct URL in case of redirection -# + Fetches Original Title -# -# This file is part of GCstar. -# -# GCstar is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GCstar is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with GCstar; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -################################################### - -use strict; - -use GCPlugins::GCfilms::GCfilmsCommon; - -{ - package GCPlugins::GCfilms::GCPluginImdb; - - use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase); - - sub start - { - my ($self, $tagname, $attr, $attrseq, $origtext) = @_; - - $self->{inside}->{$tagname}++; - - if ($self->{parsingEnded}) - { - return; - } - - if ($self->{parsingList}) - { - if ($tagname eq "a") - { - my $url = $attr->{href}; - if (($url =~ /^\/title\//) && (!$self->{alreadyListed}->{$url})) - { - $self->{isMovie} = 1; - $self->{isInfo} = 1; - $self->{itemIdx}++; - $self->{itemsList}[$self->{itemIdx}]->{url} = $url; - $self->{alreadyListed}->{$url} = 1; - } - } - } - else - { - - if ($tagname eq "link") - { - if ($attr->{rel} eq "canonical") - { - $self->{curInfo}->{webPage} = $attr->{href}; - } - } - elsif ($tagname eq "h1") - { - if ($attr->{class} eq "header") - { - $self->{insideHeader} = 1; - } - } - elsif ($tagname eq "div") - { - if ($attr->{class} eq "infobar") - { - $self->{insideInfobar} = 1; - } - } - elsif ($tagname eq "table") - { - if ($attr->{class} eq "cast_list") - { - $self->{insideCastList} = 1; - } - } - elsif ($tagname eq "span") - { - if ($attr->{itemprop} eq "ratingValue") - { - $self->{insideRating} = 1; - } - elsif ($attr->{class} eq "title-extra") - { - $self->{insideOriginalTitle} = 1; - } - } - elsif ($tagname eq "img") - { - if ($self->{insidePrimaryImage}) - { - if (!($attr->{src} =~ m/nopicture/)) - { - ($self->{curInfo}->{image} = $attr->{src}) =~ s/_V1\._.+\./_V1\._SX1000_SY1000_\./; - } - } - elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|) - { - my $cert = $attr->{title}; - $self->{curInfo}->{age} = 1 if ($cert eq 'Unrated') || ($cert eq 'Open'); - $self->{curInfo}->{age} = 
2 if ($cert eq 'G') || ($cert eq 'Approved'); - $self->{curInfo}->{age} = 5 if ($cert eq 'PG') || ($cert eq 'M') || ($cert eq 'GP'); - $self->{curInfo}->{age} = 13 if $cert eq 'PG_13'; - $self->{curInfo}->{age} = 17 if $cert eq 'R'; - $self->{curInfo}->{age} = 18 if ($cert eq 'NC_17') || ($cert eq 'X'); - } - } - elsif ($tagname eq "a") - { - if ($self->{insideHeader} && $attr->{href} =~ m/year/) - { - $self->{insideYear} = 1; - } - elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/) - { - $self->{insideGenre} = 1; - } - } - elsif ($tagname eq 'td') - { - if ($self->{insideCastList}) - { - if ($attr->{class} eq 'name') - { - $self->{insideActor} = 1; - } - elsif ($attr->{class} eq 'character') - { - $self->{insideRole} = 1; - } - } - elsif ($attr->{id} eq "img_primary") { - $self->{insidePrimaryImage} = 1; - } - } - } - } - - sub end - { - my ($self, $tagname) = @_; - - $self->{inside}->{$tagname}--; - if ($self->{parsingList}) - { - if ($self->{isMovie} && ($tagname eq 'a')) - { - $self->{isMovie} = 0; - my $url = $self->{itemsList}[$self->{itemIdx}]->{url}; - if (!$self->{itemsList}[$self->{itemIdx}]->{title}) - { - $self->{alreadyListed}->{$url} = 0; - $self->{itemIdx}--; - } - } - } else { - if ($tagname eq "h1") - { - $self->{insideHeader} = 0; - } - elsif ($tagname eq "a") - { - $self->{insideYear} = 0; - $self->{insideGenre} = 0; - $self->{insideActor} = 0; - $self->{insideRole} = 0; - } - elsif ($tagname eq "div") - { - $self->{insideInfobar} = 0; - $self->{insideNat} = 0; - $self->{insideDirector} = 0; - $self->{insideStoryline} = 0; - $self->{insideReleaseDate} = 0; - } - elsif ($tagname eq "span") - { - $self->{insideRating} = 0; - $self->{insideOriginalTitle} = 0; - } - elsif ($tagname eq "table") - { - $self->{insideCastList} = 0; - } - elsif ($tagname eq "td") - { - $self->{insidePrimaryImage} = 0; - } - elsif ($self->{insideCastList}) - { - if ($self->{actor} && $self->{role}) - { - $self->{actor} =~ s/^\s+|\s+$//g; - $self->{actor} =~ 
s/\s{2,}/ /g; - push @{$self->{curInfo}->{actors}}, [$self->{actor}]; - $self->{role} =~ s/^\s+|\s+$//g; - $self->{role} =~ s/\s{2,}/ /g; - push @{$self->{curInfo}->{actors}->[$self->{actorsCounter}]}, $self->{role}; - $self->{actorsCounter}++; - } - $self->{actor} = ""; - $self->{role} = ""; - } - } - } - - sub text - { - my ($self, $origtext) = @_; - - return if length($origtext) < 2; - - $origtext =~ s/^\s+|\s+$//g; - - return if ($self->{parsingEnded}); - - if ($self->{parsingList}) - { - if ($self->{inside}->{h1} && $origtext !~ m/IMDb\s*Title\s*Search/i) - { - $self->{parsingEnded} = 1; - $self->{itemIdx} = 0; - $self->{itemsList}[0]->{url} = $self->{loadedUrl}; - } - if ($self->{isMovie}) - { - $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; - $self->{isMovie} = 0; - $self->{isInfo} = 1; - return; - } - if ($self->{isInfo}) - { - $self->{itemsList}[$self->{itemIdx}]->{date} = $1 if $origtext =~ m|\(([0-9]*)(/I+)?\)|; - $self->{isInfo} = 0; - } - } - else - { - if ($self->{insideHeader}) - { - if ($self->{insideYear}) - { - $self->{curInfo}->{date} = $origtext; - } - elsif (!$self->{curInfo}->{title}) - { - $self->{curInfo}->{title} = $origtext; - if (!$self->{curInfo}->{original}) - { - $self->{curInfo}->{original} = $origtext; - } - } - elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i}) - { - $self->{curInfo}->{original} = $origtext; - } - } - elsif ($self->{insideInfobar}) - { - if ($self->{insideGenre}) - { - if ($self->{curInfo}->{genre}) - { - $self->{curInfo}->{genre} .= ","; - } - $self->{curInfo}->{genre} .= $origtext; - } - elsif ($origtext =~ m/([0-9]+ min)/) - { - $self->{curInfo}->{time} = $1; - } - } - elsif ($self->{insideRating} && $origtext =~ m/[0-9]\.[0-9]/) - { - $self->{curInfo}->{ratingpress} = int($origtext + 0.5); - } - elsif ($self->{insideSynopsis}) - { - $self->{curInfo}->{synopsis} .= $origtext; - } - elsif ($self->{insideNat}) - { - if ($origtext =~ m/[^\s].+/) - { - if ($self->{curInfo}->{country} =~ m/.+/) 
- { - $self->{curInfo}->{country} .= ", ".$origtext; - } - else - { - $self->{curInfo}->{country} = $origtext; - } - } - } - elsif ($self->{insideCastList}) - { - if ($self->{insideActor}) - { - $self->{actor} .= $origtext; - } - elsif ($self->{insideRole}) - { - $self->{role} .= $origtext; - } - } - elsif ($self->{insideStoryline} && $self->{inside}{p}) - { - $self->{curInfo}->{synopsis} = $origtext; - $self->{insideStoryline} = 0; - } - elsif ($self->{insideDirector} && $self->{inside}->{div}) - { - $origtext =~ s/,/, /; - $self->{curInfo}->{director} .= $origtext; - } - elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date}) { - if ($origtext =~ m/([0-9]{4})/) - { - $self->{curInfo}->{date} = $1; - $self->{insideReleaseDate} = 0; - } - } - - if ($self->{inside}->{h2}) - { - $self->{insideStoryline} = 1 if ($origtext eq "Storyline"); - } - elsif ($self->{inside}->{h4}) - { - $self->{insideDirector} = 1 if $origtext =~ m/Directors?:/; - $self->{insideTime} = 1 if $origtext =~ m/Runtime:/; - $self->{insideNat} = 1 if $origtext =~ m/Country:/; - $self->{insideReleaseDate} = 1 if $origtext =~ m/Release Date:/; - } - } - } - - sub new - { - my $proto = shift; - my $class = ref($proto) || $proto; - my $self = $class->SUPER::new(); - bless ($self, $class); - - $self->{hasField} = { - title => 1, - date => 1, - director => 0, - actors => 0, - }; - - $self->{isInfo} = 0; - $self->{isMovie} = 0; - $self->{curName} = undef; - $self->{curUrl} = undef; - - return $self; - } - - sub preProcess - { - my ($self, $html) = @_; - - $self->{parsingEnded} = 0; - - if ($self->{parsingList}) - { - $self->{alreadyListed} = {}; - } - else - { - #$html =~ s|<a href="synopsis">[^<]*</a>||gi; - #$html =~ s|<a href="/name/.*?"[^>]*>([^<]*)</a>|$1|gi; - #$html =~ s|<a href="/character/ch[0-9]*/">([^<]*)</a>|$1|gi; - #$html =~ s|<a href="/Sections/.*?">([^<]*)</a>|$1|gi; - - # Commented out this line, causes bug #14420 when importing from named lists - #$self->{curInfo}->{actors} = []; 
- } - - - return $html; - } - - sub getSearchUrl - { - my ($self, $word) = @_; - - return "http://www.imdb.com/find?s=tt&q=$word"; - } - - sub getItemUrl - { - my ($self, $url) = @_; - - return "http://www.imdb.com" if $url eq ""; - return $url if $url =~ /^http:/; - return "http://www.imdb.com".$url; - } - - sub getName - { - return "IMDb"; - } - - sub getAuthor - { - return 'groms'; - } - - sub getLang - { - return 'EN'; - } - -} - -1; +package GCPlugins::GCfilms::GCImdb;
+
+###################################################
+#
+# Features:
+# + Multiple directors separated by comma
+# + Multiple countries separated by comma
+# + Correct URL in case of redirection
+# + Fetches Original Title
+#
+###################################################
+#
+# Copyright 2005-2014 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCfilms::GCfilmsCommon;
+
+{
+ package GCPlugins::GCfilms::GCPluginImdb;
+
+ use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
+
+ # start
+ # Called each time a new HTML tag begins.
+ #   $tagname  is the tag name.
+ #   $attr     is a reference to a hash of the tag attributes.
+ #   $attrseq and $origtext are part of the handler signature but are not
+ #   used here.
+ # On a search results page ($self->{parsingList}) every not-yet-seen link
+ # whose href starts with /title/ opens a new entry in $self->{itemsList}.
+ # On an item page the handler only raises the inside* flags (or stores
+ # attribute values) that text() and end() rely on.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     return if $self->{parsingEnded};
+
+     if ($self->{parsingList})
+     {
+         # Only links matter on a results page.
+         return if $tagname ne "a";
+
+         my $href = $attr->{href};
+         return unless $href =~ /^\/title\//;
+         return if $self->{alreadyListed}->{$href};
+
+         # New candidate result; end() gives the slot back when the link
+         # turns out to contain no title text.
+         $self->{itemIdx}++;
+         $self->{itemsList}[$self->{itemIdx}]->{url} = $href;
+         $self->{alreadyListed}->{$href} = 1;
+         $self->{isMovie} = 1;
+         $self->{isInfo} = 1;
+         return;
+     }
+
+     # Item page from here on.
+     if ($tagname eq "link")
+     {
+         $self->{curInfo}->{webPage} = $attr->{href} if $attr->{rel} eq "canonical";
+     }
+     elsif ($tagname eq "h1")
+     {
+         $self->{insideHeader} = 1 if $attr->{class} eq "header";
+     }
+     elsif ($tagname eq "div")
+     {
+         $self->{insideInfobar} = 1 if $attr->{class} eq "infobar";
+     }
+     elsif ($tagname eq "table")
+     {
+         $self->{insideCastList} = 1 if $attr->{class} eq "cast_list";
+     }
+     elsif ($tagname eq "span")
+     {
+         if ($attr->{itemprop} eq "ratingValue")
+         {
+             $self->{insideRating} = 1;
+         }
+         elsif ($attr->{class} eq "title-extra")
+         {
+             $self->{insideOriginalTitle} = 1;
+         }
+         elsif ($self->{insideCastList})
+         {
+             # Actor names are picked up from <span itemprop="name"> inside
+             # the cast table (the <td class="name"> check is no longer used).
+             $self->{insideActor} = 1 if $attr->{itemprop} eq 'name';
+         }
+     }
+     elsif ($tagname eq "img")
+     {
+         if ($self->{insidePrimaryImage})
+         {
+             unless ($attr->{src} =~ m/nopicture/)
+             {
+                 # Rewrite the size suffix of the image URL before storing it.
+                 my $picture = $attr->{src};
+                 $picture =~ s/_V1\._.+\./_V1\._SX1000_SY1000_\./;
+                 $self->{curInfo}->{image} = $picture;
+             }
+         }
+         elsif ($self->{insideInfobar} && $attr->{src} =~ m|/certificates/us/|)
+         {
+             # Certificate label (title attribute) => age stored in curInfo.
+             my %ageFor = (
+                 'Unrated'  => 1,
+                 'Open'     => 1,
+                 'G'        => 2,
+                 'Approved' => 2,
+                 'PG'       => 5,
+                 'M'        => 5,
+                 'GP'       => 5,
+                 'PG_13'    => 13,
+                 'R'        => 17,
+                 'NC_17'    => 18,
+                 'X'        => 18,
+             );
+             my $cert = $attr->{title};
+             $self->{curInfo}->{age} = $ageFor{$cert} if exists $ageFor{$cert};
+         }
+     }
+     elsif ($tagname eq "a")
+     {
+         if ($self->{insideHeader} && $attr->{href} =~ m/year/)
+         {
+             $self->{insideYear} = 1;
+         }
+         elsif ($self->{insideInfobar} && $attr->{href} =~ m/genre/)
+         {
+             $self->{insideGenre} = 1;
+         }
+     }
+     elsif ($tagname eq 'td')
+     {
+         if ($self->{insideCastList})
+         {
+             $self->{insideRole} = 1 if $attr->{class} eq 'character';
+         }
+         elsif ($attr->{id} eq "img_primary")
+         {
+             $self->{insidePrimaryImage} = 1;
+         }
+     }
+ }
+
+ # end
+ # Called each time an HTML tag ends.
+ #   $tagname is the tag name.
+ # Search results page: closes the candidate opened by start() and drops it
+ # again when the link produced no title text.
+ # Item page: lowers the inside* flags tied to the closing tag and, while
+ # inside the cast table, flushes the collected actor/role pair into
+ # $self->{curInfo}->{actors} as [actor, role].
+ sub end
+ {
+     my ($self, $tagname) = @_;
+
+     $self->{inside}->{$tagname}--;
+     if ($self->{parsingList})
+     {
+         if ($self->{isMovie} && ($tagname eq 'a'))
+         {
+             $self->{isMovie} = 0;
+             my $url = $self->{itemsList}[$self->{itemIdx}]->{url};
+             if (!$self->{itemsList}[$self->{itemIdx}]->{title})
+             {
+                 # No title was stored for this link: un-mark the URL so a
+                 # later link to the same title can still be listed, and
+                 # give the result slot back.
+                 $self->{alreadyListed}->{$url} = 0;
+                 $self->{itemIdx}--;
+             }
+         }
+     }
+     else
+     {
+         if ($tagname eq "h1")
+         {
+             $self->{insideHeader} = 0;
+         }
+         elsif ($tagname eq "a")
+         {
+             $self->{insideYear} = 0;
+             $self->{insideGenre} = 0;
+             $self->{insideActor} = 0;
+             $self->{insideRole} = 0;
+         }
+         elsif ($tagname eq "div")
+         {
+             $self->{insideInfobar} = 0;
+             $self->{insideNat} = 0;
+             $self->{insideDirector} = 0;
+             $self->{insideStoryline} = 0;
+             $self->{insideReleaseDate} = 0;
+         }
+         elsif ($tagname eq "span")
+         {
+             $self->{insideRating} = 0;
+             $self->{insideOriginalTitle} = 0;
+         }
+         elsif ($tagname eq "table")
+         {
+             $self->{insideCastList} = 0;
+         }
+         elsif ($tagname eq "td")
+         {
+             $self->{insidePrimaryImage} = 0;
+         }
+         elsif ($self->{insideCastList})
+         {
+             # Any other closing tag inside the cast table ends the current
+             # row as far as this parser is concerned.
+             if ($self->{actor} && $self->{role})
+             {
+                 $self->{actor} =~ s/^\s+|\s+$//g;
+                 $self->{actor} =~ s/\s{2,}/ /g;
+                 $self->{role} =~ s/^\s+|\s+$//g;
+                 $self->{role} =~ s/\s{2,}/ /g;
+
+                 # Store the pair in one step.  The role used to be attached
+                 # through $self->{actorsCounter}, which nothing in this
+                 # file initialises or resets; whenever the list already held
+                 # entries the role ended up on the wrong row.
+                 push @{$self->{curInfo}->{actors}}, [$self->{actor}, $self->{role}];
+
+                 # Still maintained in case other code reads it.
+                 $self->{actorsCounter}++;
+             }
+             # An incomplete pair is discarded here as well.
+             $self->{actor} = "";
+             $self->{role} = "";
+         }
+     }
+ }
+
+ # text
+ # Called each time some plain text (between tags) is processed.
+ #   $chunk is the text as read; chunks shorter than two characters are
+ #   ignored and the rest is trimmed before use.
+ # Search results page: fills in the title and date of the current result.
+ # Item page: stores the chunk in the $self->{curInfo} field selected by
+ # the inside* flags, then checks whether the chunk is a section label
+ # that arms a flag for the text that follows.
+ sub text
+ {
+     my ($self, $chunk) = @_;
+
+     return if length($chunk) < 2;
+
+     $chunk =~ s/^\s+|\s+$//g;
+
+     return if $self->{parsingEnded};
+
+     if ($self->{parsingList})
+     {
+         # A page title other than "Find - IMDb" is taken to mean that this
+         # is not a results page (presumably the search was redirected to
+         # an item page): keep a single result pointing at the loaded URL.
+         if ($self->{inside}->{title} && $chunk !~ m/Find\s-\sIMDb/i)
+         {
+             $self->{parsingEnded} = 1;
+             $self->{itemIdx} = 0;
+             $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+         }
+
+         # First chunk after a title link is the title itself.
+         if ($self->{isMovie})
+         {
+             $self->{itemsList}[$self->{itemIdx}]->{title} = $chunk;
+             $self->{isMovie} = 0;
+             $self->{isInfo} = 1;
+             return;
+         }
+
+         # The next chunk may carry the year in parentheses.
+         return unless $self->{isInfo};
+         $self->{itemsList}[$self->{itemIdx}]->{date} = $1 if $chunk =~ m|\(([0-9]*)(/I+)?\)|;
+         $self->{isInfo} = 0;
+         return;
+     }
+
+     # Item page: route the chunk to the field selected by the active flag.
+     if ($self->{insideHeader})
+     {
+         if ($self->{insideYear})
+         {
+             $self->{curInfo}->{date} = $chunk;
+         }
+         elsif (!$self->{curInfo}->{title})
+         {
+             # The first header chunk is the title; it doubles as the
+             # original title until a dedicated one shows up.
+             $self->{curInfo}->{title} = $chunk;
+             $self->{curInfo}->{original} = $chunk unless $self->{curInfo}->{original};
+         }
+         elsif ($self->{insideOriginalTitle} && !$self->{inside}->{i})
+         {
+             $self->{curInfo}->{original} = $chunk;
+         }
+     }
+     elsif ($self->{insideInfobar})
+     {
+         if ($self->{insideGenre})
+         {
+             $self->{curInfo}->{genre} .= "," if $self->{curInfo}->{genre};
+             $self->{curInfo}->{genre} .= $chunk;
+         }
+         elsif ($chunk =~ m/([0-9]+ min)/)
+         {
+             $self->{curInfo}->{time} = $1;
+         }
+     }
+     elsif ($self->{insideRating} && $chunk =~ m/[0-9]\.[0-9]/)
+     {
+         # Rounded to the nearest integer.
+         $self->{curInfo}->{ratingpress} = int($chunk + 0.5);
+     }
+     elsif ($self->{insideSynopsis})
+     {
+         # NOTE(review): nothing in this file sets insideSynopsis.
+         $self->{curInfo}->{synopsis} .= $chunk;
+     }
+     elsif ($self->{insideNat})
+     {
+         if ($chunk =~ m/[^\s].+/)
+         {
+             $self->{curInfo}->{country} =
+                 ($self->{curInfo}->{country} =~ m/.+/)
+                 ? $self->{curInfo}->{country} . ", " . $chunk
+                 : $chunk;
+         }
+     }
+     elsif ($self->{insideCastList})
+     {
+         # Accumulated here, flushed by end().
+         if ($self->{insideActor})
+         {
+             $self->{actor} .= $chunk;
+         }
+         elsif ($self->{insideRole})
+         {
+             $self->{role} .= $chunk;
+         }
+     }
+     elsif ($self->{insideStoryline} && $self->{inside}{p})
+     {
+         # Only the first paragraph chunk after the heading is kept.
+         $self->{curInfo}->{synopsis} = $chunk;
+         $self->{insideStoryline} = 0;
+     }
+     elsif ($self->{insideDirector} && $self->{inside}->{div})
+     {
+         $chunk =~ s/,/, /;
+         $self->{curInfo}->{director} .= $chunk;
+     }
+     elsif ($self->{insideReleaseDate} && !$self->{curInfo}->{date})
+     {
+         # Fallback for the year when the header did not provide one.
+         if ($chunk =~ m/([0-9]{4})/)
+         {
+             $self->{curInfo}->{date} = $1;
+             $self->{insideReleaseDate} = 0;
+         }
+     }
+
+     # Section labels arm the flag for the chunks that follow them.
+     if ($self->{inside}->{h2})
+     {
+         $self->{insideStoryline} = 1 if $chunk eq "Storyline";
+     }
+     elsif ($self->{inside}->{h4})
+     {
+         $self->{insideDirector} = 1 if $chunk =~ m/Directors?:/;
+         # NOTE(review): insideTime is set but not read in this file.
+         $self->{insideTime} = 1 if $chunk =~ m/Runtime:/;
+         $self->{insideNat} = 1 if $chunk =~ m/Country:/;
+         $self->{insideReleaseDate} = 1 if $chunk =~ m/Release Date:/;
+     }
+ }
+
+ # Constructor
+ # Builds the plugin through the parent constructor, declares which fields
+ # are available in search results and clears the list-parsing state.
+ # Returns the new plugin object.
+ sub new
+ {
+     my ($invocant) = @_;
+     my $class = ref($invocant) || $invocant;
+     my $self = bless $class->SUPER::new(), $class;
+
+     # Fields returned in the results list (1 yes, 0 no).
+     $self->{hasField} = {
+         title    => 1,
+         date     => 1,
+         director => 0,
+         actors   => 0,
+     };
+
+     # State used while a search results page is parsed.
+     @{$self}{qw(isInfo isMovie)} = (0, 0);
+     @{$self}{qw(curName curUrl)} = (undef, undef);
+
+     return $self;
+ }
+
+ # preProcess
+ # Called before each page is processed.
+ #   $html is the page content.
+ # Resets the per-page parsing state and returns the page content unchanged.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     $self->{parsingEnded} = 0;
+
+     # A fresh results page starts with an empty "already listed" set.
+     $self->{alreadyListed} = {} if $self->{parsingList};
+
+     # Item pages are deliberately left untouched.  In particular
+     # $self->{curInfo}->{actors} is NOT reset here: doing so caused
+     # bug #14420 when importing from named lists.
+
+     return $html;
+ }
+
+ # getSearchUrl
+ # Used to get the URL to be used to perform searches.
+ #   $word is the query.
+ # Returns the full URL of the title search.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     my $searchUrl = "http://www.imdb.com/find?s=tt&q=" . $word;
+     return $searchUrl;
+ }
+
+ # getItemUrl
+ # Used to get the full URL of an item page; useful when the URLs found on
+ # results pages are relative.
+ #   $url is the URL as found with a search.
+ # Returns the absolute URL: the site root for an empty or undefined $url,
+ # $url itself when it is already absolute, otherwise the site root
+ # followed by $url.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     my $site = "http://www.imdb.com";
+
+     return $site if !defined($url) || $url eq "";
+
+     # Accept https as well as http (any letter case); previously an
+     # https:// URL was treated as relative and got the site root glued in
+     # front of it.
+     return $url if $url =~ /^https?:/i;
+
+     return $site . $url;
+ }
+
+ # getName
+ # Used to display the plugin name in the GUI.
+ # Returns the plugin name.
+ sub getName
+ {
+     my $pluginName = 'IMDb';
+     return $pluginName;
+ }
+
+ # getAuthor
+ # Used to display the plugin author in the GUI.
+ # Returns the plugin author name.
+ sub getAuthor
+ {
+     my $author = "groms";
+     return $author;
+ }
+
+ # getLang
+ # Used to fill in the plugin list with user language plugins.
+ # Returns the language used for this site (2 letters code).
+ sub getLang
+ {
+     my $language = "EN";
+     return $language;
+ }
+
+}
+
+1;
|