package GCPlugins::GCfilms::GCMetropoliES;
###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use utf8;
use GCPlugins::GCfilms::GCfilmsCommon;
{
package GCPlugins::GCfilms::GCPluginMetropoliES;
use base qw(GCPlugins::GCfilms::GCfilmsPluginsBase);
# preProcess
# Called before each page is processed. You can use it to do some substitutions.
# $html is the page content.
# Returns modified version of page content.
sub preProcess
{
my ($self, $html) = @_;
# Recorta el código del listado de resultados, quedandose solo con la parte que nos interesa del html
# el modificador s/.../$1/s trata el flujo como una sola cadena y reemplaza todo el cuerpo con la parte que nos interesa
$html =~ s/^.*(
.*<\/td>\n <\/tr>\n<\/table>)\n\n\n.*$/$1/gs;
# Recorta el código de la ficha, quedandose solo con la parte que nos interesa del html
$html =~ s/^.*(.*<\/td>\n <\/tr>\n<\/table>)\n{parsingList})
{
if ($self->{isDate} eq 2)
{
$self->{isDate} = 0;
$self->{itemsList}[ $self->{itemIdx} ]->{date} = $origtext;
$self->{isTitle} = 1;
return;
}
if ($self->{isTitle} eq 2)
{
$self->{isTitle} = 0;
$self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
$self->{isOrigTit} = 1;
return;
}
if ($self->{isOrigTit} eq 2)
{
$self->{isOrigTit} = 0;
$self->{isDirector} = 1;
return;
}
if ($self->{isDirector} eq 2)
{
$self->{isDirector} = 0;
$self->{itemsList}[ $self->{itemIdx} ]->{director} = $origtext;
$self->{insedeInfos} = 0;
return;
}
return;
}
else
{
$origtext =~ s/\s{2,}//g;
#$origtext =~ s/\n//g if !$self->{insideSynopsis};
if ($self->{insideName})
{
if ($origtext =~ /([^\(]*) \(([0-9]{4})\)/)
{
$self->{curInfo}->{title} = $1;
$self->{curInfo}->{date} = $2;
}
$self->{insideName} = 0;
}
if ($self->{inside}->{td})
{
if ($origtext =~ /(.*), (.*), (.*) Min\./)
{
$self->{curInfo}->{original} = $1;
$self->{curInfo}->{country} = $2;
$self->{curInfo}->{time} = $3;
}
elsif ($self->{insideActors})
{
$self->{insideActors}--;
if ($self->{insideActors} eq 0)
{
$self->{insideActors} = 0;
$self->{curInfo}->{actors} = $origtext;
}
}
}
if ($self->{insideDirector})
{
$self->{insideDirector} = 0;
$self->{curInfo}->{director} = $origtext;
}
if ($self->{inside}->{span})
{
if ($origtext =~ /Int.rpretes:/)
{
$self->{insideActors} = 2;
}
}
if ($self->{insideSynopsis})
{
$self->{curInfo}->{synopsis} = $origtext;
$self->{insideSynopsis} = 0;
$self->{insideInfos} = 0;
}
}
}
# end
# Called each time a HTML tag ends.
# $tagname is the tag name.
sub end
{
    # Handler run each time an HTML tag ends.
    #   $tagname - name of the tag that is ending.
    # Neither the search results page nor the selected film page needs
    # any specific processing here; only the per-tag nesting counter kept
    # in $self->{inside} is decreased.
    my $self    = shift;
    my $tagname = shift;

    $self->{inside}{$tagname}--;
}
# In processing functions below, $self->{parsingList} can be used.
# If true, we are processing a search results page.
# If false, we are processing an item information page.
# $self->{inside}->{tagname} (with correct value for tagname) can be used to test
# if we are in the corresponding tag.
# You have a counter $self->{itemIdx} that has to be used when processing search results.
# It is your responsibility to increment it!
# When processing search results, you have to fill the available fields for results
#
# $self->{itemsList}[$self->{itemIdx}]->{field_name}
#
# When processing a movie page, you need to fill the fields (if available)
# in $self->{curInfo}.
#
# $self->{curInfo}->{field_name}
# start
# Called each time a new HTML tag begins.
# $tagname is the tag name.
# $attr is a reference to an associative array of tag attributes.
# $attrseq is an array reference containing all the attribute names.
# $origtext is the tag text as found in source file
# Returns nothing
sub start
{
    # Handler run each time a new HTML tag begins.
    #   $tagname  - tag name.
    #   $attr     - hash reference with the tag attributes.
    #   $attrseq  - array reference with the attribute names (not used here).
    #   $origtext - tag text as found in the page (not used here).
    # Returns nothing.
    #
    # On a search results page ($self->{parsingList} true) it opens a new
    # result for each matching <tr>, advances the isDate / isOrigTit /
    # isDirector / isTitle state flags (0 = do not process, 1 = next one,
    # 2 = being processed) and stores the link of the result.
    # On a film page it stores the poster URL and the press rating in
    # $self->{curInfo} and raises the insideName / insideInfos /
    # insideDirector / insideSynopsis flags.
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    $self->{inside}->{$tagname}++;

    # Search results page: build the list of results.
    if ($self->{parsingList})
    {
        # Check whether we are entering a <tr> holding the data of a title.
        if (($tagname eq 'tr')
            && (($attr->{bgcolor} eq '#ECF5FF') || ($attr->{bgcolor} eq '#FFFFFF')))
        {
            $self->{insideInfos} = 1;

            # The date is the first field to be read, so flag it as next.
            $self->{isDate}     = 1;
            $self->{isTitle}    = 0;
            $self->{isOrigTit}  = 0;
            $self->{isDirector} = 0;

            # One more result found.
            $self->{itemIdx}++;
            return;
        }

        # Each <td> promotes whichever field is flagged as "next" to
        # "being processed". The flags hold numbers, hence ==.
        if (($tagname eq 'td') && $self->{insideInfos})
        {
            $self->{isDate}     = 2 if $self->{isDate} == 1;
            $self->{isOrigTit}  = 2 if $self->{isOrigTit} == 1;
            $self->{isDirector} = 2 if $self->{isDirector} == 1;
        }

        # The title is read from a link; keep the link URL for the result.
        if (($tagname eq 'a') && $self->{isTitle})
        {
            $self->{isTitle} = 2;
            $self->{itemsList}[ $self->{itemIdx} ]->{url} = $attr->{href};
        }
        return;
    }

    # Film page: collect the information of the selected film.
    if ($tagname eq 'img')
    {
        my $src = $attr->{src};

        # Only the first poster is kept. The check is now a logical,
        # short-circuit test nested under the img test, so curInfo is
        # only looked at for <img> tags.
        # Poster URLs on the site:
        #   thumb:  http://carteles.metropoliglobal.com/galerias/data/1149/1563-2008-rastrooculto-espanol-210459-thumb.jpg
        #   normal: http://carteles.metropoliglobal.com/galerias/data/1149/1563-2008-rastrooculto-espanol-210459.jpg
        # The normal one is derived from the thumbnail address.
        if (!$self->{curInfo}->{image}
            && $src =~ m{\.\./(galerias/data/[0-9]*/.*)-thumb\.jpg})
        {
            $self->{curInfo}->{image} = 'http://carteles.metropoliglobal.com/' . $1 . '.jpg';
        }

        # The score is shown as an image named ratingX.gif, X being
        # between 0 and 5; doubling it maps it onto a 0-10 scale.
        if ($src =~ m{imagenes/rating([0-5])\.gif})
        {
            $self->{curInfo}->{ratingpress} = $1 * 2;
        }
    }
    elsif ($tagname eq 'span')
    {
        if ($attr->{class} eq 'title')
        {
            $self->{insideName}  = 1;
            $self->{insideInfos} = 1;
        }
    }
    elsif ($tagname eq 'td')
    {
        $self->{insideDirector} = 1 if $attr->{width} eq '84%';
        if ($self->{insideInfos} && ($attr->{colspan} eq '2'))
        {
            $self->{insideSynopsis} = 1;
        }
    }
    return;
}
# changeUrl
# Can be used to change URL if item URL and the one used to
# extract information are different.
# Return the modified URL.
#sub changeUrl
#{
# my ($self, $url) = @_;
# return $url;
#}
# getExtra
# Used if the plugin wants an extra column to be displayed in search results
# Return the column title or empty string to hide the column.
#sub getExtra
#{
# return 'Extra';
#}
# getLang
# Used to fill in plugin list with user language plugins
# Return the language used for this site (2 letters code).
sub getLang
{
    # Two-letter code of the language used by this site.
    my $lang = 'ES';
    return $lang;
}
# getAuthor
# Used to display the plugin author in GUI.
# Returns the plugin author name.
sub getAuthor
{
    # Name of the plugin author, as displayed in the GUI.
    my $author = 'DoVerMan';
    return $author;
}
# getName
# Used to display plugin name in GUI.
# Returns the plugin name.
sub getName
{
    # Name of the plugin, as displayed in the GUI.
    my $name = 'CartelesMetropoliGlobal';
    return $name;
}
# getCharset
# Used to convert charset in web pages.
# Returns the charset as specified in pages.
sub getCharset
{
    my ($self) = @_;

    # Charset of the web site pages.
    return 'iso-8859-1';
}
# getItemUrl
# Used to get the full URL of an item page.
# Useful when url on results pages are relative.
# $url is the URL as found with a search.
# Returns the absolute URL.
sub getItemUrl
{
    # Builds the absolute URL of an item page.
    #   $url - URL as found with a search; it will hold ficha.php?......
    # Returns that URL appended to the address of the site pages folder.
    my ($self, $url) = @_;

    my $pagesRoot = 'http://carteles.metropoliglobal.com/paginas/';
    return $pagesRoot . $url;
}
# getSearchUrl
# Used to get the URL to be used to perform searches.
# $word is the query
# Returns the full URL.
sub getSearchUrl
{
    # Builds the URL used to perform a search by title.
    #   $word - the query, inserted as given into the qbtitulo parameter.
    # Returns the full URL.
    my ($self, $word) = @_;

    my $page   = 'http://carteles.metropoliglobal.com/paginas/ficha.php';
    my $params = '?qbtitulo=' . $word . '&qbbuscar=titulo&Submit=Buscar&qsec=buscar';
    return $page . $params;
}
# Constructor
sub new
{
    # Constructor. Creates the object through the parent class
    # constructor, re-blesses it into the requested class and sets the
    # initial parsing state.
    my ($proto) = @_;
    my $class = ref($proto) ? ref($proto) : $proto;
    my $self  = bless($class->SUPER::new(), $class);

    # Fields returned by the plugin (1 yes, 0 no). They are the ones that
    # will appear in the list of results.
    $self->{hasField} = {
        title    => 1,
        date     => 1,
        director => 1,
        actors   => 0,
    };

    # Tells whether useful information is being processed.
    $self->{insideInfos} = 0;

    # State of the processing of the results list for each field
    # (0 do not process, 1 it is the next one, 2 being processed).
    $self->{$_} = 0 for qw(isDate isTitle isOrigTit isDirector);

    $self->{$_} = undef for qw(curName curUrl);

    return $self;
}
}
1;