package GCPlugins::GCbooks::GCSaraiva;
###################################################
#
# Plugin for a Brazilian bookstore named "Saraiva".
# Code written by Guilherme "nirev" Nogueira.
# guilherme at nirev dot org
#
###################################################
use strict;
use utf8;
use GCPlugins::GCbooks::GCbooksCommon;
{
package GCPlugins::GCbooks::GCPluginSaraiva;
use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
use URI::Escape;
# Start-tag callback.
#
# Looks at the tag that has just been opened and raises the state flags
# that the text() callback later uses to decide what the next piece of
# text means.
#
#   $self     - plugin object holding the parser state
#   $tagname  - name of the opened tag
#   $attr     - hash reference with the tag's attributes
#   $attrseq  - unused here
#   $origtext - unused here
sub start
{
    my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

    # Per-tag nesting counter; end() decrements it again.
    $self->{inside}->{$tagname}++;

    if ($self->{parsingList})
    {
        # Search-results page: elements are recognised by their CSS class.
        my $cssClass = $attr->{class};

        if ($tagname eq 'div')
        {
            if ($cssClass eq 'hsliceLista')
            {
                # A new result starts: advance to the next list slot and
                # wait for its first link.
                $self->{isResult}  = 1;
                $self->{linkCount} = 0;
                $self->{itemIdx}++;
            }
            elsif ($cssClass eq 'entry-content')
            {
                # Links from here on are not taken as the result URL.
                $self->{isResult} = 0;
            }
        }
        elsif ($tagname eq 'span')
        {
            $self->{isTitle} = 1 if $cssClass eq 'entry-title';
        }
        elsif ($tagname eq 'h2')
        {
            $self->{isAuthor} = 1 if $cssClass eq 'titulo_autor';
        }
        elsif ($tagname eq 'a')
        {
            # Only the first link inside a result is kept as its URL.
            if ($self->{isResult} && $self->{linkCount} == 0)
            {
                $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
                $self->{linkCount}++;
            }
        }
    }
    else
    {
        # Item page: elements are recognised by id or by link target.
        if ($tagname eq 'img')
        {
            if ($attr->{id} eq 'imgProd')
            {
                # Pull the pro_id value out of the image URL and rebuild
                # the cover URL with tam=2.
                my $productId = $attr->{src};
                $productId =~ s/(.)*pro_id=//;
                $productId =~ s/&.*$//;
                $self->{curInfo}->{cover} = 'http://www.livrariasaraiva.com.br/imagem/imagem.dll?tam=2&pro_id='.$productId;
            }
        }
        elsif ($tagname eq 'div')
        {
            my $id = $attr->{id};

            if ($id eq 'aba1')
            {
                $self->{isDescription} = 1;
            }
            elsif ($id eq 'aba2')
            {
                # Entering the block whose <b> labels are analysed.
                $self->{divInfo} = 1;
            }
            elsif ($id eq 'produtosAbasMenus')
            {
                $self->{divInfo} = 0;
            }
            elsif ($id eq 'tituloprod')
            {
                $self->{isTitle} = 1;
            }
        }
        elsif ($tagname eq 'a')
        {
            my $target = $attr->{href};

            if ($target eq 'javascript:PesquisaAutor();')
            {
                $self->{isAuthor} = 1;
            }
            elsif ($target eq 'javascript:PesquisaMarca();')
            {
                $self->{isPublisher} = 1;
            }
        }
        elsif ($tagname eq 'font')
        {
            # Any <font> makes the next text a candidate field label.
            $self->{isAnalyse} = 1;
        }
        elsif ($tagname eq 'b')
        {
            # <b> is a candidate label only inside the 'aba2' block.
            $self->{isAnalyse} = 1 if $self->{divInfo} == 1;
        }
    }
}
# End-tag callback: lowers the nesting counter that start() raised for
# this tag name.
#
#   $self    - plugin object holding the parser state
#   $tagname - name of the tag being closed
sub end
{
    my $self    = shift;
    my $tagname = shift;

    return $self->{inside}{$tagname}--;
}
# Text callback.
#
# Stores $origtext in whichever field the flags raised by start() say is
# expected next, then clears that flag so each flag consumes one text
# chunk only.
#
#   $self     - plugin object holding the parser state
#   $origtext - the text found between tags
#
# List mode ($self->{parsingList} true) fills the current entry of
# $self->{itemsList}; otherwise the fields of $self->{curInfo} are filled.
sub text
{
    my ($self, $origtext) = @_;

    if ($self->{parsingList})
    {
        if ($self->{isTitle})
        {
            $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
            $self->{isTitle} = 0;
        }
        if ($self->{isAuthor})
        {
            my $texto = $origtext;
            # Drop the first line break. Written as an explicit escape
            # rather than a raw line break inside the pattern, so the
            # match does not depend on the line endings of this file.
            $texto =~ s/\r?\n//;
            # Only the part before the first ' / ' is kept as the author.
            my @dados = split(' / ', $texto);
            $self->{itemsList}[$self->{itemIdx}]->{authors} = $dados[0];
            $self->{isAuthor} = 0;
        }
    }
    else
    {
        if ($self->{isAuthor})
        {
            # Authors arrive as "Surname, Given; Surname, Given; ..." and
            # are stored as "Given Surname,Given Surname,...".
            my @formatted;
            for my $author (split(';', $origtext))
            {
                $author =~ s/^\s+//;
                $author =~ s/\s+$//;
                # Skip empty segments (e.g. ";;" or whitespace-only text)
                # instead of emitting a lone blank entry.
                next if $author eq '';

                my ($surname, $given) = split(', ', $author);
                # A name without ", " has no given-name part; use it as
                # is rather than prefixing it with a stray space.
                push @formatted,
                    join(' ', grep { defined $_ && length $_ } ($given, $surname));
            }
            $self->{curInfo}->{authors} = join(',', @formatted);
            $self->{isAuthor} = 0;
        }
        elsif ($self->{isPublisher})
        {
            $self->{curInfo}->{publisher} = $origtext;
            $self->{isPublisher} = 0;
        }
        elsif ($self->{isTitle})
        {
            $self->{curInfo}->{title} = $origtext;
            $self->{isTitle} = 0;
        }
        elsif ($self->{isDescription})
        {
            my $description = $origtext;
            $description =~ s/^\s*//;
            # Collapse every whitespace run (not just the first one) into
            # a single space.
            $description =~ s/\s+/ /g;
            $self->{curInfo}->{description} = $description;
            $self->{isDescription} = 0;
        }
        elsif ($self->{isAnalyse})
        {
            # This text is a field label: arm the flag of the field whose
            # value is expected in the next text chunk.
            $self->{isISBN}        = 1 if ($origtext =~ m/I\.S\.B\.N/i);
            $self->{isFormat}      = 1 if ($origtext =~ m/Acabamento/i);
            $self->{isPublication} = 1 if ($origtext =~ m/Edição/i);
            $self->{isPage}        = 1 if ($origtext =~ m/Número de Paginas/i);
            $self->{isAnalyse} = 0;
        }
        elsif ($self->{isISBN})
        {
            $self->{curInfo}->{isbn} = $origtext;
            $self->{isISBN} = 0;
        }
        elsif ($self->{isFormat})
        {
            $self->{curInfo}->{format} = $origtext;
            $self->{isFormat} = 0;
        }
        elsif ($self->{isPublication})
        {
            $self->{curInfo}->{publication} = $origtext;
            $self->{isPublication} = 0;
        }
        elsif ($self->{isPage})
        {
            $self->{curInfo}->{pages} = $origtext;
            $self->{isPage} = 0;
        }
    }
}
# Constructor.
#
# Creates the object through the base-class constructor, re-blesses it
# into the requested class and resets all parser state flags.
# Accepts either a class name or an existing object as invocant.
#
# Returns the new plugin object.
sub new
{
    my ($proto) = @_;
    my $class = ref($proto) || $proto;

    my $self = $class->SUPER::new();
    bless($self, $class);

    # NOTE(review): presumably tells the framework which fields the
    # results list provides - confirm against the base class.
    $self->{hasField} = {
        (map { $_ => 1 } qw(title authors)),
        (map { $_ => 0 } qw(publication format edition serie)),
    };

    # Every state flag and counter used by start() and text() starts at 0.
    $self->{$_} = 0 for qw(
        isTitle isAuthor isAnalyse isPublisher isPublication
        isPage isISBN isFormat isDescription
        isResult linkCount divInfo
    );

    return $self;
}
sub preProcess
{
my ($self, $html) = @_;
if ($self->{parsingList})
{
my $inicio_res = index($html,'