diff options
author | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <jff@merkur> | 2014-07-06 15:20:38 +0200 |
commit | 126bb8cb6b93240bb4d3a2b816b74c286c3d422b (patch) | |
tree | e66e1dfe77d53a52539489765c88d23e4423ae27 /lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm |
Imported Upstream version 1.7.0upstream/1.7.0
Diffstat (limited to 'lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm')
-rw-r--r-- | lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm new file mode 100644 index 0000000..54dd119 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm @@ -0,0 +1,303 @@ +package GCPlugins::GCbooks::GCSaraiva; + +################################################### +# +# Plugin for a brazilian bookstore named "Saraiva". +# Code written by Guilherme "nirev" Nogueira. +# guilherme at nirev dot org +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginSaraiva; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'div') && ($attr->{class} eq 'hsliceLista')) + { + $self->{isResult} = 1; + $self->{linkCount} = 0; + $self->{itemIdx}++; + } + if (($tagname eq 'span') && ($attr->{class} eq 'entry-title')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'h2') && ($attr->{class} eq 'titulo_autor')) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'a') && $self->{isResult} && $self->{linkCount} == 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{linkCount}++; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'entry-content')) + { + $self->{isResult} = 0; + } + } + else + { + if (($tagname eq 'img') && ($attr->{id} eq 'imgProd')) + { + my $imgid = $attr->{src}; + $imgid =~ s/(.)*pro_id=//; + $imgid =~ s/&.*$//; + $self->{curInfo}->{cover} = 'http://www.livrariasaraiva.com.br/imagem/imagem.dll?tam=2&pro_id='.$imgid; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba1')) + { + $self->{isDescription} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba2')) + { + $self->{divInfo} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'produtosAbasMenus')) + { + $self->{divInfo} = 0; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'tituloprod')) + { + $self->{isTitle} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaAutor();')) + { + $self->{isAuthor} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaMarca();')) + { + $self->{isPublisher} = 1; + } + elsif (($tagname eq 'font')) + { + $self->{isAnalyse} = 1; + } + elsif (($tagname eq 'b') && $self->{divInfo} == 1) + { + $self->{isAnalyse} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + my $texto = $origtext; + $self->{itemsList}[$self->{itemIdx}]->{title} = $texto; + $self->{isTitle} = 0; + } + if ($self->{isAuthor}) + { + my $texto = $origtext; + $texto =~ s/<br>//; + my @dados = split(' / ', $texto); + $self->{itemsList}[$self->{itemIdx}]->{authors} = $dados[0]; + $self->{isAuthor} = 0; + } + } + else + { + if ($self->{isAuthor}) + { + my @authors = split(';', $origtext); + my $authors = ''; + my $tam = @authors; + my $count = 0; + for($count = 0; $count < $tam; $count++) + { + $authors[$count] =~ s/^\s*//gi; + $authors[$count] =~ s/\s*$//gi; + my @names = split(', ', $authors[$count]); + $authors .= ',' if ($count); + $authors .= $names[1].' '.$names[0]; + + } + $self->{curInfo}->{authors} = $authors; + $self->{isAuthor} = 0; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + elsif ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{curInfo}->{description} =~ s/^\s*//; + $self->{curInfo}->{description} =~ s/\s+/ /; + $self->{isDescription} = 0; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/I\.S\.B\.N/i); + $self->{isFormat} = 1 if ($origtext =~ m/Acabamento/i); + $self->{isPublication} = 1 if ($origtext =~ m/Edição/i); + $self->{isPage} = 1 if ($origtext =~ m/Número de Paginas/i); + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + elsif ($self->{isFormat}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + elsif ($self->{isPage}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 0, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isDescription} = 0; + $self->{isResult} = 0; + $self->{linkCount} = 0; + $self->{divInfo} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + my $inicio_res = index($html,'<div id="esquerdaPesquisa" style="display:none;">esquerdaPesquisa</div>'); + if ( $inicio_res >= 0 ) + { + $html = substr($html, $inicio_res); + } + my $fim_res = index($html,'<div id="direitaPesquisa" style="display:none;">direitaPesquisa</div>'); + if ( $fim_res >= 0 ) + { + $html = substr($html, 0, $fim_res); + } + $html = '' if ($inicio_res < 0); + } + else + { + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + $word =~ s|\s+|\+|; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + else + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.livrariasaraiva.com.br".$url; + } + + sub getName + { + return "Saraiva"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'nirev'; + } + + sub getLang + { + return 'PT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; |