From 126bb8cb6b93240bb4d3a2b816b74c286c3d422b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 6 Jul 2014 15:20:38 +0200 Subject: Imported Upstream version 1.7.0 --- lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm | 59 +++ lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm | 59 +++ lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm | 391 +++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm | 352 +++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm | 61 +++ lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm | 56 +++ lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm | 57 +++ lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm | 61 +++ lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm | 477 ++++++++++++++++++++ .../GCPlugins/GCbooks/GCBibliotekaNarodowa.pm | 374 ++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm | 295 +++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCBol.pm | 485 +++++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm | 479 ++++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm | 420 ++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm | 430 ++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm | 238 ++++++++++ lib/gcstar/GCPlugins/GCbooks/GCFnac.pm | 462 ++++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm | 390 +++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm | 370 ++++++++++++++++ .../GCPlugins/GCbooks/GCInternetBokHandeln.pm | 464 ++++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm | 376 ++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm | 334 ++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm | 418 ++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCMareno.pm | 365 ++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm | 333 ++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm | 389 +++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm | 447 +++++++++++++++++++ lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm | 462 ++++++++++++++++++++ 
lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm | 303 +++++++++++++ .../GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm | 331 ++++++++++++++ .../GCPlugins/GCbooks/GCbooksAmazonCommon.pm | 65 +++ lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm | 61 +++ 32 files changed, 9864 insertions(+) create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCBol.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCFnac.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCMareno.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm create mode 100644 
lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm create mode 100644 lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm (limited to 'lib/gcstar/GCPlugins/GCbooks') diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm new file mode 100644 index 0000000..34997a8 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm @@ -0,0 +1,59 @@ +package GCPlugins::GCbooks::GCAdlibrisFI; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksAdlibrisCommon;
+
+# Adlibris (FI) plugin: only new, getName and getLang are defined here;
+# everything else is inherited from GCbooksAdlibrisPluginsBase.
+{
+    package GCPlugins::GCbooks::GCPluginAdlibrisFI;
+
+    use base qw(GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase);
+    use URI::Escape;
+
+    # Constructor: obtains the object from the parent constructor, blesses it
+    # into the invoking class and sets its isLang field to 'fi'.
+    sub new {
+        my ($invocant) = @_;
+        my $class = ref($invocant) || $invocant;
+
+        my $self = bless $class->SUPER::new(), $class;
+        $self->{isLang} = 'fi';
+        return $self;
+    }
+
+    # Plugin name string.
+    sub getName {
+        return "Adlibris (FI)";
+    }
+
+    # Plugin language tag.
+    sub getLang {
+        return 'FI';
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm
new file mode 100644
index 0000000..f17abdb
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm
@@ -0,0 +1,59 @@
+package GCPlugins::GCbooks::GCAdlibrisSV;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksAdlibrisCommon;
+
+# Adlibris (SV) plugin: only new, getName and getLang are defined here;
+# everything else is inherited from GCbooksAdlibrisPluginsBase.
+{
+    package GCPlugins::GCbooks::GCPluginAdlibrisSV;
+
+    use base qw(GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase);
+    use URI::Escape;
+
+    # Constructor: obtains the object from the parent constructor, blesses it
+    # into the invoking class and sets its isLang field to 'se'.
+    sub new {
+        my ($invocant) = @_;
+        my $class = ref($invocant) || $invocant;
+
+        my $self = bless $class->SUPER::new(), $class;
+        $self->{isLang} = 'se';
+        return $self;
+    }
+
+    # Plugin name string.
+    sub getName {
+        return "Adlibris (SV)";
+    }
+
+    # Plugin language tag.
+    sub getLang {
+        return 'SV';
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm b/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm
new file mode 100644
index 0000000..44f3da0
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm
@@ -0,0 +1,391 @@
+package GCPlugins::GCbooks::GCAlapage;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginAlapage; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'div') && ($attr->{class} eq 'infos_produit')) + { + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + } + elsif ($tagname eq 'div') + { + $self->{isBook} = 0 ; + } + elsif (($tagname eq 'a') && ($self->{isUrl}) && ($self->{isBook})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title}; + $self->{isUrl} = 0 ; + } + elsif (($tagname eq 'a') && ( index($attr->{href},"mot_auteurs") >= 0) && ($self->{isBook})) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'br') && ($self->{isBook})) + { + $self->{isPublisher} = 1 ; + } + } + else + { + if ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isPage} eq 1) + { + $self->{isPage} = 2 ; + } + elsif ($tagname eq 'h2') + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'tpfcommentaire') && ($self->{isDescription} eq 1)) + { + $self->{isDescription} = 2 ; + } + elsif (($tagname eq 'a') && ( index($attr->{href},"mot_auteurs") >= 0)) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{class} eq 'thickbox tooltip') && ($self->{curInfo}->{cover} eq '')) + { + my $html = $self->loadPage( "http://www.alapage.com" . 
$attr->{href}, 0, 1); + my $found = index($html,"\"laplusgrande\""); + if ( $found >= 0 ) + { + my $found2 = index($html,"&m=v"); + $html = substr($html, $found +length('"laplusgrande"'),length($html)- $found -length('"laplusgrande"')); + + my @array = split(/"/,$html); + #" + $self->{curInfo}->{cover} = "http://www.alapage.com" . $array[1]; + if ( $found2 >= 0 ) + { + $self->{curInfo}->{backpic} = $self->{curInfo}->{cover}; + $self->{curInfo}->{backpic} =~ s|&m=r|&m=v|gi; + } + } + } + elsif ($tagname eq 'li') + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'a') && ( index($attr->{href},"mot_cdu") >= 0)) + { + $self->{isGenre} = 1 ; + } + elsif (($tagname eq 'a') && ( index($attr->{href},"mot_coll_serie") >= 0)) + { + $self->{isSerie} = 1 ; + } + elsif (($tagname eq 'a') && ( index($attr->{href},"mot_editeur") >= 0) && ( index($attr->{href},"mot_coll_serie") == -1)) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{name} eq 'comment')) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'blocWithMargin') && ($self->{isDescription}) && ($self->{curInfo}->{description} eq '') ) + { + $self->{isDescription} = 2 ; + } + elsif (($tagname eq 'a') && ($attr->{name} ne '')) + { + $self->{isDescription} = 0 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'edito FP_commentaire')) + { + $self->{isDescription} = 1 ; + } + + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isAuthor}) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//; + + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', '; + 
$self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext; + } + $self->{isAuthor} = 0 ; + } + elsif ($self->{isPublisher}) + { + my @array = split(/,/,$origtext); + + $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0]; + $self->{itemsList}[$self->{itemIdx}]->{edition} =~ s/^\s+//; + $self->{itemsList}[$self->{itemIdx}]->{edition} =~ s/\s+$//; + + if ($#array ne 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[$#array]; + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/^\s+//; + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/\s+$//; + } + + $self->{isPublisher} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{curInfo}->{language} = 'Français'; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isFormat} = 1 if ($origtext =~ m/Dimensions/i); + $self->{isPublication} = 1 if ($origtext =~ m/Date de parution/i); + $self->{isPage} = 1 if ($origtext =~ m/Nombre de pages/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN} eq 2) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0 ; + } + elsif ($self->{isGenre}) + { + my @array = split(/,/,$origtext); + my $element; + foreach $element (@array) + { + $element =~ s/^\s+//; + $self->{curInfo}->{genre} .= $element; + $self->{curInfo}->{genre} .= ","; + } + $self->{isGenre} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isSerie}) + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0 ; + } + elsif ($self->{isFormat} eq 2) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0 ; + } + elsif 
($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0 ; + } + elsif ($self->{isPage} eq 2) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0 ; + } + elsif ($self->{isDescription} eq 2) + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0 ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 1, + serie => 0, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isAnalyse} = 0; + $self->{isISBN} = 0; + $self->{isGenre} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isFormat} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|

||gi; + $html =~ s|

||gi; + } + else + { + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|

||gi; + $html =~ s|

||gi; + } + + return $html; + + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.alapage.com/-/Recherche/?type=1&mot_isbn=" . $word; + } + else + { + return "http://www.alapage.com/-/Recherche/?type=1&mot_titre=" . $word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.alapage.com" . $url; + } + + sub getName + { + return "Alapage"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['isbn','title']; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getDefaultPictureSuffix + { + return '.jpg'; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm new file mode 100644 index 0000000..7d70ec4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm @@ -0,0 +1,352 @@ +package GCPlugins::GCbooks::GCAmazon; + +################################################### +# +# Copyright 2005-2009 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginAmazon; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use XML::Simple; + use LWP::Simple qw($ua); + use Encode; + use HTML::Entities; + use GCUtils; + + sub parse + { + my ($self, $page) = @_; + return if $page =~ /^new; + + if ($self->{parsingList}) + { + $xml = $xs->XMLin($page, ForceArray => ['Item','Author'], KeyAttr => []); + my $book; + foreach $book ( @{ $xml -> {'Items'} -> {'Item'} }) + { + $self->{itemIdx}++; + my $url = $self->baseAWSUrl."&Operation=ItemLookup&ResponseGroup=Large,EditorialReview&ItemId=".$book->{ASIN}; + + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + $self->{itemsList}[$self->{itemIdx}]->{title} = $book->{ItemAttributes}->{'Title'}; + for my $author (@{$book->{ItemAttributes}->{'Author'}}) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " + if $self->{itemsList}[$self->{itemIdx}]->{authors}; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $author; + } + $self->{itemsList}[$self->{itemIdx}]->{publication} = $book->{ItemAttributes}->{'PublicationDate'}; + $self->{itemsList}[$self->{itemIdx}]->{format} = $book->{ItemAttributes}->{'Binding'}; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $book->{ItemAttributes}->{'Edition'}; + } + } + else + { + $xml = $xs->XMLin($page, ForceArray => ['Author','EditorialReview','Language'], KeyAttr => []); + $self->{curInfo}->{title} = $xml->{Items}->{Item}->{ItemAttributes}->{Title}; + for my $author (@{$xml->{Items}->{Item}->{ItemAttributes}->{Author}}) + { + push @{$self->{curInfo}->{authors}}, [$author]; + } + + my $htmlDescription; + if 
($xml->{Items}->{Item}->{EditorialReviews}->{EditorialReview}[0]->{Content}) + { + $htmlDescription = $xml->{Items}->{Item}->{EditorialReviews}->{EditorialReview}[0]->{Content}; + } + else + { + # Unfortunately the api doesn't always return the product description, which is due to + # copyright concerns or something. In this case, grab the product html and parse it for + # the description. + my $response = $ua->get($xml->{Items}->{Item}->{DetailPageURL}); + my $result; + eval { + $result = $response->decoded_content; + }; + + # Replace some bad characters. TODO - will probably need to extend this for de/jp plugins + $result =~ s|\x{92}|'|gi; + $result =~ s|’|'|gi; + $result =~ s|•|*|gi; + $result =~ s|œ|oe|gi; + $result =~ s|…|...|gi; + $result =~ s|\x{85}|...|gi; + $result =~ s|\x{8C}|OE|gi; + $result =~ s|\x{9C}|oe|gi; + $result =~ s|ü|ü|g; + $result =~ s|ß|ß|g; + $result =~ s|ö|ö|g; + $result =~ s|Ü|Ü|g; + $result =~ s|ä|ä|g; + $result =~ s/„/»/gm; + $result =~ s/“/«/gm; + + # Chop out the product description + $result =~ /
(.*?)<(\/)*?div/s; + $htmlDescription = $1; + + # Decode + decode_entities($htmlDescription); + $htmlDescription = decode('ISO-8859-1', $htmlDescription); + } + + # Replace some html with line breaks, strip out the rest + $htmlDescription =~ s/
/\n/ig; + $htmlDescription =~ s/

/\n\n/ig; + $htmlDescription =~ s/<(.*?)>//gi; + $htmlDescription =~ s/^\s*//; + $htmlDescription =~ s/\s*$//; + $htmlDescription =~ s/ {1,}/ /g; + $self->{curInfo}->{description} = $htmlDescription; + + $self->{curInfo}->{publisher} = $xml->{Items}->{Item}->{ItemAttributes}->{Publisher} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Publisher})); + $self->{curInfo}->{publication} = $xml->{Items}->{Item}->{ItemAttributes}->{PublicationDate} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{PublicationDate})); + $self->{curInfo}->{language} = $xml->{Items}->{Item}->{ItemAttributes}->{Languages}->{Language}[0]->{Name} + if (ref($xml->{Items}->{Item}->{ItemAttributes}->{Languages}->{Language})); + $self->{curInfo}->{pages} = $xml->{Items}->{Item}->{ItemAttributes}->{NumberOfPages} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{NumberOfPages})); + $self->{curInfo}->{isbn} = $xml->{Items}->{Item}->{ItemAttributes}->{EAN} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{EAN})); + $self->{curInfo}->{format} = $xml->{Items}->{Item}->{ItemAttributes}->{Binding} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Binding})); + $self->{curInfo}->{edition} = $xml->{Items}->{Item}->{ItemAttributes}->{Edition} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Edition})); + $self->{curInfo}->{web} = $xml->{Items}->{Item}->{DetailPageURL}; + + # Genre handling via Amazon's browsenodes. 
Stupidly complicated way of doing things, IMO + # Loop through all the nodes: + for my $node (@{$xml->{Items}->{Item}->{BrowseNodes}->{BrowseNode}}) + { + my $genre = ''; + my $ancestor = $node; + + # Push the lowest node to the temporary genre list + my @genre_list = ($node->{Name}); + + # Start stepping down through the current node to find it's children + while ($ancestor->{Ancestors}->{BrowseNode}) + { + $ancestor = $ancestor->{Ancestors}->{BrowseNode}; + if (($ancestor->{Name} eq 'Specialty Stores') || + ($ancestor->{Name} eq 'Refinements') || + ($ancestor->{Name} eq 'Self Service') || + ($ancestor->{Name} eq 'Specialty Boutique')) + { + # Some categories we definetly want to ignore, since they are full of rubbish tags + $genre = 'ignore'; + last; + } + elsif ($ancestor->{Name} =~ m/A\-Z/) + { + # Clear out the current genres from the node, will be full of rubbish like "Authors A-K" + # Keep looping afterwards though, since there could be valid tags below the author + # specific ones + undef(@genre_list); + } + elsif ($ancestor->{Name} eq 'Subjects') + { + # Don't go deeper than a Subjects node + last; + } + else + { + # Add the current node to the temporary list, if it's not already included in either list + push @genre_list, $ancestor->{Name} + if ((!GCUtils::inArrayTest($ancestor->{Name}, @genre_list)) && + (!GCUtils::inArrayTest($ancestor->{Name}, @{$self->{curInfo}->{genre}}))); + } + } + + if ($genre ne 'ignore') + { + # Add temporary list to item info + push @{$self->{curInfo}->{genre}}, [$_] foreach @genre_list; + } + } + + # Let's sort the list for good measure + @{$self->{curInfo}->{genre}} = sort @{$self->{curInfo}->{genre}}; + + + # Fetch either the big original pic, or just the small thumbnail pic + if ($self->{bigPics}) + { + $self->{curInfo}->{cover} = $xml->{Items}->{Item}->{LargeImage}->{URL}; + } + else + { + $self->{curInfo}->{cover} = $xml->{Items}->{Item}->{SmallImage}->{URL}; + } + } + } + + sub new + { + my $proto = shift; + my $class = 
ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 1, + }; + + return $self; + } + + sub getItemUrl + { + my ($self, $url) = @_; + if (!$url) + { + # If we're not passed a url, return a hint so that gcstar knows what type + # of addresses this plugin handles + $url = "http://".$self->baseWWWamazonUrl(); + } + elsif ($url !~ m/sowacs.appspot.com/) + { + # Convert amazon url to aws url + $url =~ /\/dp\/(\w*)[\/|%3F]/; + my $asinid = $1; + $url = $self->baseAWSUrl."&Operation=ItemLookup&ResponseGroup=Large,EditorialReview&ItemId=".$asinid; + } + return $url; + } + + sub preProcess + { + my ($self, $html) = @_; + + return $html; + } + + sub decodeEntitiesWanted + { + return 0; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + my $key = + ($self->{searchField} eq 'authors') ? 'Author' : + ($self->{searchField} eq 'title') ? 'Title' : + ($self->{searchField} eq 'isbn') ? 
'Keywords' : + ''; + $word =~ s/\D//g + if $key eq 'Keywords'; + return $self->baseAWSUrl."&Operation=ItemSearch&$key=$word&SearchIndex=Books&ResponseGroup=Medium"; + } + + sub baseAWSUrl + { + my $self = shift; + return "http://sowacs.appspot.com/AWS/%5Bamazon\@gcstar.org%5D".$self->baseAmazonUrl()."/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=AKIAJJ5TJWI62A5OOTQQ&AssociateTag=AKIAJJ5TJWI62A5OOTQQ"; + } + + sub baseAmazonUrl + { + return "ecs.amazonaws.com"; + } + + sub baseWWWamazonUrl + { + return "www.amazon.com"; + } + + sub changeUrl + { + my ($self, $url) = @_; + # Make sure the url is for the api, not the main movie page + return $self->getItemUrl($url); + } + + sub getName + { + return "Amazon (US)"; + } + + sub getAuthor + { + return 'Zombiepig'; + } + + sub getLang + { + return 'EN'; + } + + sub getCharset + { + my $self = shift; + + return "UTF-8"; + } + + sub getSearchCharset + { + my $self = shift; + + # Need urls to be double character encoded + return "utf8"; + } + + sub convertCharset + { + my ($self, $value) = @_; + return $value; + } + + sub getNotConverted + { + my $self = shift; + return []; + } + + sub isPreferred + { + return 1; + } + + sub getSearchFieldsArray + { + return ['title', 'authors', 'isbn']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm new file mode 100644 index 0000000..eb51a4c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm @@ -0,0 +1,61 @@ +package GCPlugins::GCbooks::GCAmazonCA; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+# Amazon (CA) plugin: overrides store-specific values of GCPluginAmazon.
+{
+    package GCPlugins::GCbooks::GCPluginAmazonCA;
+
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    # Host of the regular Amazon web site for this store.
+    sub baseWWWamazonUrl {
+        return "www.amazon.ca";
+    }
+
+    # Host of the Amazon web-service endpoint for this store.
+    sub baseAmazonUrl {
+        return "ecs.amazonaws.ca";
+    }
+
+    # Plugin name string.
+    sub getName {
+        return "Amazon (CA)";
+    }
+
+    # Plugin language tag.
+    sub getLang {
+        return 'EN';
+    }
+
+    # Returns 0, whereas the parent GCPluginAmazon returns 1.
+    sub isPreferred {
+        return 0;
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm
new file mode 100644
index 0000000..0c87502
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm
@@ -0,0 +1,56 @@
+package GCPlugins::GCbooks::GCAmazonDE;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+# Amazon (DE) plugin: overrides store-specific values of GCPluginAmazon.
+{
+    package GCPlugins::GCbooks::GCPluginAmazonDE;
+
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    # Host of the regular Amazon web site for this store.
+    sub baseWWWamazonUrl {
+        return "www.amazon.de";
+    }
+
+    # Host of the Amazon web-service endpoint for this store.
+    sub baseAmazonUrl {
+        return "ecs.amazonaws.de";
+    }
+
+    # Plugin name string.
+    sub getName {
+        return "Amazon (DE)";
+    }
+
+    # Plugin language tag.
+    sub getLang {
+        return 'DE';
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm
new file mode 100644
index 0000000..d87af48
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm
@@ -0,0 +1,57 @@
+package GCPlugins::GCbooks::GCAmazonFR;
+
+###################################################
+#
+# Copyright 2005-2009 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+# Amazon (FR) plugin: overrides store-specific values of GCPluginAmazon.
+{
+    package GCPlugins::GCbooks::GCPluginAmazonFR;
+
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    # Host of the regular Amazon web site for this store.
+    sub baseWWWamazonUrl {
+        return "www.amazon.fr";
+    }
+
+    # Host of the Amazon web-service endpoint for this store.
+    sub baseAmazonUrl {
+        return "ecs.amazonaws.fr";
+    }
+
+    # Plugin name string.
+    sub getName {
+        return "Amazon (FR)";
+    }
+
+    # Plugin language tag.
+    sub getLang {
+        return 'FR';
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm
new file mode 100644
index 0000000..e39a2de
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm
@@ -0,0 +1,61 @@
+package GCPlugins::GCbooks::GCAmazonUK;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+# Amazon (UK) plugin: overrides store-specific values of GCPluginAmazon.
+{
+    package GCPlugins::GCbooks::GCPluginAmazonUK;
+
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    # Host of the regular Amazon web site for this store.
+    sub baseWWWamazonUrl {
+        return "www.amazon.co.uk";
+    }
+
+    # Host of the Amazon web-service endpoint for this store.
+    sub baseAmazonUrl {
+        return "ecs.amazonaws.co.uk";
+    }
+
+    # Plugin name string.
+    sub getName {
+        return "Amazon (UK)";
+    }
+
+    # Plugin language tag.
+    sub getLang {
+        return 'EN';
+    }
+
+    # Returns 0, whereas the parent GCPluginAmazon returns 1.
+    sub isPreferred {
+        return 0;
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm b/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm
new file mode 100644
index 0000000..36074aa
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm
@@ -0,0 +1,477 @@
+package GCPlugins::GCbooks::GCBDGest;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBDGest; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + # tableau pour stocker l'identifiant propre à bdgest + my @tableau; + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + # parse une liste de résultat + if ($self->{parsingList}) + { + if (($tagname eq 'tr')) + { + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + } + elsif (($tagname eq 'a') && ($self->{isUrl}) && ($self->{isBook}) && (index($attr->{href},"serie-") >= 0)) + { + $self->{itemIdx}++; + $self->{isFound} = 1 ; + $tableau[$self->{itemIdx}] = substr($attr->{href},index( $attr->{href},"#")+1); + #on retravaille l'url pour avoir toutes les pages de la série + my $urlRecherche = substr($attr->{href},0,index($attr->{href},"."))."__10000".substr($attr->{href},index($attr->{href},".")); + $self->{itemsList}[$self->{itemIdx}]->{url} = $urlRecherche; + $self->{isSerie} = 1 ; + $self->{isUrl} = 0 ; + } + elsif (($tagname eq 'a') && ($attr->{name} eq 'TitreAlbum')&& ($self->{isBook}) && ($attr->{title} ne '')) + { + $self->{isTitle} = 1 ; + $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title}; + } + elsif (($tagname eq 'td') && $self->{isTitle} eq 1) + { + $self->{isPublisher} = 1 ; + $self->{isTitle} = 0; + } + elsif (($tagname eq 'td') && $self->{isPublisher} eq 2) + { + $self->{isPublication} = 1 ; + $self->{isPublisher} = 0; + } + } + else # parse un item + { + if (($tagname eq 'a') && ($attr->{name} eq $tableau[$self->{wantedIdx}])) + { + $self->{isTitle} = 1 ; + $self->{isCover} = 1; + $self->{isBook} = 1 ; + } + elsif ($tagname eq 'html') + { 
+ $self->{isCover} = 0 ; + } + elsif ($tagname eq 'head') + { + $self->{isCover} = 0 ; + } + elsif (($tagname eq 'a') && ($attr->{name} ne $tableau[$self->{wantedIdx}]) && ($attr->{name} ne '')) + { + $self->{isBook} = 0 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#294A6B') && ($attr->{style} eq 'font-family:Trebuchet MS; FONT-SIZE: 11pt;') && ($self->{isTitle} eq 1)) + { + $self->{isTitle} = 2 ; + } +# elsif (($tagname eq 'a') && ($self->{isCover} eq 0) && (index($attr->{href},"Couvertures") >= 0)) + elsif (($tagname eq 'a') && ($self->{isCover} eq 0)) + { + my $urlimage = $attr->{href}; + $urlimage =~ s/\'//g; + $urlimage =~ s/\)//g; + $urlimage = substr($urlimage,index($urlimage,"Couvertures/")); + $self->{curInfo}->{cover} = 'http://www.bedetheque.com/'.$urlimage; + } + elsif (($tagname eq 'a') && ($self->{isBook}) && (index($attr->{href},"auteur") >= 0)) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'td') && ($self->{isPublisher} eq 1)) + { + $self->{isPublisher} = 2 ; + } + elsif (($tagname eq 'td') && $self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif (($tagname eq 'td') && $self->{isEdition} eq 1) + { + $self->{isEdition} = 2 ; + } + elsif (($tagname eq 'td') && $self->{isFormatPublication} eq 1) + { + $self->{isFormatPublication} = 2 ; + } + elsif (($tagname eq 'td') && $self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif (($tagname eq 'td') && $self->{isPage} eq 1) + { + $self->{isPage} = 2 ; + } + elsif (($tagname eq 'i') && $self->{isDescription} eq 1) + { + $self->{isDescription} = 2 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isSerie}) + { + $self->{itemsList}[$self->{itemIdx}]->{serie} = $origtext; + $self->{isSerie} = 0 ; + } + if ($self->{isPublisher} eq 1) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = 
$origtext; + $self->{isPublisher} = 2 ; + } + if ($self->{isPublication} eq 1) + { + $self->{itemsList}[$self->{itemIdx}]->{publication} = $origtext; + $self->{isPublication} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Je reinitialise le champs s il est cense etre vide + $origtext =~ s/#TPFCHAMPSVIDE#//; + if ($self->{isTitle} eq 2) + { + # si le titre contient INT (cas intégrale et donc sans titre de la série) on concaténe la série au titre. + if($origtext =~ /INT/i) + { + # on enléve le préfixe INT ou int et le point + $origtext =~ s/INT//i; + $origtext =~ s/.//; + $self->{curInfo}->{title} = $self->{itemsList}[$self->{wantedIdx}]->{serie}." ".$origtext; + } + else + { + # si numéro avant titre on le transforme en tome et on concaténe avec le nom de la série. + if($origtext =~ /[0-9]./) + { + my $tome = substr($origtext,0,index($origtext,".")); + $tome =~ s/^\s+//; + my $titre = substr($origtext,index($origtext,".")+1); + $titre =~ s/^\s+//; + $self->{curInfo}->{title} = $self->{itemsList}[$self->{wantedIdx}]->{serie}." Tome ".$tome ." 
: ".$titre; + } + else + { + $self->{curInfo}->{title} = $origtext; + } + } + $self->{curInfo}->{web} = "http://www.bedetheque.com/".$self->{itemsList}[$self->{wantedIdx}]->{url}; + $self->{curInfo}->{serie} = $self->{itemsList}[$self->{wantedIdx}]->{serie}; + $self->{curInfo}->{language} = 'Français'; + $self->{isTitle} = 0 ; + } + elsif (($self->{isAuthor}) && ($self->{nbAuthor} < 3)) + { + # Enleve la virgule entre le nom et le prenom + $origtext =~ s/,//g; + if (($origtext ne '') && ($origtext ne '#TPF NOIR ET BLANC TPF#')) + { + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + } + $self->{isAuthor} = 0; + $self->{nbAuthor} = $self->{nbAuthor} + 1; + } + elsif ($self->{isPublisher} eq 2) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 3 ; + } + elsif ($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 3 ; + } + elsif ($self->{isEdition} eq 2) + { + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 3 ; + } + elsif ($self->{isFormatPublication} eq 2 ) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormatPublication} = 3 ; + } + elsif ($self->{isISBN} eq 2) + { + $origtext =~ s/978\-//; + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 3 ; + } + elsif ($self->{isPage} eq 2) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 3 ; + } + elsif ($self->{isDescription} eq 2) + { + if($origtext ne '') + { + if($self->{curInfo}->{description} ne '') + { + $self->{curInfo}->{description} .= "\n\n"; + } + $self->{curInfo}->{description} .= "Info sur cette edition : ".$origtext; + } + $self->{isDescription} = 3 ; + } + elsif ($self->{isBook} eq 1) + { + if (($origtext eq "Editeur :") && ($self->{isPublisher} ne 3)) + { + $self->{isPublisher} = 1; + } + elsif (($origtext eq "Dépot légal :") && ($self->{isPublication} ne 3)) + { + $self->{isPublication} = 1; + } + elsif (($origtext eq "Collection :") && 
($self->{isEdition} ne 3)) + { + $self->{isEdition} = 1; + } + elsif (($origtext eq "Taille :") && ($self->{isFormatPublication} ne 3)) + { + $self->{isFormatPublication} = 1; + } + elsif (($origtext eq "ISBN :") && ($self->{isISBN} ne 3)) + { + $self->{isISBN} = 1; + } + elsif (($origtext eq "Planches :") && ($self->{isPage} ne 3)) + { + $self->{isPage} = 1; + } + elsif (($origtext eq "Info édition : ") && ($self->{isDescription} ne 3)) + { + $self->{isDescription} = 1; + } + } + } + } + + sub new + { + #la classe est instancié une seule fois au démarrage de l'appli. + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + serie => 1, + title => 1, + publication => 1, + format => 0, + edition => 1, + }; + $self->{idPage} = 0; + $self->{nbAuthor} = 0; + $self->{isFound} = 0; + $self->{isSerie} = 0; + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isFormatPublication} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isPublication} = 0; + $self->{isFormat} = 0; + $self->{isSerie} = 0; + $self->{isPage} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + #RAZ des variables (entre 2 recherches la classe reste en mémoire) + $self->{idPage} = 0; + $self->{nbAuthor} = 0; + $self->{isFound} = 0; + $self->{isSerie} = 0; + $self->{isEdition} = 0; + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isFormatPublication} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isPublication} = 0; + $self->{isFormat} = 0; + $self->{isSerie} = 0; + $self->{isPage} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + + if ($self->{parsingList}) + { + } + else + { + $html =~ s|||gi; + $html =~ s|

  • |\n* |gi; + $html =~ s|
    |\n|gi; + $html =~ s|
    |\n|gi; + $html =~ s|||gi; + $html =~ s|||gi; +# $html =~ s|||gi; +# $html =~ s|||gi; + $html =~ s|

    |\n|gi; + $html =~ s|

    ||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + + # Quand un champs n est pas renseigne il peut y avoir un souci + $html =~ s||#TPFCHAMPSVIDE#|gi; + + $html =~ s|||gi; + $html =~ s|||gi; + # Ce n est pas un coloriste donc il ne faut pas le rajouter a la liste des auteurs + $html =~ s|<N&B>|#TPF NOIR ET BLANC TPF#|gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + # si isbn renseigné alors url de recherche différent + if((length($word) eq 13 || length($word) eq 10) && ($word =~ /[0-9]/)) + { + # si contient pas de caractére - alors insertion de ceux ci pour recherche chez bdgest (ISBN sur 10 au lieu de 13) + if($word =~ /\-/) + { + $word =~ s/978\-//; + return "http://www.bedetheque.com/index.php?R=1&RechISBN=". $word; + } + else + { + # Ajouts des - et enléve le 978 en début si présent + $word =~ s/978//; + + #re calcul de la clé de vérification + my $total = substr($word,0,1)*10; + $total = $total + (substr($word,1,1)*9); + $total = $total + (substr($word,2,1)*8); + $total = $total + (substr($word,3,1)*7); + $total = $total + (substr($word,4,1)*6); + $total = $total + (substr($word,5,1)*5); + $total = $total + (substr($word,6,1)*4); + $total = $total + (substr($word,7,1)*3); + $total = $total + (substr($word,8,1)*2); + $total = 11 - ($total%11); + + if($total eq 10) + { + $total = 'X'; + } + + $word = substr($word,0,1)."-".substr($word,1,2)."%25-%25".substr($word,7,2)."-".$total; + return "http://www.bedetheque.com/index.php?R=1&RechISBN=". $word; + } + } + else + { + return "http://www.bedetheque.com/index.php?R=1&RechSerie=". $word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.bedetheque.com/" . 
$url; + } + + sub getName + { + return "BDGest"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'Rataflo'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['isbn','title']; + } + + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm b/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm new file mode 100644 index 0000000..927be0b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm @@ -0,0 +1,374 @@ +package GCPlugins::GCbooks::GCbooksBibliotekaNarodowa; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchISBN = ""; + +{ + package GCPlugins::GCbooks::GCPluginBibliotekaNarodowa; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'frameset') #od razu mamy wynik + { + $self->{isBook} = 7; + $self->{itemIdx}++; + } + if (($tagname eq 'frame') && ($attr->{name} eq 'bib_frame') && $self->{isBook} == 7) #od razu mamy wynik + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://alpha.bn.org.pl".$attr->{src}; + $self->{isUrl} = 0; + $self->{isBook} = 0; + } + + if (($tagname eq 'tr') && ($attr->{class} eq 'browseEntry')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'td') && ($attr->{class} eq 'browseEntryData') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 2; + } + if (($tagname eq 'a') && ($self->{isBook} == 1) && ($self->{isAuthor} > 0)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://alpha.bn.org.pl".$attr->{href}; + $self->{itemsList}[$self->{itemIdx}]->{url} =~ s|frameset|bibframe|; + $self->{isUrl} = 0; + $self->{isAuthor} = 0; + $self->{isTitle} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'browseEntryYear') && ($self->{isBook} == 1)) + { + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'div') && ($attr->{id} eq 'wrgTIAUTR')) + { + $self->{isTitle} = 1; + $self->{isAuthor} = 1; + $self->{isTranslator} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgISBN')) + { + $self->{isISBN} = 1; + } + if (($tagname eq 'div') && 
($attr->{id} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPUBPD')) + { + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgSERIA')) + { + $self->{isSerie} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + if ($self->{parsingList}) + { + if (($tagname eq 'tr') && ($self->{isBook} == 1)) + { + $self->{isBook} = 0; + } + } + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + $origtext =~ s|^\s*||m; + $origtext =~ s|\s*$||m; + + if ($self->{parsingList}) + { + if ($self->{isBook} == 1) + { + if ($self->{isTitle} == 1) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} > 0) + { + $origtext =~ s|\s*\/\s*(.*)\s*;|$1|; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $1; + $self->{isAuthor} = 1; + } + if ($self->{isPublication} == 1) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublication} = 0; + } + } + + } + else + { + if (($self->{isTitle} == 1) && ($self->{isAuthor} == 1) && ($self->{isTranslator} == 1)) + { + my ($ti, $au, $tr, $bubu); + $origtext =~ m|.*(\.){1}$|; + $bubu = $1; + if ($bubu eq '.') + { + $origtext =~ s|(.*)\.$|$1|; + } + $origtext =~ m/([^\/]+)(\/\s+[^;]*)?(;\s*.*(tł|przeł|przekł)\..*)?$/; +# $origtext =~ m|([^/]+)(/\s[^;]+)?(;.*)?$|; + $ti = $1; + $au = $2; + $tr = $3; + $ti =~ s|([^:]*):?.*$|$1|; + $ti =~ s|\s*$||; + $self->{curInfo}->{title} = $ti; + $self->{isTitle} = 0; + $au =~ s|^(.*)il\..*$|$1|; + $au =~ s/(\/|tekst)//g; + $au =~ s| i |,|g; + $au =~ s|, |,|g; + $au =~ s|^\s*||; + $au =~ s|\s*$||; + $self->{curInfo}->{authors} = $au; + $self->{isAuthor} = 0; + $tr =~ s|[\[\]]||g; + $tr =~ s/;\s*.*(tł|przeł|przekł)\. (\[.*\] )?(.*)\.?$/$3/; + $tr =~ s|(z \w+\. 
)?(.*)|$2|; + $self->{curInfo}->{translator} = $tr; + $self->{isTranslator} = 0; + } + if (($self->{isPublisher} == 1) && ($self->{isPublication} == 1)) + { + my ($pu, $pd); + $origtext =~ m|(.*)\s:\s(.*),\s(.*)|; + $pu = $2; + $pd = $3; + $pu =~ s|([^"]*")?([^"]*)"?|$2|; + $pu =~ s|[\[\]]||g; + $self->{curInfo}->{publisher} = $pu; + $self->{isPublisher} = 0; + $pd =~ s|[^\d]||g; + $self->{curInfo}->{publication} = $pd; + $self->{isPublication} = 0; + } + if ($self->{isISBN} eq '1') + { + my ($pom1, $pom2); + if ($self->{searchField} eq 'isbn') + { + $pom1 = $self->{searchISBN}; + $pom2 = $origtext; + $pom2 =~ s|[^\dX]||g; + $pom1 =~ s|-||g; + $pom2 =~ s|-||g; + if ($pom1 eq $pom2) + { + $self->{curInfo}->{isbn} = $self->{searchISBN}; + } + else + { + $self->{curInfo}->{isbn} = $origtext; + } + } + else + { + $self->{curInfo}->{isbn} = $origtext; + } + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $origtext =~ s|[\[\]]||g; + $origtext =~ s|(.*)\.{1}$|$1|; + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isSerie} eq '1') + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + 
$self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + + if ($self->{parsingList}) + { + $html =~ s|(.*?)|$1|gms; + $html =~ s|\s*\s*(.*)\s*|$2|gm; + } + else + { + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + + $html =~ s|ISBN\s*<.*>\s*(\w*)|
    $1
    |m; + $html =~ s|Seria\s*<.*>\s*(.*)\s*|
    $1
    |m; + $html =~ s|
    (.*)( / [^<]*)
    |
    $1
    |; + $html =~ s|Opis fiz\s*<.*>\s*(\d*)\D.*|
    $1
    |m; + $html =~ s|TytuŁ\s*<.*>\s*(.*)\s*|
    $1
    |m; + $html =~ s|Adres wyd\s*<.*>\s*(.*)\s*|
    $1
    |m; + $html =~ s|Wydanie\s*<.*>\s*(.*)\s*|
    $1
    |m; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + my $bubu; + if ($self->{searchField} eq 'isbn') + { + $bubu = "i"; + $self->{searchISBN} = $word; + } + else + { + $bubu = "t"; + $self->{searchISBN} = ""; + } + return "http://alpha.bn.org.pl/search*pol/".$bubu."?SEARCH=".$word; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://alpha.bn.org.pl' + } + + sub getName + { + return "Biblioteka Narodowa"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm b/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm new file mode 100644 index 0000000..d32c1f4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm @@ -0,0 +1,295 @@ +package GCPlugins::GCbooks::GCBokkilden; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBokkilden; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingEnded}) + { + if ($self->{itemIdx} < 0) + { + $self->{itemIdx} = 0; + $self->{itemsList}[0]->{url} = $self->{loadedUrl}; + } + return; + } + + if ($self->{parsingList}) + { + if (($tagname eq 'h1') && ($attr->{class} eq 'normal')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + elsif ($self->{isBook}) + { + if ($tagname eq 'a') + { + if (($attr->{href} =~ /produkt\.do/) + && (!$self->{itemsList}[$self->{itemIdx}]->{title})) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isTitle} = 1; + } + elsif ($attr->{href} =~ /sok\.do\?enkeltsok/) + { + $self->{isAuthor} = 1; + } + } + } + } + else + { + if ($tagname eq 'table') + { + $self->{isBook} = 1 + if ($attr->{class} eq 'bokfaktatabell'); + } + elsif ($tagname eq 'div') + { + $self->{isCover} = 1 if ($attr->{class} eq 'img-ilus') + && ($attr->{style} eq 'width:120px;'); + $self->{is} = 'description' if $attr->{id} eq 'omtale-hidden'; + } + elsif ($tagname eq 'img') + { + if ($self->{isCover}) + { + $self->{curInfo}->{cover} = 'http://www.bokkilden.no/SamboWeb/' + . 
$attr->{src}; + $self->{isCover} = 0; + } + } + elsif ($tagname eq 'h1') + { + $self->{h1Style} = $attr->{style}; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + return if ($self->{parsingEnded}); + + if ($self->{parsingList}) + { + if ($self->{inside}->{title}) + { + $self->{parsingEnded} = 1 if $origtext !~ /S..?k p..?/; + } + + elsif ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isAuthor}) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ',' + if $self->{itemsList}[$self->{itemIdx}]->{authors}; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext; + $self->{isAuthor} = 0; + } + elsif ($self->{isBook}) + { + if ($origtext =~ / \| /) + { + $origtext =~ /(\d{4})/; + $self->{itemsList}[$self->{itemIdx}]->{publication} = $1; + $self->{isBook} = 0; + } + } + } + else + { + if ($self->{is}) + { + $origtext =~ s/^\s*//; + $self->{curInfo}->{$self->{is}} = $origtext; + if ($self->{is} eq 'genre') + { + $self->{curInfo}->{genre} =~ s/;\s*/,/g; + } + elsif ($self->{is} eq 'pages') + { + $self->{curInfo}->{pages} =~ s/[^0-9]//g; + } + $self->{is} = ''; + } + elsif ($self->{inside}->{title}) + { + $self->{tmpTitle} = $origtext; + } + elsif ($self->{inside}->{h1}) + { + if (!$self->{curInfo}->{title}) + { + if ($self->{h1Style}) + { + $self->{tmpTitle} =~ /\s*(.*?) av (.*?) 
»/gim; + $self->{curInfo}->{title} = $1; + $self->{curInfo}->{authors} = $2; + } + else + { + $self->{curInfo}->{title} = $origtext; + } + } + } + elsif ($self->{inside}->{author}) + { + $self->{curInfo}->{authors} .= ',' + if $self->{curInfo}->{authors}; + $self->{curInfo}->{authors} .= $origtext; + } + if ($self->{inside}->{translator}) + { + $self->{curInfo}->{translator} .= ', ' + if $self->{curInfo}->{translator}; + $self->{curInfo}->{translator} .= $origtext; + } + elsif (($self->{isBook}) && $self->{inside}->{b}) + { + $self->{is} = + ($origtext eq 'Utgitt: ') ? 'publication' + : ($origtext eq 'Forlag: ') ? 'publisher' + : ($origtext eq 'Innb.: ') ? 'format' + : ($origtext =~ /Spr..?k:/) ? 'language' + : ($origtext eq 'Sider: ') ? 'pages' + : ($origtext eq 'ISBN: ') ? 'isbn' + : ($origtext eq 'Utgave: ') ? 'edition' + : ($origtext eq 'Genre:') ? 'genre' + : ''; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 0, + }; + + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{isBook} = 0; + if ($self->{parsingList}) + { + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + } + else + { + $self->{is} = ''; + $self->{isCover} = 0; + $html =~ s|(.*?)|$1|gim; + $html =~ s|([^<]*)|$1|gim; + #" + $html =~ s|(.*?)|$1|gim; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.bokkilden.no/SamboWeb/sok.do?rom=MP&enkeltsok=$word&innsnevre=ja"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return "http://www.bokkilden.no/SamboWeb/$url" + if $url !~ m|http://www.bokkilden.no/|; + return $url; + } + + sub getCharset + { + my $self = shift; + + return 'UTF-8'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + + sub getName + { + return 'Bokkilden'; + } 
+ + sub getLang + { + return 'NO'; + } + + sub getAuthor + { + return 'Tian'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBol.pm b/lib/gcstar/GCPlugins/GCbooks/GCBol.pm new file mode 100644 index 0000000..6e882b1 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBol.pm @@ -0,0 +1,485 @@ +package GCPlugins::GCbooks::GCBol; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBol; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'td') + { + if($self->{bookStep} == 0) + { + $self->{bookStep} = 1 ; + } + } + elsif ($tagname eq 'img') + { + if($self->{bookStep} == 1) + { + $self->{bookStep} = 2; + } + } + elsif ($tagname eq 'a') + { + if($self->{bookStep}==2) + { + $self->{url} = "http://www.bol.it" . 
$attr->{href}; + $self->{bookStep} = 3 ; + $self->{isTitle} = 1 ; + } + elsif($self->{bookStep}==3) + { + $self->{bookStep} = 4 ; + $self->{isAuthor} = 1 ; + } + } + elsif ($tagname eq 'br') + { + if($self->{bookStep}==4) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{url}; + $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{title}; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{author}; + + $self->{isFormat} = 1 ; + #$self->{bookStep} = 0 ; + } + } + elsif ( + (($tagname ne 'h3') || ( ($tagname eq 'h3') && ($self->{bookStep} != 2) )) && + (($tagname ne 'p') || ( ($tagname eq 'p') && ($self->{bookStep} != 3) )) && + (($tagname ne 'span') || ( ($tagname eq 'span') && ($self->{bookStep} != 4) )) + ) + { + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isDescription} = 0; + $self->{isTranslator} = 0; + $self->{isCover} = 0; + $self->{isGenre} = 0; + $self->{isFormat} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isISBN} = 0; + + $self->{isBook} = 0; + $self->{bookStep} = 0; + } + } + else + { + if (($tagname eq 'img') && ($attr->{class} eq 'cover')) + { + $self->{curInfo}->{cover} = "http://www.bol.it" . $attr->{src}; + $self->{bookStep} = 1; + } + elsif (($tagname eq 'h1') && ($self->{bookStep} == 1)) + { + $self->{curInfo}->{title} = "http://www.bol.it" . 
$attr->{src};
                $self->{isTitle} = 1;
                $self->{bookStep} = 2;
            }
            elsif ($self->{bookStep} == 2)
            {
                if (($tagname eq 'a') && ($self->{areAuthors} == 0))
                {
                    $self->{isAuthor} = 1;
                    $self->{areAuthors} = 1;
                }
                if ($self->{areAuthors} == 1)
                {
                    if ($tagname eq 'a')
                    {
                        $self->{isAuthor} = 1;
                    }
                    else
                    {
                        $self->{bookStep} = 3;
                        $self->{areAuthors} = 0;
                    }
                }
            }
            elsif ($self->{bookStep} == 4)
            {
                if (($tagname eq 'a') && ($self->{areGenres} == 0))
                {
                    $self->{isGenre} = 1;
                    $self->{areGenres} = 1;
                }
                if ($self->{areGenres} == 1)
                {
                    if ($tagname eq 'a')
                    {
                        $self->{isGenre} = 1;
                    }
                    else
                    {
                        $self->{bookStep} = 5;
                        $self->{areGenres} = 0;
                    }
                }
            }
            elsif ($self->{bookStep} == 6)
            {
                if (($tagname eq 'a') && ($self->{areTranslators} == 0))
                {
                    $self->{isTranslator} = 1;
                    $self->{areTranslators} = 1;
                }
                if ($self->{areTranslators} == 1)
                {
                    if ($tagname eq 'a')
                    {
                        $self->{isTranslator} = 1;
                    }
                    else
                    {
                        # NOTE(review): authors move 2 -> 3 and genres 4 -> 5, but
                        # this branch leaves bookStep at 6 - confirm whether 7 was
                        # intended.
                        $self->{bookStep} = 6;
                        $self->{areTranslators} = 0;
                    }
                }
            }
        }
    }

    # Closing-tag callback: lowers the nesting counter kept for $tagname.
    sub end
    {
        my $self    = shift;
        my $tagname = shift;

        $self->{inside}->{$tagname} -= 1;
    }

    # Text-node callback.
    #   $self     - plugin instance; its is*/bookStep entries act as a state machine
    #   $origtext - text of the node
    # While parsing a result list the text is stored in whichever list field is
    # currently flagged.  On a detail page the text is trimmed and then either
    # recognised as a label (which arms the flag for the value that follows)
    # or stored into the curInfo field whose flag is set.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{isTitle})
            {
                $self->{isTitle} = 0;
                $self->{title}   = $origtext;
            }
            elsif ($self->{isAuthor})
            {
                $self->{isAuthor} = 0;
                $self->{author}   = $origtext;
            }
            elsif ($self->{isFormat})
            {
                # Keep what precedes the first '|', without leading blanks;
                # the publisher is expected in the next text node.
                my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});
                $item->{format} = (split(/\|/, $origtext))[0];
                $item->{format} =~ s/^\s+//;
                $self->{isFormat}    = 0;
                $self->{isPublisher} = 1;
            }
            elsif ($self->{isPublisher})
            {
                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
                $self->{isPublisher}   = 0;
                $self->{isPublication} = 1;
            }
            elsif ($self->{isPublication})
            {
                # Keep the second '|'-separated field, without leading blanks.
                my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});
                $item->{publication} = (split(/\|/, $origtext))[1];
                $item->{publication} =~ s/^\s+//;
                $self->{isPublication} = 0;
            }
        }
        else
        {
            # Strip leading and trailing whitespace
            $origtext =~ s/^\s+//;
            $origtext =~ s/\s+$//g;

            # Labels that announce the value carried by a following text node
            my %flagForLabel = (
                'I contenuti'           => 'isDescription',
                'Formato:'              => 'isFormat',
                'Lingua:'               => 'isLanguage',
                'Editore:'              => 'isPublisher',
                'Anno di pubblicazione' => 'isPublication',
                'Codice EAN:'           => 'isISBN',
            );

            # [ flag, curInfo field, accumulate as comma-separated list ],
            # in the order the flags are tested
            my @captures = (
                [ isTitle       => 'title',       0 ],
                [ isAuthor      => 'authors',     1 ],
                [ isDescription => 'description', 0 ],
                [ isFormat      => 'format',      0 ],
                [ isPage        => 'pages',       0 ],
                [ isLanguage    => 'language',    0 ],
                [ isPublisher   => 'publisher',   0 ],
                [ isPublication => 'publication', 0 ],
                [ isISBN        => 'isbn',        0 ],
                [ isGenre       => 'genre',       1 ],
                [ isTranslator  => 'translator',  1 ],
            );

            if (exists $flagForLabel{$origtext})
            {
                $self->{ $flagForLabel{$origtext} } = 1;
            }
            elsif (index($origtext, 'Pagine:') == 0)
            {
                $self->{isPage} = 1;
            }
            elsif (($origtext eq 'Traduttore:') || ($origtext eq 'Traduttori:'))
            {
                $self->{bookStep} = 6;
            }
            elsif ($origtext eq 'Generi:')
            {
                $self->{bookStep} = 4;
            }
            elsif ($origtext ne '')
            {
                foreach my $capture (@captures)
                {
                    my ($flag, $field, $isList) = @$capture;
                    next unless $self->{$flag};

                    if ($isList && ($self->{curInfo}->{$field} ne ''))
                    {
                        $self->{curInfo}->{$field} .= ", " . $origtext;
                    }
                    else
                    {
                        $self->{curInfo}->{$field} = $origtext;
                    }
                    $self->{$flag} = 0;
                    last;
                }
            }
        }
    }

    # Constructor: builds the base plugin object, declares which fields the
    # result list provides and clears every parser flag.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title       => 1,
            authors     => 1,
            publication => 1,
            format      => 1,
            edition     => 1,
            serie       => 0,
        };

        $self->{$_} = 0 foreach qw(
            isTitle isAuthor isAnalyse isDescription isTranslator isCover
            isGenre isFormat isPage isLanguage isPublisher isPublication isISBN
            areAuthors areGenres areTranslators
            isBook bookStep title author
        );

        return $self;
    }

    sub preProcess
    {
        my ($self, $html) = @_;

        if ($self->{parsingList})
        {
            $html =~ s|
    ||gi; + #$html =~ s/[\n\r\t]//g; + } + else + { + my $found = index($html,''); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|||gi; + $html =~ s|
  • |\n* |gi; + #$html =~ s|
    |\n|gi; + #$html =~ s|
    |\n|gi; + #$html =~ s|||gi; + #$html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|

    |\n|gi; + $html =~ s|

    ||gi;
            $html =~ s|\x{92}|'|g;
            $html =~ s|’|'|gi;
            $html =~ s|•|*|gi;
        }

        return $html;
    }

    # Returns the search address together with an array reference of
    # name/value pairs (presumably submitted as form fields - confirm against
    # the plugin base class).  For anything but an ISBN search every '+' in
    # the query is turned into a blank first.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        $word =~ s/\+/ /g unless $self->{searchField} eq 'isbn';

        my @fields = (
            "crc"       => "100",
            "crcselect" => "100",
            "g"         => "$word",
            "tpr"       => "10",
        );

        return ('http://www.bol.it/libri/ricerca', \@fields);
    }

    # Returns the item address, or the site root when none is given.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url || 'http://www.bol.it';
    }

    # Display name of the plugin.
    sub getName
    {
        return "Bol";
    }

    # Character set the site's pages are decoded with.
    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-15";
    }

    # Plugin authors.
    sub getAuthor
    {
        return 'TPF, UnclePetros';
    }

    # Language code of the site.
    sub getLang
    {
        return 'IT';
    }

    # Search fields supported by this plugin.
    sub getSearchFieldsArray
    {
        return ['isbn', 'title'];
    }
}

1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm b/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm new file mode 100644 index 0000000..ad46177 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm @@ -0,0 +1,479 @@ +package GCPlugins::GCbooks::GCBuscape; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCbooks::GCbooksCommon;

{
    package GCPlugins::GCbooks::GCPluginBuscape;

    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
    use URI::Escape;

    # Appends @parts to $joined, separated by $separator.  As long as the
    # accumulated value is empty (or undefined) the next part replaces it
    # instead of being appended.  Returns the accumulated value.
    sub _joinParts
    {
        my ($separator, $joined, @parts) = @_;

        foreach my $part (@parts)
        {
            if ($joined eq '')
            {
                $joined = $part;
            }
            else
            {
                $joined .= $separator . $part;
            }
        }
        return $joined;
    }

    # Splits a label by working on its reversed form, so the LAST "(" and the
    # LAST "-" of the label are the ones that count.
    # Returns ($isbnChunk, @parts), every value still reversed:
    #   $isbnChunk - text after the last "(" (supposed to be the ISBN), or
    #                undef when the label has no such part
    #   @parts     - the remainder split on "-"
    sub _reversedTitleParts
    {
        my ($label) = @_;

        my @chunks = split(/\(/, scalar reverse($label));
        my $isbnChunk;
        my $remainder;
        if ($chunks[1] ne '')
        {
            # Drop the first field, which is supposed to be the ISBN code
            $isbnChunk = shift(@chunks);
            $remainder = _joinParts("(", undef, @chunks);
        }
        else
        {
            $remainder = $chunks[0];
        }
        return ($isbnChunk, split(/-/, $remainder));
    }

    # Opening-tag callback.
    #   $tagname - tag name, $attr - hash ref of its attributes
    # Result list: every <a class="xu"> opens a new item.  For an ISBN search
    # that has produced no item yet, an element of class "xj" records the
    # loaded page itself as the item, and a <meta> tag triggers a reload of
    # the page whose URL= target becomes the item.
    # Detail page: class "xj" starts the label/value analysis; an <img> with
    # an onerror attribute supplies title, ISBN and cover.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if (($tagname eq 'a') && ($attr->{class} eq 'xu'))
            {
                $self->{itemIdx}++;
                $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
                $self->{isTitle} = 1;
            }
            elsif (($attr->{class} eq 'xj') && ($self->{itemIdx} eq '-1') && ($self->{searchField} eq 'isbn'))
            {
                $self->{itemIdx}++;
                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl};
            }
            elsif (($tagname eq 'meta') && ($self->{itemIdx} eq '-1') && ($self->{searchField} eq 'isbn'))
            {
                my $html  = $self->loadPage($self->{loadedUrl}, 0, 1);
                my $found = index($html, "URL=");
                if ($found >= 0)
                {
                    # Keep what lies between 'URL=' and the next double quote
                    $html = substr($html, $found + length('URL='));
                    $html = substr($html, 0, index($html, "\""));
                    $self->{itemIdx}++;
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $html;
                }
            }
        }
        else
        {
            if (($attr->{class} eq 'xj') && ($self->{isAnalyse} eq 0))
            {
                $self->{isAnalyse} = 1;
            }
            elsif (($tagname eq 'img') && ($attr->{onerror} ne '') && ($self->{curInfo}->{title} eq ''))
            {
                # Beware: this site uses 2 different formats
                if ($attr->{alt} ne '')
                {
                    $self->{curInfo}->{title} = $attr->{alt};
                }
                if ($attr->{title} ne '')
                {
                    my ($isbnChunk, @titleParts) = _reversedTitleParts($attr->{title});
                    if (defined $isbnChunk)
                    {
                        $self->{curInfo}->{isbn} = scalar reverse($isbnChunk);
                        $self->{curInfo}->{isbn} =~ s/\)//;
                    }

                    # Drop the last field of the label, which is the author
                    shift(@titleParts) if ($titleParts[1] ne '');

                    $self->{curInfo}->{title} =
                        scalar reverse(_joinParts("-", $self->{curInfo}->{title}, @titleParts));
                }

                $self->{curInfo}->{cover} = $attr->{src};
            }
        }
    }

    # Closing-tag callback: lowers the nesting counter kept for $tagname.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # Text-node callback.
    # Result list: a flagged title node of the form "title - author (isbn)"
    # is split into the item's title and authors.
    # Detail page: after a class "xj" element the text is matched against the
    # known labels.  A recognised label arms its flag (value 1), the next
    # text node moves it to 2, and the node after that carries the value.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{isTitle})
            {
                my (undef, @titleParts) = _reversedTitleParts($origtext);

                if ($titleParts[1] ne '')
                {
                    my $authors = scalar reverse(shift(@titleParts));
                    my $found   = index($authors, " Cod:");
                    $authors = substr($authors, 0, $found) if ($found >= 0);
                    # Strip leading and trailing whitespace
                    $authors =~ s/^\s+//;
                    $authors =~ s/\s+$//g;
                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $authors;
                }

                $self->{itemsList}[$self->{itemIdx}]->{title} =
                    scalar reverse(_joinParts("-", $self->{itemsList}[$self->{itemIdx}]->{title}, @titleParts));
                $self->{isTitle} = 0;
            }
        }
        else
        {
            # Strip leading and trailing whitespace
            $origtext =~ s/^\s+//;
            $origtext =~ s/\s+$//g;

            if ($self->{isAnalyse} eq 1)
            {
                # Labels are tested in this order; the first match wins
                my @labels = (
                    [ qr/Autor/i,            'isAuthor'      ],
                    [ qr/Editora/i,          'isPublisher'   ],
                    [ qr/Ano de edi/i,       'isPublication' ],
                    [ qr/N.* de p.*ginas/i,  'isPage'        ],
                    [ qr/ISBN/i,             'isISBN'        ],
                    [ qr/Encaderna/i,        'isFormat'      ],
                );

                $self->{isAnalyse} = 0;
                foreach my $label (@labels)
                {
                    my ($pattern, $flag) = @$label;
                    next unless $origtext =~ $pattern;

                    $self->{$flag}     = 1;
                    $self->{isAnalyse} = 2;
                    last;
                }
            }
            elsif ($self->{isAuthor} eq 1)
            {
                $self->{isAuthor} = 2;
            }
            elsif ($self->{isAuthor} eq 2)
            {
                unless ($origtext =~ m/N.*o Cadastrado/i)
                {
                    # The value is "last name, first name"
                    my ($lastName, $firstName) = (split(/,/, $origtext))[0, 1];
                    foreach my $part ($lastName, $firstName)
                    {
                        next unless defined $part;
                        # Strip ALL leading and trailing whitespace (a single
                        # \s used to leave blanks behind after the comma)
                        $part =~ s/^\s+//;
                        $part =~ s/\s+$//;
                    }

                    my $fullName = (defined($firstName) && ($firstName ne ''))
                        ? $firstName . " " . $lastName
                        : $lastName;

                    if ($self->{curInfo}->{authors} eq '')
                    {
                        $self->{curInfo}->{authors} = $fullName;
                    }
                    else
                    {
                        $self->{curInfo}->{authors} .= ", " . $fullName;
                    }
                }

                $self->{isAuthor}  = 0;
                $self->{isAnalyse} = 0;
            }
            else
            {
                # Remaining label/value pairs, tested in this order
                my @fields = (
                    [ isISBN        => 'isbn'        ],
                    [ isPublisher   => 'publisher'   ],
                    [ isPublication => 'publication' ],
                    [ isPage        => 'pages'       ],
                    [ isFormat      => 'format'      ],
                );

                foreach my $pair (@fields)
                {
                    my ($flag, $field) = @$pair;

                    if ($self->{$flag} eq 1)
                    {
                        $self->{$flag} = 2;
                        last;
                    }
                    if ($self->{$flag} eq 2)
                    {
                        # Placeholder texts used by the site are not stored
                        $self->{curInfo}->{$field} = $origtext
                            if (!($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i));
                        $self->{$flag}     = 0;
                        $self->{isAnalyse} = 0;
                        last;
                    }
                }
            }
        }
    }

    # Constructor: builds the base plugin object, declares which fields the
    # result list provides and clears every parser flag.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            authors => 1,
            publication => 0,
            format => 0,
            edition => 0,
            serie => 0,
        };

        $self->{$_} = 0 foreach qw(
            isTitle isAuthor isAnalyse isPublisher
            isPublication isPage isISBN isFormat
        );

        return $self;
    }

    sub preProcess
    {
        my ($self, $html) = @_;

        if ($self->{parsingList})
        {
            $html =~ s|
    ||gi; + } + else + { + my $found = index($html,'
    '); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|||gi; + $html =~ s|
  • |\n* |gi; + $html =~ s|
    |\n|gi; + $html =~ s|
    |\n|gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|

    |\n|gi; + $html =~ s|

    ||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|

    |\n|gi; + $html =~ s|

    ||gi;
            $html =~ s|\x{92}|'|g;
            $html =~ s|’|'|gi;
            $html =~ s|•|*|gi;
        }

        return $html;
    }

    # Returns the address to query: the book page for an ISBN search, the
    # title listing otherwise.  $word is appended as given.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return "http://www.liberonweb.com/asp/libro.asp?ISBN=" . $word
            if ($self->{searchField} eq 'isbn');

        return "http://www.liberonweb.com/asp/lista.asp?D1=Titolo&T1=" . $word. "&I1=1";
    }

    # Returns the item address unchanged.
    sub getItemUrl
    {
        my $self = shift;
        my $url  = shift;

        return $url;
    }

    # Display name of the plugin.
    sub getName
    {
        return "LiberOnWeb";
    }

    # Character set the site's pages are decoded with.
    sub getCharset
    {
        my $self = shift;

        return "ISO-8859-15";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'TPF';
    }

    # Language code of the site.
    sub getLang
    {
        return 'IT';
    }

    # Search fields supported by this plugin.
    sub getSearchFieldsArray
    {
        return ['isbn', 'title'];
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm b/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm new file mode 100644 index 0000000..1afdc67 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm @@ -0,0 +1,365 @@ +package GCPlugins::GCbooks::GCbooksMareno; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCbooks::GCbooksCommon;

# Last address built by getSearchUrl; text() reuses it as the item address
# when the search appears to have landed directly on a result.
my $searchURL = "";

{
    package GCPlugins::GCbooks::GCPluginMareno;

    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
    use URI::Escape;

    # Opening-tag callback.
    #   $tagname - tag name, $attr - hash ref of its attributes
    # Result list: <table class="bookData"> opens a new item, the links
    # inside it give the item address, <div class="bookAuthor"> announces
    # the combined author/format/publisher/year text.
    # Detail page: the <div id="wrg..."> markers arm the flag of the field
    # whose value follows.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            # Did the search land directly on a result?  text() decides from
            # the page title.
            if ($tagname eq 'title')
            {
                $self->{isBook} = 7;
            }

            if (($tagname eq 'table') && ($attr->{class} eq 'bookData'))
            {
                $self->{itemIdx}++;
                $self->{isBook} = 1;
            }
            if (($tagname eq 'a') && ($self->{isBook} == 1))
            {
                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.mareno.pl".$attr->{href};
                $self->{isUrl} = 0;
                $self->{isTitle} = 1;
            }
            if (($tagname eq 'div') && ($attr->{class} eq 'bookAuthor') && ($self->{isBook} == 1))
            {
                $self->{isAuthor} = 1;
                $self->{isFormat} = 1;
                $self->{isPublisher} = 1;
                $self->{isPublication} = 1;
            }
        }
        elsif ($tagname eq 'div')
        {
            my %flagForId = (
                wrgISBN   => 'isISBN',
                wrgPAGES  => 'isPage',
                wrgPUBLI  => 'isPublisher',
                wrgPDATE  => 'isPublication',
                wrgFORMAT => 'isFormat',
                wrgDESCR  => 'isDescription',
                wrgTITLE  => 'isTitle',
            );

            if (exists $flagForId{ $attr->{id} })
            {
                $self->{ $flagForId{ $attr->{id} } } = 1;
            }
            elsif ($attr->{id} eq 'wrgAUTOR')
            {
                # 2 = inside the author block, waiting for a link
                $self->{isAuthor} = 2;
            }
        }
        elsif ($tagname eq 'a')
        {
            # Inside the author block every link toggles between
            # "waiting" (2) and "next text is an author name" (1)
            if ($self->{isAuthor} eq '1')
            {
                $self->{isAuthor} = 2;
            }
            elsif ($self->{isAuthor} eq '2')
            {
                $self->{isAuthor} = 1;
            }

            if ($attr->{href} =~ /okladki\/big/)
            {
                $self->{curInfo}->{cover} = "http://www.mareno.pl".$attr->{href};
                $self->{isCover} = 0;
            }
        }
    }


    # Closing-tag callback: </table> ends the current list item, </div> ends
    # the author block; the nesting counter for $tagname is lowered.
    sub end
    {
        my ($self, $tagname) = @_;

        if ($tagname eq 'table')
        {
            $self->{isBook} = 0;
        }
        if ($tagname eq 'div')
        {
            $self->{isAuthor} = 0;
        }

        $self->{isFound} = 0;
        $self->{inside}->{$tagname}--;
    }

    # Text-node callback: stores the text in the field whose flag is set.
    # The text is trimmed at both ends of the whole string, so multi-line
    # values lose their trailing whitespace as well.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            # Page title: anything non-empty that does not mention
            # "wyszukiwanie" makes the search address itself the only item.
            if ($self->{isBook} == 7)
            {
                $origtext =~ s|^\s*||gs;
                $origtext =~ s|\s*$||gs;
                if (($origtext ne '') && ($origtext !~ /wyszukiwanie/))
                {
                    $self->{itemIdx}++;
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $searchURL;
                    $self->{isUrl} = 0;
                }
                $self->{isBook} = 0;
            }

            $origtext =~ s/\A\s+//;
            $origtext =~ s/\s+\z//;

            if ($self->{isAuthor} == 1)
            {
                # One text node carries, in this order and each optional:
                # "#^#- author#^#", "okładka <format>,", "<publisher>,", year
                my ($au, $fo, $pu, $pd) =
                    $origtext =~ m|(#\^#- [^#]+#\^#)?(okładka\s*[^,]+,\s*)?([^,]+,\s*)?(\d*)?|s;
                my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});

                $au =~ s|#\^#- ([^#]+)#\^#|$1|g;
                $item->{authors} = $au;
                $self->{isAuthor} = 0;

                $fo =~ s|okładka\s*([^,]+),\s*|$1|g;
                $item->{format} = $fo;
                $self->{isFormat} = 0;

                $pu =~ s|([^,]+),\s*|$1|g;
                $item->{publisher} = $pu;
                $self->{isPublisher} = 0;

                $item->{publication} = $pd;
                $self->{isPublication} = 0;
            }
            if ($self->{isTitle} == 1)
            {
                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                $self->{isTitle} = 0;
            }
        }
        else
        {
            $origtext =~ s/\A\s+//;
            $origtext =~ s/\s+\z//;

            if ($self->{isTitle} eq '1')
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{isTitle} = 0;
            }
            if ($self->{isAuthor} == 1)
            {
                if ($origtext ne '')
                {
                    # Authors are accumulated as a comma-separated list
                    if ($self->{curInfo}->{authors} ne '')
                    {
                        $self->{curInfo}->{authors} .= ",";
                    }
                    $self->{curInfo}->{authors} .= $origtext;
                }
                $self->{isAuthor} = 2;
            }
            if ($self->{isFormat} == 1)
            {
                $self->{curInfo}->{format} = $origtext;
                $self->{isFormat} = 0;
            }
            if ($self->{isDescription} == 1)
            {
                $self->{curInfo}->{description} = $origtext;
                $self->{isDescription} = 0;
            }
            if ($self->{isISBN} eq '1')
            {
                $self->{curInfo}->{isbn} = $origtext;
                $self->{isISBN} = 0;
            }
            if ($self->{isPage} eq '1')
            {
                $self->{curInfo}->{pages} = $origtext;
                $self->{isPage} = 0;
            }
            if ($self->{isPublisher} eq '1')
            {
                $self->{curInfo}->{publisher} = $origtext;
                $self->{isPublisher} = 0;
            }
            if ($self->{isPublication} eq '1')
            {
                # Keep the four-character token (the year) of the date
                $origtext =~ s|(\S*)\s*(\S{4})|$2|;
                $self->{curInfo}->{publication} = $origtext;
                $self->{isPublication} = 0;
            }
        }
    }

    # Constructor: builds the base plugin object, declares which fields the
    # result list provides and clears every parser flag.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            authors => 1,
            publication => 1,
            format => 1,
            edition => 0,
        };

        $self->{$_} = 0 foreach qw(
            isBook isUrl isEditor_Publication_Format_Lang isAnalyse isFound
            isTitle isAuthor isPublisher isISBN isFormat isEdition isPage
            isLanguage isPublication isSerie isDescription isCover isTranslator
        );

        return $self;
    }

    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;
        $self->{insideResults} = 0;
        $self->{actorsCounter} = 0;

        if ($self->{parsingList})
        {
            $html =~ s/<\/?(b|strong)>//gi;
            $html =~ s|]*>||gi;
            $html =~ s|
    |#\^#|gi; + $html =~ s|\s*\s*|
    \s*
    |gs; + $html =~ s|||gs; + $html =~ s|||g; + } + else + { + $html =~ s/<\/?(i|br|strong)>//gi; + + $html =~ s|

    ([^<]*)

    |
    $1
    |s; + $html =~ s|

    |
    |s; + $html =~ s|\s*ISBN:\s*([\dX]*)\s*|
    $1
    |s; + $html =~ s|\s*okładka:\s*([^,]*),?\s*(\d*)[^<]*|
    $1
    $2
    |s; + $html =~ s|\s*wydawnictwo:\s*([^,]*),\s*(\d*)\s*|
    $1
    $2
    |s; + $html =~ s|opis produktu:\s*([^<]*)
    |
    $1

    |; +# $html =~ s|
    Seria:
    $*\s*
    (.*)
    |
    $1
    |; +# $html =~ s|
    Wydanie:
    (.*)
    |
    $1
    |; +# $html =~ s|
    Tłumaczenie:\s*
    $*\s*
    |
    |;
        }

        return $html;
    }

    # Builds the search address for $word, remembers it in $searchURL and
    # returns it.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        return $searchURL = "http://www.mareno.pl/rezultat.php?tytul=".$word;
    }

    # Returns the item address, or the site root when none is given.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url || 'http://www.mareno.pl/';
    }

    # Display name of the plugin.
    sub getName
    {
        return "Mareno";
    }

    # Character set the site's pages are decoded with.
    sub getCharset
    {
        my $self = shift;

        #return "UTF-8";
        return "ISO-8859-2";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'WG';
    }

    # Language code of the site.
    sub getLang
    {
        return 'PL';
    }

    # Search fields supported by this plugin.
    sub getSearchFieldsArray
    {
        return ['isbn', 'title'];
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm b/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm new file mode 100644 index 0000000..6b5f41b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm @@ -0,0 +1,333 @@ +package GCPlugins::GCbooks::GCMediabooks; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCbooks::GCbooksCommon;

{
    package GCPlugins::GCbooks::GCPluginMediabooks;

    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
    use URI::Escape;

    use Encode;
    use HTML::Entities;

    # Opening-tag callback.
    #   $tagname - tag name, $attr - hash ref of its attributes,
    #   $origtext - source text of the tag
    # Result list: <font class="font4Copy"> opens a book entry whose detail
    # link, author link and publisher link arm the matching flags; a hidden
    # <input> closes the entry.
    # Detail page: author/publisher links, <span> elements and the cover
    # image drive the flags read by text().
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        my $isLink = ($tagname eq 'a');

        if ($self->{parsingList})
        {
            if (($tagname eq 'font') && ($attr->{class} eq 'font4Copy'))
            {
                $self->{isBook} = 1 ;
                $self->{isUrl} = 1 ;
                $self->{isDescription} = 0 ;
            }
            elsif ($isLink && ($attr->{href} =~ m|/artigos/popUp_detalhe.jsp|i) && ($self->{isBook}) && ($self->{isUrl}))
            {
                # The address is the first single-quoted part of the href
                my $link  = $attr->{href};
                my $quote = index($link, "'");
                if ($quote >= 0)
                {
                    $link  = substr($link, $quote + length("'"));
                    $quote = index($link, "'");
                    $link  = substr($link, 0, $quote) if ($quote >= 0);
                    $link  = "http://www.mediabooks.pt" . $link;
                }

                $self->{itemIdx}++;
                $self->{itemsList}[$self->{itemIdx}]->{url} = $link;

                $self->{isTitle} = 1 ;
                $self->{isUrl} = 0 ;
            }
            elsif ($isLink && ($attr->{href} =~ m|/autores/index.jsp|i) && ($self->{isBook}))
            {
                $self->{isAuthor} = 1 ;
            }
            elsif ($isLink && ($attr->{href} =~ m|/editores/index.jsp|i) && ($self->{isBook}))
            {
                $self->{isPublisher} = 1 ;
            }
            elsif (($tagname eq 'input') && ($attr->{type} eq 'hidden'))
            {
                $self->{isBook} = 0 ;
            }
        }
        else
        {
            if ($isLink && ($attr->{href} =~ m|/autores/index.jsp|i))
            {
                $self->{isAuthor} = 1 ;
            }
            elsif ($isLink && ($attr->{href} =~ m|/editores/index.jsp|i))
            {
                $self->{isPublisher} = 1 ;
            }
            elsif ($self->{isISBN} eq 1)
            {
                # Any tag after the ISBN label: the value comes next
                $self->{isISBN} = 2 ;
            }
            elsif (($tagname eq 'span') && ($self->{isTitle}))
            {
                $self->{isTitle} = 2 ;
            }
            elsif (($tagname eq 'span') && ($attr->{class} eq 'font4Copy'))
            {
                $self->{isAnalyse} = 1 ;
            }
            elsif (($tagname eq 'img') && ($attr->{src} =~ m|/artigos/imagens/|i))
            {
                # Images whose tag mentions /artigos/imagens/livros are not
                # used as cover
                unless ($origtext =~ m|/artigos/imagens/livros|i)
                {
                    $self->{curInfo}->{cover} = 'http://www.mediabooks.pt' .$attr->{src};
                }

                $self->{isTitle} = 1 ;
            }
        }
    }

    # Closing-tag callback: clears isFound and lowers the nesting counter
    # kept for $tagname.
    sub end
    {
        my $self    = shift;
        my $tagname = shift;

        $self->{isFound} = 0 ;
        $self->{inside}->{$tagname} -= 1;
    }

    # Text-node callback: stores the text in the field whose flag is set
    # (result list) or, on a detail page, either recognises a label or
    # stores the value announced by an earlier label.
    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{isTitle})
            {
                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                $self->{isTitle} = 0 ;
            }
            elsif ($self->{isAuthor} eq 1)
            {
                # Remove line breaks
                $origtext =~ s/\n//g;
                $origtext =~ s/\r//g;
                if ($origtext ne '')
                {
                    if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
                    {
                        $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
                    }
                    else
                    {
                        $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ' . $origtext;
                    }
                }
                $self->{isAuthor} = 0 ;
            }
            elsif ($self->{isPublisher})
            {
                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
                $self->{isPublisher} = 0 ;
            }
        }
        else
        {
            # Strip leading and trailing whitespace
            $origtext =~ s/^\s+//;
            $origtext =~ s/\s+$//g;

            if ($self->{isTitle} eq '2')
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{isTitle} = 0 ;
            }
            elsif ($self->{isAuthor} eq 1)
            {
                # Every author is followed by a comma
                $self->{curInfo}->{authors} .= $origtext . "," if ($origtext ne '');
                $self->{isAuthor} = 0 ;
            }
            elsif ($self->{isAnalyse})
            {
                # A label arms the flag of the value that follows it
                my @labels = (
                    [ qr/ISBN/i,          'isISBN'        ],
                    [ qr/Formato/i,       'isFormat'      ],
                    [ qr/Breve Descri/i,  'isDescription' ],
                    [ qr/Ano de Edi/i,    'isPublication' ],
                    [ qr/P.ginas/i,       'isPage'        ],
                );
                foreach my $label (@labels)
                {
                    my ($pattern, $flag) = @$label;
                    $self->{$flag} = 1 if ($origtext =~ $pattern);
                }

                $self->{isAnalyse} = 0 ;
            }
            elsif ($self->{isISBN} eq 2)
            {
                $self->{curInfo}->{isbn} = $origtext;
                $self->{isISBN} = 0 ;
            }
            elsif ($self->{isPublisher})
            {
                $self->{curInfo}->{publisher} = $origtext;
                $self->{isPublisher} = 0 ;
            }
            elsif ($self->{isFormat})
            {
                $self->{curInfo}->{format} = $origtext;
                $self->{isFormat} = 0 ;
            }
            elsif ($self->{isPublication})
            {
                $self->{curInfo}->{publication} = $origtext;
                $self->{isPublication} = 0 ;
            }
            elsif ($self->{isPage})
            {
                $self->{curInfo}->{pages} = $origtext;
                $self->{isPage} = 0 ;
            }
            elsif ($self->{isDescription})
            {
                # The description flag stays set: every further text node
                # is appended
                $self->{curInfo}->{description} .= $origtext;
            }

        }
    }

    # Constructor: builds the base plugin object, declares which fields the
    # result list provides and clears every parser flag.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless ($self, $class);

        $self->{hasField} = {
            title => 1,
            authors => 1,
            publication => 0,
            format => 0,
            edition => 1,
        };

        $self->{$_} = 0 foreach qw(
            isFound isBook isUrl isTitle isAuthor isFormatPublication
            isPublisher isISBN isPublication isFormat isPage isDescription
        );

        return $self;
    }

    sub preProcess
    {
        my ($self, $html) = @_;

        if ($self->{parsingList})
        {
        }
        else
        {
            $html =~ s|\n||gi;
            $html =~ s|\r||gi;
            $html =~ s|\t||gi;

            $html =~ s|
  • |\n* |gi; + $html =~ s|
    |\n|gi; + $html =~ s|
    |\n|gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|

    |\n|gi; + $html =~ s|

    ||gi;
            $html =~ s|||gi;
            $html =~ s|\x{92}|'|g;
            $html =~ s|’|'|gi;
            $html =~ s|•|*|gi;
        }

        return $html;
    }

    # Returns the search address together with an array reference of
    # name/value pairs (presumably submitted as form fields - confirm against
    # the plugin base class).  v_pes_id is "2" for an ISBN search and "1"
    # otherwise.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        my $searchType = ($self->{searchField} eq 'isbn') ? "2" : "1";

        my @fields = (
            "v_sec_id"      => "1",
            "v_prev_sec_id" => "",
            "v_pes_id"      => $searchType,
            "v_pesquisa"    => "$word",
            "image.x"       => "5",
            "image.y"       => "7",
        );

        return ('http://www.mediabooks.pt/pesquisa/result_pesq.jsp', \@fields);
    }

    # Returns the item address, or the site root when none is given.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        return $url || 'http://www.mediabooks.pt/';
    }

    # Display name of the plugin.
    sub getName
    {
        return "Mediabooks";
    }

    # Plugin author.
    sub getAuthor
    {
        return 'TPF';
    }

    # Language code of the site.
    sub getLang
    {
        return 'PT';
    }

    # Search fields supported by this plugin.
    sub getSearchFieldsArray
    {
        return ['isbn', 'title'];
    }

}

1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm b/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm new file mode 100644 index 0000000..5c5129a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm @@ -0,0 +1,389 @@ +package GCPlugins::GCbooks::GCbooksMerlin; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCbooks::GCbooksCommon;

{
    package GCPlugins::GCbooks::GCPluginMerlin;

    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
    use URI::Escape;

    # Opening-tag callback.
    #   $tagname - tag name, $attr - hash ref of its attributes
    # Result list: <li class="tytul"> opens a new item whose first link gives
    # the item address; <li class="wydawca"> announces the publisher text.
    # Detail page: id/class markers arm the flag of the field that follows.
    # A flag value of 2 means "inside the block, waiting", 1 means "the next
    # text node is the value".
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingList})
        {
            if ($tagname eq 'li')
            {
                if ($attr->{class} eq 'tytul')
                {
                    $self->{isBook} = 1;
                    $self->{isUrl} = 1;
                    $self->{itemIdx}++;
                }
                if ($attr->{class} eq 'wydawca')
                {
                    $self->{isPublisher} = 1;
                }
            }
            elsif (($tagname eq 'a') && ($self->{isUrl} eq '1'))
            {
                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.merlin.com.pl".$attr->{href};
                $self->{isUrl} = 0;
            }
        }
        elsif ($tagname eq 'div')
        {
            # Markers whose value is carried by the next text node
            my %flagForId = (
                wrgISBN  => 'isISBN',
                wrgPAGES => 'isPage',
                wrgPUBLI => 'isPublisher',
                wrgPDATE => 'isPublication',
                wrgEDITI => 'isEdition',
            );
            my $id = $attr->{id};

            if (exists $flagForId{$id})
            {
                $self->{ $flagForId{$id} } = 1;
            }
            elsif ($id eq 'wrgSERIA')
            {
                $self->{isSerie} = 2;
            }
            elsif ($id eq 'prodHead')
            {
                # The product header contains cover, title and format
                $self->{isCover} = 2;
                $self->{isTitle} = 2;
                $self->{isFormat} = 2;
            }
            elsif (($id eq 'prodImg') && ($self->{isCover} eq '2'))
            {
                $self->{isCover} = 1;
            }

            if (($attr->{class} eq 'prodFeatureSpec') && ($self->{isFormat} eq '2'))
            {
                $self->{isFormat} = 1;
            }
            if ($attr->{class} eq 'productDesc')
            {
                $self->{isDescription} = 1;
            }
        }
        elsif ($tagname eq 'a')
        {
            if ($self->{isSerie} eq '2')
            {
                $self->{isSerie} = 1;
            }

            # Inside the author and translator blocks every link toggles the
            # flag between "waiting" (2) and "next text is a name" (1)
            foreach my $flag ('isAuthor', 'isTranslator')
            {
                if ($self->{$flag} eq '1')
                {
                    $self->{$flag} = 2;
                }
                elsif ($self->{$flag} eq '2')
                {
                    $self->{$flag} = 1;
                }
            }
        }
        elsif ($tagname eq 'h1')
        {
            if (($attr->{class} eq 'prodTitle') && ($self->{isTitle} eq '2'))
            {
                $self->{isTitle} = 1;
            }
        }
        elsif ($tagname eq 'h2')
        {
            if ($attr->{class} eq 'prodPerson')
            {
                $self->{isAuthor} = 2;
            }
        }
        elsif ($tagname eq 'dd')
        {
            if ($attr->{id} eq 'wrgTRANS')
            {
                $self->{isTranslator} = 2;
            }
        }
        elsif ($tagname eq 'img')
        {
            if ($self->{isCover} eq '1')
            {
                $self->{curInfo}->{cover} = "http://www.merlin.com.pl".$attr->{src};
                $self->{isCover} = 0;
            }
        }
    }


    # Closing-tag callback: </h2> ends the author block, </dd> ends the
    # translator block; the nesting counter for $tagname is lowered.
    sub end
    {
        my ($self, $tagname) = @_;

        $self->{isAuthor}     = 0 if ($tagname eq 'h2');
        $self->{isTranslator} = 0 if ($tagname eq 'dd');

        $self->{isFound} = 0;
        $self->{inside}->{$tagname}--;
    }

    sub text
    {
        my ($self, $origtext) = @_;

        if ($self->{parsingList})
        {
            if ($self->{isBook} eq '1')
            {
                $origtext =~ s/^\s*//m;
                $origtext =~ s/\s*$//m;
                $self->{isBook} = 0;
                if ($self->{inside}->{a})
                {
                    $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                    $self->{isBook} = 1;
                }
                else
                {
                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
                }
            }
            if ($self->{isPublisher} eq '1')
            {
                $origtext =~ s/^\s*//m;
                $origtext =~ s/\s*$//m;
                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
                $self->{isPublisher} = 0;
            }

        }
        else
        {
            $origtext =~ s/^\s*//m;
            $origtext =~ s/\s*$//m;

            if ($self->{isTitle} eq '1')
            {
                $self->{curInfo}->{title} = $origtext;
                $self->{isTitle} = 0;
            }
            if ($self->{isAuthor} eq '1')
            {
                $origtext =~
s|^\s*||; + $origtext =~ s|\s*$||; + if ($origtext ne '') + { + $self->{curInfo}->{authors} .= $origtext; + } + $self->{isAuthor} = 2; + } + if ($self->{isTranslator} eq '1') + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($self->{curInfo}->{translator} eq '') + { + $self->{curInfo}->{translator} = $origtext; + } + else + { + $self->{curInfo}->{translator} .= ", ".$origtext; + } + $self->{isTranslator} = 2; + } + if ($self->{isFormat} eq '1') + { + $origtext =~ s|okładka: ||m; + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + if ($self->{isDescription} eq '1') + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0; + } + + if ($self->{isISBN} eq '1') + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isPublisher} eq '1') + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + if ($self->{isPublication} eq '1') + { + $origtext =~ s|(\S*)\s*(\S{4})|$2|; + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + if ($self->{isSerie} eq '1') + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + 
$self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s|(.*?)|$1|gms; + $html =~ s|
  • (.*)
  • \s*
  • |
  • $1
  • |gm; + } + else + { + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|(.*?)||gs; + + $html =~ s|
    ISBN:
    (.*)
    |
    $1
    |; + $html =~ s|
    Liczba stron:
    (.*)
    |
    $1
    |; + $html =~ s|
    Seria:
    \s*
    (.*)
    |
    $1
    |m; + $html =~ s|
    Wydanie:
    (.*)
    |
    $1
    |; + $html =~ s|
    Wydawnictwo:
    \s*
    \s*(.*)\s*,*\s*(.*)\s*
    |
    $1
    $2
    |m; + $html =~ s|
    Tłumaczenie:\s*
    \s*
    |
    |m; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.merlin.com.pl/frontend/browse/search/1.html?phrase=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.merlin.com.pl/'; + } + + sub getName + { + return "Merlin"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm b/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm new file mode 100644 index 0000000..6bc22eb --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm @@ -0,0 +1,447 @@ +package GCPlugins::GCbooks::GCbooksNUKat; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchURL = ""; +my $searchISBN = ""; + +{ + package GCPlugins::GCbooks::GCPluginNUKat; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'title') #od razu mamy wynik + { + $self->{isBook} = 7; + } + + if (($tagname eq 'tr') && ($attr->{class} eq 'intrRow')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrRowCell1') && ($self->{isBook} == 1)) + { + $self->{isUrl} = 2; + } + if (($tagname eq 'a') && ($self->{isUrl} == 2) && ($origtext =~ /.*function=CARDSCR.*/)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{itemsList}[$self->{itemIdx}]->{url} =~ s|skin=portal&||; + $self->{isUrl} = 0; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrAutor') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrTytul') && ($self->{isBook} == 1)) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrWydaw') && ($self->{isBook} == 1)) + { + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'td') && ($attr->{class} eq 'wrgTITLE')) + { + $self->{isTitle} = 1; + $self->{isAuthor} = 1; + $self->{isTranslator} = 1; + $self->{isArtist} = 1; + $self->{isISBN} = 2; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgSERIA')) + { + $self->{isSerie} = 1; + } + if 
(($tagname eq 'td') && ($attr->{class} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgISBN') && ($self->{isISBN} == 2)) + { + $self->{isISBN} = 1; + } + + if (($tagname eq 'div') && ($attr->{class} eq 'prodFeatureSpec') && ($self->{isFormat} eq '2')) + { + $self->{isFormat} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isBook} == 7) #od razu mamy wynik? + { + if ($origtext =~ /Pełny opis/) + { + $self->{isUrl} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $searchURL; + $self->{isUrl} = 0; + $self->{isBook} = 0; + } + } + if ($self->{isBook} == 1) + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + if ($self->{isTitle} == 1) + { + $origtext =~ s|^\s*([^/]*)/?|$1|m; + $origtext =~ s|^\s*([^:]*):?|$1|m; + $origtext =~ s|\s*$||m; + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} == 1) + { + $origtext =~ s|\s*\/\s*(.*)\s*|$1|; + $origtext =~ s|^\s*([^\.]*)\.?|$1|m; + $origtext =~ s|([^\(]*)(\([^\)]*\))?|$1|; + $origtext =~ s|\s*$||m; + $origtext =~ s|([^,]*), (.*)|$2 $1|m; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + $self->{isAuthor} = 0; + } + if ($self->{isPublication} == 1) + { + $origtext =~ s|(.*)(\d{4})\D*|$2|s; + $origtext =~ s|^\s*([^\.]*)\.?|$1|m; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublication} = 0; + $self->{isBook} = 0; + } + } + + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isFormat} eq '1') + { + $origtext =~ s|okładka: ||m; + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + + + + if 
($self->{isISBN} eq '1') + { + my ($pom1, $pom2); + if ($self->{searchField} eq 'isbn') + { + $pom1 = $self->{searchISBN}; + $pom2 = $origtext; + $pom2 =~ s|[^\dX]||g; + $pom1 =~ s|-||g; + $pom2 =~ s|-||g; + if ($pom1 eq $pom2) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + else + { + $self->{isISBN} = 2; + } + } + else + { + $origtext =~ s|[^\dX]||g; + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + } + if ($self->{isTitle} eq '1') + { + my ($pom1, $pom2, $ti, $au, $tr, $il); + $origtext =~ m|([^/]*)/\s*([^;]*)(; )?([^;]*)(; )?([^;]*)$|; + $ti = $1; + $au = $2; + $pom1 = $4; + $pom2 = $6; + $ti =~ s|^\s*||; + $ti =~ s|\s*$||; + $self->{curInfo}->{title} = $ti; + $self->{isTitle} = 0; + $au =~ s| i |,|g; + $au =~ s|, |,|g; + $au =~ s|[\[\]]||g; + $au =~ s|tekst||g; + $au =~ s|^\s*||; + $au =~ s|\s*$||; + $au =~ s|(.*)(\.{1})|$1|; + $self->{curInfo}->{authors} = $au; + $self->{isAuthor} = 0; + $pom1 =~ s|[\[\]]||g; + $pom1 =~ m|(.*)(.{1})|; + if ($2 eq '.') + { + $pom1 = $1; + } + $pom2 =~ s|[\[\]]||g; + $pom2 =~ m|(.*)(.{1})|; + if ($2 eq '.') + { + $pom2 = $1; + } + if ($pom2 =~ /(przeł\.|przekł\.|tł\.|tłum\.)/) + { + $tr = $pom2; + } + if ($pom2 =~ /(il\.|oprac\. graf\.)/) + { + $il = $pom2; + } + if ($pom1 =~ /(przeł\.|przekł\.|tł\.|tłum\.)/) + { + $tr = $pom1; + } + if ($pom1 =~ /(il\.|oprac\. graf\.)/) + { + $il = $pom1; + } + $tr =~ s/(przeł\.|przekł\.|tł\.|tłum\.)//; + $tr =~ s|z \w+\.||; + $tr =~ s|^\s*||; + $tr =~ s|\s*$||; + $tr =~ s| i |,|g; + $tr =~ s|, |,|g; + $self->{curInfo}->{translator} = $tr; + $self->{isTranslator} = 0; + $il =~ s/(il\.|oprac\. 
graf\.)//; + $il =~ s|^\s*||; + $il =~ s|\s*$||; + $il =~ s| i |,|g; + $il =~ s|, |,|g; + $self->{curInfo}->{artist} = $il; + $self->{isArtist} = 0; + } + if ($self->{isPage} eq '1') + { + $origtext =~ s|(\d*)\D.*|$1|; + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $origtext =~ s|\D*(\d*)\D.*|$1|; + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isPublisher} eq '1') + { + my $pom = $origtext; + $origtext =~ s|[^:]*:\s*(.*),.*|$1|; + $origtext =~ s|^\s*||; + $origtext =~ s|"(.*)"|$1|; + $self->{curInfo}->{publisher} = $origtext; + $pom =~ s|(.*)(\d{4})(\D*)|$2|; + $self->{curInfo}->{publication} = $pom; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + } + if ($self->{isSerie} eq '1') + { + $origtext =~ s|([^;]*)(;.*)|$1|; + $origtext =~ s|\s*$||; + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + $self->{isArtist} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s|(.*?)|$1|gms; + $html =~ s|||g; + $html =~ s|||g; + $html =~ s|$1|gs; + $html =~ 
s|]*>Tytuł\s*
  • "); + if ( $found2 >= 0 ) + { + $html2 = substr($html2, 0, $found2); + } + + $html2 =~ s|
  • |\n* |gi; + $html2 =~ s|
    |\n|gi; + $html2 =~ s|
    |\n|gi; + $html2 =~ s|||gi; + $html2 =~ s|||gi; + $html2 =~ s|||gi; + $html2 =~ s|||gi; + $html2 =~ s|

    |\n|gi; + $html2 =~ s|

    ||gi; + $html2 =~ s|||gi; + $html2 =~ s|\x{92}|'|g; + $html2 =~ s|’|'|gi; + $html2 =~ s|•|*|gi; + $html2 =~ s|œ|oe|gi; + $html2 =~ s|…|...|gi; + $html2 =~ s|\x{85}|...|gi; + $html2 =~ s|\x{8C}|OE|gi; + $html2 =~ s|\x{9C}|oe|gi; + + $html = substr($html, 0, $found) . ">" . $html2 .""; + + } + + $html =~ s|

    ||gmi; + $html =~ s|

    |
    |gmi; + $html =~ s|
    <|<|gmi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.noosfere.org/icarus/livres/cyborg_livre.asp?mini=1000&maxi=3000&mode=Idem&EtOuParution=NS&isbn=". $word; + } + else + { + return "http://www.noosfere.org/icarus/livres/cyborg_livre.asp?mini=1000&maxi=3000&mode=Idem&EtOuParution=NS&titre=". $word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url; + return 'http://www.noosfere.org/'; + } + + sub getName + { + return "nooSFere"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm new file mode 100644 index 0000000..54dd119 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm @@ -0,0 +1,303 @@ +package GCPlugins::GCbooks::GCSaraiva; + +################################################### +# +# Plugin for a brazilian bookstore named "Saraiva". +# Code written by Guilherme "nirev" Nogueira. 
+# guilherme at nirev dot org +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginSaraiva; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'div') && ($attr->{class} eq 'hsliceLista')) + { + $self->{isResult} = 1; + $self->{linkCount} = 0; + $self->{itemIdx}++; + } + if (($tagname eq 'span') && ($attr->{class} eq 'entry-title')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'h2') && ($attr->{class} eq 'titulo_autor')) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'a') && $self->{isResult} && $self->{linkCount} == 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{linkCount}++; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'entry-content')) + { + $self->{isResult} = 0; + } + } + else + { + if (($tagname eq 'img') && ($attr->{id} eq 'imgProd')) + { + my $imgid = $attr->{src}; + $imgid =~ s/(.)*pro_id=//; + $imgid =~ s/&.*$//; + $self->{curInfo}->{cover} = 'http://www.livrariasaraiva.com.br/imagem/imagem.dll?tam=2&pro_id='.$imgid; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba1')) + { + $self->{isDescription} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba2')) + { + $self->{divInfo} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'produtosAbasMenus')) + { + $self->{divInfo} = 0; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'tituloprod')) + { + $self->{isTitle} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaAutor();')) + { + $self->{isAuthor} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaMarca();')) + { + $self->{isPublisher} = 1; + } + elsif (($tagname eq 'font')) + { + $self->{isAnalyse} = 1; + } + elsif (($tagname eq 
'b') && $self->{divInfo} == 1) + { + $self->{isAnalyse} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + my $texto = $origtext; + $self->{itemsList}[$self->{itemIdx}]->{title} = $texto; + $self->{isTitle} = 0; + } + if ($self->{isAuthor}) + { + my $texto = $origtext; + $texto =~ s/
    //; + my @dados = split(' / ', $texto); + $self->{itemsList}[$self->{itemIdx}]->{authors} = $dados[0]; + $self->{isAuthor} = 0; + } + } + else + { + if ($self->{isAuthor}) + { + my @authors = split(';', $origtext); + my $authors = ''; + my $tam = @authors; + my $count = 0; + for($count = 0; $count < $tam; $count++) + { + $authors[$count] =~ s/^\s*//gi; + $authors[$count] =~ s/\s*$//gi; + my @names = split(', ', $authors[$count]); + $authors .= ',' if ($count); + $authors .= $names[1].' '.$names[0]; + + } + $self->{curInfo}->{authors} = $authors; + $self->{isAuthor} = 0; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + elsif ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{curInfo}->{description} =~ s/^\s*//; + $self->{curInfo}->{description} =~ s/\s+/ /; + $self->{isDescription} = 0; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/I\.S\.B\.N/i); + $self->{isFormat} = 1 if ($origtext =~ m/Acabamento/i); + $self->{isPublication} = 1 if ($origtext =~ m/Edição/i); + $self->{isPage} = 1 if ($origtext =~ m/Número de Paginas/i); + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + elsif ($self->{isFormat}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + elsif ($self->{isPage}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 0, + serie => 0, + }; + + 
$self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isDescription} = 0; + $self->{isResult} = 0; + $self->{linkCount} = 0; + $self->{divInfo} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + my $inicio_res = index($html,'

    '); + if ( $inicio_res >= 0 ) + { + $html = substr($html, $inicio_res); + } + my $fim_res = index($html,''); + if ( $fim_res >= 0 ) + { + $html = substr($html, 0, $fim_res); + } + $html = '' if ($inicio_res < 0); + } + else + { + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + $word =~ s|\s+|\+|; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + else + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.livrariasaraiva.com.br".$url; + } + + sub getName + { + return "Saraiva"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'nirev'; + } + + sub getLang + { + return 'PT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm new file mode 100644 index 0000000..ee556dc --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm @@ -0,0 +1,331 @@ +package GCPlugins::GCbooks::GCbooksAdlibrisCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ( (($tagname eq 'div') && ($attr->{class} eq 'productTitleFormat')) + || (($tagname eq 'a') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_linkProductTitle')) + ) + { + $self->{isFound} = 1 ; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + elsif (($tagname eq 'a') && ($attr->{id} =~ m/_hlkTitle/i) && ($self->{isFound} eq '0')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.adlibris.com/" . $self->{isLang} . "/" . 
$attr->{href}; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} =~ m/ctl00_main_frame_ctrlsearchhit_rptSearchHit_ctl/i) && ($attr->{id} =~ m/_Label2/i) && ($self->{isFound} eq '0')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} =~ m/ctl00_main_frame_ctrlsearchhit_rptSearchHit_ctl/i) && ($attr->{id} =~ m/_Label4/i) && ($self->{isFound} eq '0')) + { + $self->{isFormat} = 1 ; + } + } + else + { + if (($tagname eq 'h1')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'li') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_liISBN13')) + { + $self->{isbnLevel} = 1 ; + } + elsif ($self->{isbnLevel} > 0) + { + if ($self->{isbnLevel} < 5) + { + $self->{isbnLevel}++ ; + } + else + { + $self->{isISBN} = 1 ; + $self->{isbnLevel} = 0 ; + } + } + elsif (($tagname eq 'a') && (($attr->{id} eq 'ctl00_main_frame_ctrlproduct_rptAuthor_ctl00_linkAuthor')) || ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_rptAuthor_ctl01_linkAuthor')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_linkPublisher')) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblPublished')) + { + $self->{isPublication} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblPages')) + { + $self->{isPages} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblLanguage')) + { + $self->{isLanguage} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblFormat')) + { + $self->{isReliure} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescription')) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_imgProduct_ProductImageNotLinked') && !($attr->{src} =~ m/\/noimage./i)) + { + $self->{curInfo}->{cover} = $attr->{src} ; + } + + } + } 
+ + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0 ; + $self->{inside}->{$tagname}--; + if (($self->{isDescription}) && ($tagname eq 'div')) + { + $self->{isDescription} = 0; + $self->{curInfo}->{description} =~ s/^Beskrivning://g ; + $self->{curInfo}->{description} =~ s/^Kuvaus://g ; + } + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @array = split(/&/,$origtext); + my $element; + foreach $element (@array) + { + my @nom_prenom = split(/,/,$element); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s+//; + $nom_prenom[1] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s$+//; + $nom_prenom[1] =~ s/\s$+//; + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $nom_prenom[1] ." " . 
$nom_prenom[0]; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isFormat}) + { + $self->{itemsList}[$self->{itemIdx}]->{format} = $origtext; + $self->{isFormat} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + $origtext =~ s/\s+$//; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{curInfo}->{isbn} =~ s/\s//g; + $self->{isISBN} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{curInfo}->{publication} =~ s/(\d\d\d\d)(\d\d)/01\/$2\/$1/g; + $self->{isPublication} = 0 ; + } + elsif ($self->{isPages}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPages} = 0 ; + } + elsif ($self->{isLanguage}) + { + $self->{curInfo}->{language} = $origtext; + $self->{isLanguage} = 0 ; + } + elsif ($self->{isReliure}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isReliure} = 0 ; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} .= $origtext ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 1, + edition => 0, + }; + + $self->{isLang} = 'se'; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isFormat} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isPublicationAndPages} = 0; + $self->{isLangAndReliure} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if 
($self->{parsingList}) + { + } + else + { + $html =~ s|
  • |\n* |g; + $html =~ s|
    |\n|g; + $html =~ s|
    |\n|g; + $html =~ s|

    |\n|g; + $html =~ s|||g; + $html =~ s|||g; + $html =~ s|||g; + $html =~ s|||g; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.adlibris.com/" . $self->{isLang} . "/searchresult.aspx?isbn=" . $word. "&%3BfromProduct=true"; + } + else + { + return "http://www.adlibris.com/" . $self->{isLang} . "/searchresult.aspx?title=" . $word. "&%3BfromProduct=true"; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Adlibris"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'SW'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm new file mode 100644 index 0000000..bc75766 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm @@ -0,0 +1,65 @@ +package GCPlugins::GCbooks::GCbooksAmazonCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; +use GCPlugins::GCstar::GCAmazonCommon; + +{ + package GCPlugins::GCbooks::GCbooksAmazonPluginsBase; + + use base ('GCPlugins::GCbooks::GCbooksPluginsBase', 'GCPlugins::GCstar::GCPluginAmazonCommon'); + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{codeField} = 'isbn'; + + return $self; + } + + sub isEAN + { + my ($self, $value) = @_; + + return $value =~ /^978/; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + + sub getEanField + { + return 'isbn'; + } +} + +1; \ No newline at end of file diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm new file mode 100644 index 0000000..a74e35a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm @@ -0,0 +1,61 @@ +package GCPlugins::GCbooks::GCbooksCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCPluginsBase; + +{ + package GCPlugins::GCbooks::GCbooksPluginsBase; + + use base qw(GCPluginParser); + use HTML::Entities; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + $self->{searchType} = 'books'; + return $self; + } + + sub getSearchFieldsArray + { + return ['title']; + } + + sub getEanField + { + my $self = shift; + my $fields = $self->getSearchFieldsArray; + return 'isbn' + if $fields->[0] eq 'isbn'; + return undef; + } + +} + +1; \ No newline at end of file -- cgit v1.2.3

  • \s*]*>]*)>\s*||gm; + $html =~ s| \s*|\n
    |gm; + $html =~ s|
    ]*>([^<]*)|$1|gs; + $html =~ s|]*>Adres wyd.\s*||gs; + } + else + { + $html =~ s|||gi; + $html =~ s|||gi; + $html =~ s|||gi; + + $html =~ s|]*>Tytuł\s*\s*]*>([^<]*)|$1|gs; + $html =~ s|]*>Strefa serii\s*\s*]*>([^<]*)|$1|gs; + $html =~ s|]*>Adres wydawniczy\s*||gs; + $html =~ s|]*>Opis fizyczny\s*||gs; + $html =~ s|]*>Oznaczenie wydania\s*||gs; + $html =~ s|]*>ISBN\s*||gs; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + my $bubu; + if ($self->{searchField} eq 'isbn') + { + $bubu = "7"; + $self->{searchISBN} = $word; + } + else + { + $bubu = "4"; + $self->{searchISBN} = ""; + } + $searchURL = "http://www.nukat.edu.pl/cgi-bin/gw_43_3/chameleon?host=193.0.118.2%2b1111%2bDEFAULT&search=KEYWORD&function=INITREQ&conf=.%2fchameleon.conf&lng=pl&u1=".$bubu."&t1=".$word; + return $searchURL; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.nukat.edu.pl/'; + } + + sub getName + { + return "NUKat"; + } + + sub getCharset + { + my $self = shift; + return "UTF-8"; + #return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm b/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm new file mode 100644 index 0000000..c878af9 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm @@ -0,0 +1,462 @@ +package GCPlugins::GCbooks::GCNooSFere; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginNooSFere; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + return if ( $self->{isFound} eq 2 ); + if (($tagname eq 'td') && ($attr->{class} eq 'onglet_bleu')) + { + $self->{isFound} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m/editionslivre.asp\?numitem=/i) && !($attr->{href} =~ m/numediteur=/i) && !($attr->{href} =~ m/tri=/i)) + { + $self->{isTitle} = 1 ; + $self->{isAuthor} = 0 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isAuthor} eq 0)) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|./editeur.asp\?numediteur=|i)) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|./serie.asp\?NumSerie=|i)) + { + $self->{isSerie} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m/editionslivre.asp\?numitem=/i) && ($attr->{href} =~ m/numediteur=/i)) + { + + my $html = $self->loadPage( "http://www.noosfere.org/icarus/livres/" . 
$attr->{href}, 0, 1 ); + my $found = index($html,"Fiche livre : les éditions"); + if ( $found >= 0 ) + { + + while (index($html,"./niourf.asp?numlivre=")) + { + $found = index($html,"./niourf.asp?numlivre="); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('./niourf.asp?numlivre='),length($html)- $found -length('./niourf.asp?numlivre=')); + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle}; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor}; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/niourf.asp?numlivre=" . substr($html, 0, index($html,"\"")); + } + else + { + last; + } + + } + } + else + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle}; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor}; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/" . $attr->{href}; + } + } + elsif ($tagname eq 'h1') + { + $self->{isTitle} = 1 ; + $self->{isAuthor} = 0 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|./niourf.asp\?numlivre=|i)) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle}; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor}; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/" . 
$attr->{href}; + } + elsif (($tagname eq 'td') && ($attr->{class} eq 'onglet_biblio1')) + { + $self->{isAuthor} = 2 ; + } + elsif (($tagname eq 'table') && ($attr->{class} eq 'piedpage')) + { + $self->{isAuthor} = 0 ; + } + } + else + { + if (($tagname eq 'mytpf') && ($attr->{id} eq 'TPFENDCOMMENTTPF')) + { + $self->{isDescription} = 0 ; + } + elsif (($tagname eq 'font') && ($attr->{class} eq 'TitreNiourf')) + { + $self->{isAnalyse} = 0 ; + $self->{isTitle} = 1 ; + $self->{isAuthor} = 0 ; + } + elsif (($tagname eq 'font') && ($attr->{class} eq 'AuteurNiourf')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isAuthor} eq 1)) + { + $self->{isAuthor} = 2 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|actu_mois.asp\?|i)) + { + $self->{isPublication} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|editeur.asp\?numediteur=|i) && ($self->{curInfo}->{publisher} eq '')) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|collection.asp\?NumCollection=|i) && ($self->{curInfo}->{serie} eq '')) + { + $self->{isSerie} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isTranslator} eq 1)) + { + $self->{isTranslator} = 2 ; + } + elsif ($tagname eq 'br') + { + $self->{isAnalyseTrans} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{style} eq 'font-size:12px;') && ($self->{isAnalyse} eq 0)) + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{name} eq 'couverture')) + { + $self->{curInfo}->{cover} = "http://www.noosfere.org/icarus/livres/" . 
$attr->{src} ; + } + elsif (($tagname eq 'mytpf') && ($attr->{id} eq 'TPFSTARTCOMMENTTPF')) + { + $self->{isDescription} = 1 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0 ; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + $self->{saveTitle} = $origtext; + $self->{saveAuthor} = ''; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor} eq 1) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if (($self->{saveAuthor} eq '') && ($origtext ne '')) + { + $self->{saveAuthor} = $origtext; + } + elsif ($origtext ne '') + { + $self->{saveAuthor} .= ', '; + $self->{saveAuthor} .= $origtext; + } + $self->{isAuthor} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isSerie}) + { + $self->{itemsList}[$self->{itemIdx}]->{serie} = $origtext; + $self->{isSerie} = 0 ; + } + elsif ($self->{isFound} eq 1) + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($origtext eq 'Fiche livre') + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + $self->{isFound} = 2 ; + } + else + { + $self->{isFound} = 0 ; + } + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isTitle} eq '1') + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAnalyse} eq 1) + { + my $found = index($origtext," pages"); + if ( $found >= 0 ) + { + $self->{curInfo}->{pages} = substr($origtext, 
0, $found); + } + $found = index($origtext,"ISBN : "); + if ( $found >= 0 ) + { + $self->{curInfo}->{isbn} = substr($origtext, $found +length('ISBN : '),length($origtext)- $found -length('ISBN : ')); + } + + $self->{isAnalyse} = 2 ; + } + elsif ($self->{isAnalyseTrans}) + { + $self->{isTranslator} = 1 if ($origtext =~ m/Traduction/i); + + $self->{isAnalyseTrans} = 0 ; + } + elsif ($self->{isAuthor} eq 2) + { + if (($self->{curInfo}->{authors} eq '') && ($origtext ne '')) + { + $self->{curInfo}->{authors} = $origtext; + } + elsif ($origtext ne '') + { + $self->{curInfo}->{authors} .= ', '; + $self->{curInfo}->{authors} .= $origtext; + } + $self->{isAuthor} = 1 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isSerie}) + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0 ; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0 ; + } + elsif ($self->{isTranslator} eq 2) + { + $self->{curInfo}->{translator} = $origtext; + $self->{isTranslator} = 0 ; + } + elsif ($self->{isDescription}) + { + if ($origtext =~ m/Pas de texte sur la quatri.me de couverture\./i) + { + } + else + { + $self->{curInfo}->{description} .= $origtext ."\n"; + } + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + serie => 1, + }; + + $self->{saveTitle} = ''; + $self->{saveAuthor} = ''; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isTranslator} = 0; + $self->{isAnalyseTrans} = 0; + $self->{isAnalyse} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if 
($self->{parsingList}) + { + } + else + { + # Le descriptif pouvant contenir des balises html je le repere maintenant + my $found = index($html,"Id=\"R"); + if ( $found >= 0 ) + { + my $html2 = substr($html, $found +length('Id="R'),length($html)- $found -length('Id="R')); + my $found2 = index($html2,""); + if ( $found2 >= 0 ) + { + $html2 = substr($html2, $found2 +length(''),length($html2)- $found2 -length('')); + } + + $found2 = index($html2,"