diff options
Diffstat (limited to 'lib/gcstar/GCPlugins/GCbooks')
32 files changed, 9864 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm new file mode 100644 index 0000000..34997a8 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisFI.pm @@ -0,0 +1,59 @@ +package GCPlugins::GCbooks::GCAdlibrisFI; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksAdlibrisCommon; + +{ + package GCPlugins::GCbooks::GCPluginAdlibrisFI; + + use base qw(GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase); + use URI::Escape; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{isLang} = 'fi'; + + return $self; + } + + sub getName + { + return "Adlibris (FI)"; + } + + sub getLang + { + return 'FI'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm new file mode 100644 index 0000000..f17abdb --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAdlibrisSV.pm @@ -0,0 +1,59 @@ +package GCPlugins::GCbooks::GCAdlibrisSV; + +################################################### +# +# Copyright 
2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksAdlibrisCommon; + +{ + package GCPlugins::GCbooks::GCPluginAdlibrisSV; + + use base qw(GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase); + use URI::Escape; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{isLang} = 'se'; + + return $self; + } + + sub getName + { + return "Adlibris (SV)"; + } + + sub getLang + { + return 'SV'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm b/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm new file mode 100644 index 0000000..44f3da0 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAlapage.pm @@ -0,0 +1,391 @@ +package GCPlugins::GCbooks::GCAlapage;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginAlapage;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+ # Start-tag callback: receives the tag name and its attribute hash and
+ # updates the state flags kept on $self. The text() callback of this
+ # package later consumes those flags.
+ #
+ # When $self->{parsingList} is true (search-results page) it tracks
+ # product blocks, opens a new entry in $self->{itemsList} for the first
+ # link of a block and arms the author / publisher flags.
+ # Otherwise (item page) it arms the per-field flags and, for the cover
+ # link, fetches the linked page through $self->loadPage to extract the
+ # picture URL(s).
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     if ($self->{parsingList})
+     {
+         if ($tagname eq 'div')
+         {
+             if ($attr->{class} eq 'infos_produit')
+             {
+                 # Entering a product block; its first link carries the URL.
+                 $self->{isBook} = 1;
+                 $self->{isUrl} = 1;
+             }
+             else
+             {
+                 # Any other div ends the current product block.
+                 $self->{isBook} = 0;
+             }
+         }
+         elsif ($tagname eq 'a')
+         {
+             if ($self->{isUrl} && $self->{isBook})
+             {
+                 # First link of the block: create the result entry.
+                 $self->{itemIdx}++;
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                 $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};
+                 $self->{isUrl} = 0;
+             }
+             elsif ((index($attr->{href}, "mot_auteurs") >= 0) && $self->{isBook})
+             {
+                 $self->{isAuthor} = 1;
+             }
+         }
+         elsif ($tagname eq 'br')
+         {
+             # The text following a line break inside a block is read as
+             # "publisher, ..., publication" by text().
+             $self->{isPublisher} = 1 if $self->{isBook};
+         }
+     }
+     else
+     {
+         # A label seen by text() leaves its flag at 1; the next opening tag
+         # (whatever it is) promotes the first such flag to 2 so that the
+         # following text node is taken as the value.
+         my ($pending) = grep { $self->{$_} eq 1 } qw(isISBN isPublication isFormat isPage);
+
+         if (defined $pending)
+         {
+             $self->{$pending} = 2;
+         }
+         elsif ($tagname eq 'h2')
+         {
+             $self->{isTitle} = 1;
+         }
+         elsif ($tagname eq 'tpfcommentaire')
+         {
+             # Marker tag inserted by preProcess() after each </h3>.
+             $self->{isDescription} = 2 if ($self->{isDescription} eq 1);
+         }
+         elsif ($tagname eq 'li')
+         {
+             $self->{isAnalyse} = 1;
+         }
+         elsif ($tagname eq 'a')
+         {
+             my $href = $attr->{href};
+
+             if (index($href, "mot_auteurs") >= 0)
+             {
+                 $self->{isAuthor} = 1;
+             }
+             elsif (($attr->{class} eq 'thickbox tooltip') && ($self->{curInfo}->{cover} eq ''))
+             {
+                 # Cover link: load the target page and pick the URL that
+                 # follows the "laplusgrande" marker.
+                 my $page = $self->loadPage("http://www.alapage.com" . $href, 0, 1);
+                 my $marker = '"laplusgrande"';
+                 my $markerPos = index($page, $marker);
+                 if ($markerPos >= 0)
+                 {
+                     # Must be tested on the whole page, before it is cut.
+                     my $hasBackPic = (index($page, "&m=v") >= 0);
+                     my @pieces = split(/"/, substr($page, $markerPos + length($marker)));
+
+                     $self->{curInfo}->{cover} = "http://www.alapage.com" . $pieces[1];
+                     if ($hasBackPic)
+                     {
+                         # Back picture uses the same URL with m=v instead of m=r.
+                         $self->{curInfo}->{backpic} = $self->{curInfo}->{cover};
+                         $self->{curInfo}->{backpic} =~ s|&m=r|&m=v|gi;
+                     }
+                 }
+             }
+             elsif (index($href, "mot_cdu") >= 0)
+             {
+                 $self->{isGenre} = 1;
+             }
+             elsif (index($href, "mot_coll_serie") >= 0)
+             {
+                 $self->{isSerie} = 1;
+             }
+             elsif ((index($href, "mot_editeur") >= 0) && (index($href, "mot_coll_serie") == -1))
+             {
+                 $self->{isPublisher} = 1;
+             }
+             elsif ($attr->{name} eq 'comment')
+             {
+                 $self->{isDescription} = 1;
+             }
+             elsif ($attr->{name} ne '')
+             {
+                 # Any other named anchor ends the description section.
+                 $self->{isDescription} = 0;
+             }
+         }
+         elsif ($tagname eq 'div')
+         {
+             if (($attr->{class} eq 'blocWithMargin') && ($self->{isDescription}) && ($self->{curInfo}->{description} eq ''))
+             {
+                 $self->{isDescription} = 2;
+             }
+             elsif ($attr->{class} eq 'edito FP_commentaire')
+             {
+                 $self->{isDescription} = 1;
+             }
+         }
+     }
+ }
+
+ # End-tag callback: decrements the nesting counter that start()
+ # incremented for the same tag name.
+ sub end
+ {
+     my $self = shift;
+     my $closedTag = shift;
+
+     $self->{inside}{$closedTag}--;
+ }
+
+ # Text callback: stores the current text node into the field selected by
+ # the flags that start() (or an earlier call to text()) has armed, then
+ # clears the flag that was consumed.
+ #
+ # List mode ($self->{parsingList} true) fills the current entry of
+ # $self->{itemsList}:
+ #   - isAuthor    : appends the trimmed text to 'authors' (", " separated);
+ #   - isPublisher : splits the text on commas; the first part becomes
+ #                   'edition' and, when there is more than one part, the
+ #                   last part becomes 'publication'.
+ # Item mode fills $self->{curInfo}. Labels found inside <li> elements
+ # (ISBN, Dimensions, Date de parution, Nombre de pages) only arm the
+ # corresponding flag; the value is read from a later text node.
+ sub text
+ {
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         if ($self->{isAuthor})
+         {
+             # Strip leading and trailing whitespace.
+             $origtext =~ s/^\s+//;
+             $origtext =~ s/\s+$//;
+
+             my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});
+             if ($item->{authors} eq '')
+             {
+                 $item->{authors} = $origtext;
+             }
+             else
+             {
+                 $item->{authors} .= ', ' . $origtext;
+             }
+             $self->{isAuthor} = 0;
+         }
+         elsif ($self->{isPublisher})
+         {
+             my @parts = split(/,/, $origtext);
+             my $item = ($self->{itemsList}[$self->{itemIdx}] ||= {});
+
+             $item->{edition} = $parts[0];
+             $item->{edition} =~ s/^\s+//;
+             $item->{edition} =~ s/\s+$//;
+
+             # Only a text with at least two comma-separated parts carries a
+             # publication date. The count is tested numerically so that an
+             # empty text (no parts at all) no longer overwrites an existing
+             # publication with undef.
+             if (@parts > 1)
+             {
+                 $item->{publication} = $parts[-1];
+                 $item->{publication} =~ s/^\s+//;
+                 $item->{publication} =~ s/\s+$//;
+             }
+
+             $self->{isPublisher} = 0;
+         }
+     }
+     else
+     {
+         # Strip leading whitespace.
+         $origtext =~ s/^\s+//;
+         if ($self->{isTitle})
+         {
+             $self->{curInfo}->{title} = $origtext;
+             $self->{curInfo}->{language} = 'Français';
+             $self->{isTitle} = 0;
+         }
+         elsif ($self->{isAuthor})
+         {
+             # Authors are accumulated as a comma-terminated list.
+             $self->{curInfo}->{authors} .= $origtext;
+             $self->{curInfo}->{authors} .= ",";
+             $self->{isAuthor} = 0;
+         }
+         elsif ($self->{isAnalyse})
+         {
+             # Label of a detail line: arm the matching flag; start() will
+             # promote it to 2 on the next opening tag.
+             $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i);
+             $self->{isFormat} = 1 if ($origtext =~ m/Dimensions/i);
+             $self->{isPublication} = 1 if ($origtext =~ m/Date de parution/i);
+             $self->{isPage} = 1 if ($origtext =~ m/Nombre de pages/i);
+
+             $self->{isAnalyse} = 0;
+         }
+         elsif ($self->{isISBN} == 2)
+         {
+             $self->{curInfo}->{isbn} = $origtext;
+             $self->{isISBN} = 0;
+         }
+         elsif ($self->{isGenre})
+         {
+             # Genres are accumulated as a comma-terminated list.
+             foreach my $genre (split(/,/, $origtext))
+             {
+                 $genre =~ s/^\s+//;
+                 $self->{curInfo}->{genre} .= $genre;
+                 $self->{curInfo}->{genre} .= ",";
+             }
+             $self->{isGenre} = 0;
+         }
+         elsif ($self->{isPublisher})
+         {
+             $self->{curInfo}->{publisher} = $origtext;
+             $self->{isPublisher} = 0;
+         }
+         elsif ($self->{isSerie})
+         {
+             $self->{curInfo}->{serie} = $origtext;
+             $self->{isSerie} = 0;
+         }
+         elsif ($self->{isFormat} == 2)
+         {
+             $self->{curInfo}->{format} = $origtext;
+             $self->{isFormat} = 0;
+         }
+         elsif ($self->{isPublication} == 2)
+         {
+             $self->{curInfo}->{publication} = $origtext;
+             $self->{isPublication} = 0;
+         }
+         elsif ($self->{isPage} == 2)
+         {
+             $self->{curInfo}->{pages} = $origtext;
+             $self->{isPage} = 0;
+         }
+         elsif ($self->{isDescription} == 2)
+         {
+             $self->{curInfo}->{description} = $origtext;
+             $self->{isDescription} = 0;
+         }
+     }
+ }
+
+ # Constructor: builds the object through the parent class, declares which
+ # fields are filled for search results ($self->{hasField}) and resets all
+ # the state flags used by start() and text() to 0.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self = bless($class->SUPER::new(), $class);
+
+     my %listFields = (
+         title => 1,
+         authors => 1,
+         publication => 1,
+         format => 0,
+         edition => 1,
+         serie => 0,
+     );
+     $self->{hasField} = \%listFields;
+
+     $self->{$_} = 0
+         foreach qw(isBook isUrl isTitle isAuthor isPublisher isAnalyse isISBN
+                    isGenre isPublication isPage isFormat isSerie isDescription);
+
+     return $self;
+ }
+
+ # Rewrites the raw HTML before it is parsed and returns the result.
+ # Each rule is a literal, case-insensitive, global replacement; rules are
+ # applied one after the other in the order listed.
+ # On item pages two marker tags are inserted (<tpfanalyse> after each
+ # </strong>, <tpfcommentaire> after each </h3>); start() reacts to
+ # <tpfcommentaire>.
+ sub preProcess
+ {
+     my ($self, $html) = @_;
+
+     my @rules;
+     if ($self->{parsingList})
+     {
+         @rules = (
+             ['<p>', ''],
+             ['</p>', ''],
+         );
+     }
+     else
+     {
+         @rules = (
+             # Listed twice on purpose: the sequence of substitutions is kept
+             # exactly as it was (the second pass only matters if the first
+             # removal splices a new match together).
+             ['<font style="font-size:13px;">', ''],
+             ['<font style="font-size:13px;">', ''],
+             ['</font>', ''],
+             ['<strong>', ''],
+             ['</strong>', '</strong><tpfanalyse>'],
+             ['</h3>', '</h3><tpfcommentaire>'],
+             ['<p>', ''],
+             ['</p>', ''],
+         );
+     }
+
+     foreach my $rule (@rules)
+     {
+         my ($literal, $replacement) = @{$rule};
+         $html =~ s|\Q$literal\E|$replacement|gi;
+     }
+
+     return $html;
+ }
+
+ # Returns the search URL for $word: an ISBN query when
+ # $self->{searchField} is 'isbn', a title query otherwise.
+ # $word is appended as received (no escaping is done here).
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     my $prefix = ($self->{searchField} eq 'isbn')
+         ? "http://www.alapage.com/-/Recherche/?type=1&mot_isbn="
+         : "http://www.alapage.com/-/Recherche/?type=1&mot_titre=";
+
+     return $prefix . $word;
+ }
+
+ # Turns the site-relative link stored for a search result into an
+ # absolute URL by prefixing the site address.
+ sub getItemUrl
+ {
+     my $self = shift;
+     my $relative = shift;
+
+     return join('', "http://www.alapage.com", $relative);
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     my $pluginName = "Alapage";
+     return $pluginName;
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     my $pluginAuthor = 'TPF';
+     return $pluginAuthor;
+ }
+
+ # Language code of the site handled by this plugin.
+ sub getLang
+ {
+     my $langCode = 'FR';
+     return $langCode;
+ }
+
+ # Search fields supported by this plugin, as a new array reference on
+ # each call; getSearchUrl() handles 'isbn' and falls back to a title query.
+ sub getSearchFieldsArray
+ {
+     my @fields = qw(isbn title);
+     return \@fields;
+ }
+
+ # Character set name reported by this plugin.
+ sub getCharset
+ {
+     my ($self) = @_;
+
+     my $charset = "ISO-8859-15";
+     return $charset;
+ }
+
+ # File suffix reported by this plugin for pictures.
+ sub getDefaultPictureSuffix
+ {
+     my $suffix = '.jpg';
+     return $suffix;
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm new file mode 100644 index 0000000..7d70ec4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazon.pm @@ -0,0 +1,352 @@ +package GCPlugins::GCbooks::GCAmazon; + +################################################### +# +# Copyright 2005-2009 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginAmazon; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use XML::Simple; + use LWP::Simple qw($ua); + use Encode; + use HTML::Entities; + use GCUtils; + + sub parse + { + my ($self, $page) = @_; + return if $page =~ /^<!DOCTYPE html/; + my $xml; + my $xs = XML::Simple->new; + + if ($self->{parsingList}) + { + $xml = $xs->XMLin($page, ForceArray => ['Item','Author'], KeyAttr => []); + my $book; + foreach $book ( @{ $xml -> {'Items'} -> {'Item'} }) + { + $self->{itemIdx}++; + my $url = $self->baseAWSUrl."&Operation=ItemLookup&ResponseGroup=Large,EditorialReview&ItemId=".$book->{ASIN}; + + $self->{itemsList}[$self->{itemIdx}]->{url} = $url; + $self->{itemsList}[$self->{itemIdx}]->{title} = $book->{ItemAttributes}->{'Title'}; + for my 
$author (@{$book->{ItemAttributes}->{'Author'}}) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " + if $self->{itemsList}[$self->{itemIdx}]->{authors}; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $author; + } + $self->{itemsList}[$self->{itemIdx}]->{publication} = $book->{ItemAttributes}->{'PublicationDate'}; + $self->{itemsList}[$self->{itemIdx}]->{format} = $book->{ItemAttributes}->{'Binding'}; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $book->{ItemAttributes}->{'Edition'}; + } + } + else + { + $xml = $xs->XMLin($page, ForceArray => ['Author','EditorialReview','Language'], KeyAttr => []); + $self->{curInfo}->{title} = $xml->{Items}->{Item}->{ItemAttributes}->{Title}; + for my $author (@{$xml->{Items}->{Item}->{ItemAttributes}->{Author}}) + { + push @{$self->{curInfo}->{authors}}, [$author]; + } + + my $htmlDescription; + if ($xml->{Items}->{Item}->{EditorialReviews}->{EditorialReview}[0]->{Content}) + { + $htmlDescription = $xml->{Items}->{Item}->{EditorialReviews}->{EditorialReview}[0]->{Content}; + } + else + { + # Unfortunately the api doesn't always return the product description, which is due to + # copyright concerns or something. In this case, grab the product html and parse it for + # the description. + my $response = $ua->get($xml->{Items}->{Item}->{DetailPageURL}); + my $result; + eval { + $result = $response->decoded_content; + }; + + # Replace some bad characters. 
TODO - will probably need to extend this for de/jp plugins + $result =~ s|\x{92}|'|gi; + $result =~ s|’|'|gi; + $result =~ s|•|*|gi; + $result =~ s|œ|oe|gi; + $result =~ s|…|...|gi; + $result =~ s|\x{85}|...|gi; + $result =~ s|\x{8C}|OE|gi; + $result =~ s|\x{9C}|oe|gi; + $result =~ s|ü|ü|g; + $result =~ s|ß|ß|g; + $result =~ s|ö|ö|g; + $result =~ s|Ü|Ü|g; + $result =~ s|ä|ä|g; + $result =~ s/„/»/gm; + $result =~ s/“/«/gm; + + # Chop out the product description + $result =~ /<div class="productDescriptionWrapper">(.*?)<(\/)*?div/s; + $htmlDescription = $1; + + # Decode + decode_entities($htmlDescription); + $htmlDescription = decode('ISO-8859-1', $htmlDescription); + } + + # Replace some html with line breaks, strip out the rest + $htmlDescription =~ s/<br>/\n/ig; + $htmlDescription =~ s/<p>/\n\n/ig; + $htmlDescription =~ s/<(.*?)>//gi; + $htmlDescription =~ s/^\s*//; + $htmlDescription =~ s/\s*$//; + $htmlDescription =~ s/ {1,}/ /g; + $self->{curInfo}->{description} = $htmlDescription; + + $self->{curInfo}->{publisher} = $xml->{Items}->{Item}->{ItemAttributes}->{Publisher} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Publisher})); + $self->{curInfo}->{publication} = $xml->{Items}->{Item}->{ItemAttributes}->{PublicationDate} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{PublicationDate})); + $self->{curInfo}->{language} = $xml->{Items}->{Item}->{ItemAttributes}->{Languages}->{Language}[0]->{Name} + if (ref($xml->{Items}->{Item}->{ItemAttributes}->{Languages}->{Language})); + $self->{curInfo}->{pages} = $xml->{Items}->{Item}->{ItemAttributes}->{NumberOfPages} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{NumberOfPages})); + $self->{curInfo}->{isbn} = $xml->{Items}->{Item}->{ItemAttributes}->{EAN} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{EAN})); + $self->{curInfo}->{format} = $xml->{Items}->{Item}->{ItemAttributes}->{Binding} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Binding})); + $self->{curInfo}->{edition} = 
$xml->{Items}->{Item}->{ItemAttributes}->{Edition} + if (!ref($xml->{Items}->{Item}->{ItemAttributes}->{Edition})); + $self->{curInfo}->{web} = $xml->{Items}->{Item}->{DetailPageURL}; + + # Genre handling via Amazon's browsenodes. Stupidly complicated way of doing things, IMO + # Loop through all the nodes: + for my $node (@{$xml->{Items}->{Item}->{BrowseNodes}->{BrowseNode}}) + { + my $genre = ''; + my $ancestor = $node; + + # Push the lowest node to the temporary genre list + my @genre_list = ($node->{Name}); + + # Start stepping down through the current node to find it's children + while ($ancestor->{Ancestors}->{BrowseNode}) + { + $ancestor = $ancestor->{Ancestors}->{BrowseNode}; + if (($ancestor->{Name} eq 'Specialty Stores') || + ($ancestor->{Name} eq 'Refinements') || + ($ancestor->{Name} eq 'Self Service') || + ($ancestor->{Name} eq 'Specialty Boutique')) + { + # Some categories we definetly want to ignore, since they are full of rubbish tags + $genre = 'ignore'; + last; + } + elsif ($ancestor->{Name} =~ m/A\-Z/) + { + # Clear out the current genres from the node, will be full of rubbish like "Authors A-K" + # Keep looping afterwards though, since there could be valid tags below the author + # specific ones + undef(@genre_list); + } + elsif ($ancestor->{Name} eq 'Subjects') + { + # Don't go deeper than a Subjects node + last; + } + else + { + # Add the current node to the temporary list, if it's not already included in either list + push @genre_list, $ancestor->{Name} + if ((!GCUtils::inArrayTest($ancestor->{Name}, @genre_list)) && + (!GCUtils::inArrayTest($ancestor->{Name}, @{$self->{curInfo}->{genre}}))); + } + } + + if ($genre ne 'ignore') + { + # Add temporary list to item info + push @{$self->{curInfo}->{genre}}, [$_] foreach @genre_list; + } + } + + # Let's sort the list for good measure + @{$self->{curInfo}->{genre}} = sort @{$self->{curInfo}->{genre}}; + + + # Fetch either the big original pic, or just the small thumbnail pic + if ($self->{bigPics}) 
+ { + $self->{curInfo}->{cover} = $xml->{Items}->{Item}->{LargeImage}->{URL}; + } + else + { + $self->{curInfo}->{cover} = $xml->{Items}->{Item}->{SmallImage}->{URL}; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 1, + }; + + return $self; + } + + sub getItemUrl + { + my ($self, $url) = @_; + if (!$url) + { + # If we're not passed a url, return a hint so that gcstar knows what type + # of addresses this plugin handles + $url = "http://".$self->baseWWWamazonUrl(); + } + elsif ($url !~ m/sowacs.appspot.com/) + { + # Convert amazon url to aws url + $url =~ /\/dp\/(\w*)[\/|%3F]/; + my $asinid = $1; + $url = $self->baseAWSUrl."&Operation=ItemLookup&ResponseGroup=Large,EditorialReview&ItemId=".$asinid; + } + return $url; + } + + sub preProcess + { + my ($self, $html) = @_; + + return $html; + } + + sub decodeEntitiesWanted + { + return 0; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + my $key = + ($self->{searchField} eq 'authors') ? 'Author' : + ($self->{searchField} eq 'title') ? 'Title' : + ($self->{searchField} eq 'isbn') ? 
'Keywords' : + ''; + $word =~ s/\D//g + if $key eq 'Keywords'; + return $self->baseAWSUrl."&Operation=ItemSearch&$key=$word&SearchIndex=Books&ResponseGroup=Medium"; + } + + sub baseAWSUrl + { + my $self = shift; + return "http://sowacs.appspot.com/AWS/%5Bamazon\@gcstar.org%5D".$self->baseAmazonUrl()."/onca/xml?Service=AWSECommerceService&AWSAccessKeyId=AKIAJJ5TJWI62A5OOTQQ&AssociateTag=AKIAJJ5TJWI62A5OOTQQ"; + } + + sub baseAmazonUrl + { + return "ecs.amazonaws.com"; + } + + sub baseWWWamazonUrl + { + return "www.amazon.com"; + } + + sub changeUrl + { + my ($self, $url) = @_; + # Make sure the url is for the api, not the main movie page + return $self->getItemUrl($url); + } + + sub getName + { + return "Amazon (US)"; + } + + sub getAuthor + { + return 'Zombiepig'; + } + + sub getLang + { + return 'EN'; + } + + sub getCharset + { + my $self = shift; + + return "UTF-8"; + } + + sub getSearchCharset + { + my $self = shift; + + # Need urls to be double character encoded + return "utf8"; + } + + sub convertCharset + { + my ($self, $value) = @_; + return $value; + } + + sub getNotConverted + { + my $self = shift; + return []; + } + + sub isPreferred + { + return 1; + } + + sub getSearchFieldsArray + { + return ['title', 'authors', 'isbn']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm new file mode 100644 index 0000000..eb51a4c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonCA.pm @@ -0,0 +1,61 @@ +package GCPlugins::GCbooks::GCAmazonCA;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCAmazon;
+
+{
+    # Canadian variant of the Amazon books plugin: inherits everything from
+    # GCPluginAmazon and only overrides the host names, the display name,
+    # the language code and the "preferred" marker.
+    package GCPlugins::GCbooks::GCPluginAmazonCA;
+
+    use base qw(GCPlugins::GCbooks::GCPluginAmazon);
+
+    # Display name of the plugin.
+    sub getName { return "Amazon (CA)"; }
+
+    # Language code of the plugin.
+    sub getLang { return 'EN'; }
+
+    # This variant does not mark itself as preferred.
+    sub isPreferred { return 0; }
+
+    # Host name of the public web site.
+    sub baseWWWamazonUrl { return "www.amazon.ca"; }
+
+    # Host name of the web-service endpoint.
+    sub baseAmazonUrl { return "ecs.amazonaws.ca"; }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm new file mode 100644 index 0000000..0c87502 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonDE.pm @@ -0,0 +1,56 @@ +package GCPlugins::GCbooks::GCAmazonDE; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCAmazon; + +{ + package GCPlugins::GCbooks::GCPluginAmazonDE; + + use base qw(GCPlugins::GCbooks::GCPluginAmazon); + + sub baseWWWamazonUrl + { + return "www.amazon.de"; + } + + sub baseAmazonUrl + { + return "ecs.amazonaws.de"; + } + + sub getName + { + return "Amazon (DE)"; + } + + sub getLang + { + return 'DE'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm new file mode 100644 index 0000000..d87af48 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonFR.pm @@ -0,0 +1,57 @@ +package GCPlugins::GCbooks::GCAmazonFR; + +################################################### +# +# Copyright 2005-2009 Tian +# +# This file is part of GCstar. 
+# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCAmazon; + +{ + package GCPlugins::GCbooks::GCPluginAmazonFR; + + use base qw(GCPlugins::GCbooks::GCPluginAmazon); + + sub baseWWWamazonUrl + { + return "www.amazon.fr"; + } + + sub baseAmazonUrl + { + return "ecs.amazonaws.fr"; + } + + sub getName + { + return "Amazon (FR)"; + } + + sub getLang + { + return 'FR'; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm b/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm new file mode 100644 index 0000000..e39a2de --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCAmazonUK.pm @@ -0,0 +1,61 @@ +package GCPlugins::GCbooks::GCAmazonUK; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCAmazon; + +{ + package GCPlugins::GCbooks::GCPluginAmazonUK; + + use base qw(GCPlugins::GCbooks::GCPluginAmazon); + + sub baseWWWamazonUrl + { + return "www.amazon.co.uk"; + } + + sub baseAmazonUrl + { + return "ecs.amazonaws.co.uk"; + } + + sub getName + { + return "Amazon (UK)"; + } + + sub getLang + { + return 'EN'; + } + + sub isPreferred + { + return 0; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm b/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm new file mode 100644 index 0000000..36074aa --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCBDGest.pm @@ -0,0 +1,477 @@ +package GCPlugins::GCbooks::GCBDGest;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginBDGest;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+ # tableau pour stocker l'identifiant propre à bdgest
+ my @tableau;
+ # Start-tag callback: receives the tag name and its attribute hash and
+ # updates the state flags kept on $self.
+ #
+ # List mode ($self->{parsingList} true): every <tr> starts a candidate
+ # row; the first link of a row whose href contains "serie-" creates a new
+ # entry in $self->{itemsList}. The part of the href after '#' is remembered
+ # in @tableau (same index as the entry) so that the item pass can locate
+ # the album anchor on the series page.
+ #
+ # Item mode: the anchor whose name equals the remembered identifier starts
+ # the wanted album; the various "== 1" flags are promoted to 2 when the
+ # expected cell/tag opens.
+ sub start
+ {
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     # Parsing a list of results.
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'tr'))
+         {
+             $self->{isBook} = 1 ;
+             $self->{isUrl} = 1 ;
+         }
+         elsif (($tagname eq 'a') && ($self->{isUrl}) && ($self->{isBook}) && (index($attr->{href},"serie-") >= 0))
+         {
+             $self->{itemIdx}++;
+             $self->{isFound} = 1 ;
+             $tableau[$self->{itemIdx}] = substr($attr->{href},index( $attr->{href},"#")+1);
+             # Rework the URL ("__10000" is inserted before the first '.')
+             # so that all the pages of the series are returned.
+             my $urlRecherche = substr($attr->{href},0,index($attr->{href},"."))."__10000".substr($attr->{href},index($attr->{href},"."));
+             $self->{itemsList}[$self->{itemIdx}]->{url} = $urlRecherche;
+             $self->{isSerie} = 1 ;
+             $self->{isUrl} = 0 ;
+         }
+         elsif (($tagname eq 'a') && ($attr->{name} eq 'TitreAlbum')&& ($self->{isBook}) && ($attr->{title} ne ''))
+         {
+             $self->{isTitle} = 1 ;
+             $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};
+         }
+         elsif (($tagname eq 'td') && $self->{isTitle} eq 1)
+         {
+             $self->{isPublisher} = 1 ;
+             $self->{isTitle} = 0;
+         }
+         elsif (($tagname eq 'td') && $self->{isPublisher} eq 2)
+         {
+             $self->{isPublication} = 1 ;
+             $self->{isPublisher} = 0;
+         }
+     }
+     else # Parsing one item.
+     {
+         if (($tagname eq 'a') && ($attr->{name} eq $tableau[$self->{wantedIdx}]))
+         {
+             $self->{isTitle} = 1 ;
+             $self->{isCover} = 1;
+             $self->{isBook} = 1 ;
+         }
+         elsif ($tagname eq 'html')
+         {
+             $self->{isCover} = 0 ;
+         }
+         elsif ($tagname eq 'head')
+         {
+             $self->{isCover} = 0 ;
+         }
+         elsif (($tagname eq 'a') && ($attr->{name} ne $tableau[$self->{wantedIdx}]) && ($attr->{name} ne ''))
+         {
+             $self->{isBook} = 0 ;
+         }
+         elsif (($tagname eq 'font') && ($attr->{color} eq '#294A6B') && ($attr->{style} eq 'font-family:Trebuchet MS; FONT-SIZE: 11pt;') && ($self->{isTitle} eq 1))
+         {
+             $self->{isTitle} = 2 ;
+         }
+         elsif (($tagname eq 'a') && ($self->{isCover} eq 0))
+         {
+             # Until the wanted album anchor is reached, each link overrides
+             # the cover, so the last link seen before the album wins.
+             my $urlimage = $attr->{href};
+             $urlimage =~ s/\'//g;
+             $urlimage =~ s/\)//g;
+             my $coverPos = index($urlimage,"Couvertures/");
+             if ($coverPos >= 0)
+             {
+                 $self->{curInfo}->{cover} = 'http://www.bedetheque.com/'.substr($urlimage,$coverPos);
+             }
+             else
+             {
+                 # index() returned -1: substr($urlimage, -1) would keep only
+                 # the last character of the href and build a bogus picture
+                 # URL. A link that is not a cover link clears the cover.
+                 # NOTE(review): confirm against the live page layout that
+                 # clearing (rather than keeping the previous cover) is wanted.
+                 $self->{curInfo}->{cover} = '';
+             }
+         }
+         elsif (($tagname eq 'a') && ($self->{isBook}) && (index($attr->{href},"auteur") >= 0))
+         {
+             $self->{isAuthor} = 1 ;
+         }
+         elsif (($tagname eq 'td') && ($self->{isPublisher} eq 1))
+         {
+             $self->{isPublisher} = 2 ;
+         }
+         elsif (($tagname eq 'td') && $self->{isPublication} eq 1)
+         {
+             $self->{isPublication} = 2 ;
+         }
+         elsif (($tagname eq 'td') && $self->{isEdition} eq 1)
+         {
+             $self->{isEdition} = 2 ;
+         }
+         elsif (($tagname eq 'td') && $self->{isFormatPublication} eq 1)
+         {
+             $self->{isFormatPublication} = 2 ;
+         }
+         elsif (($tagname eq 'td') && $self->{isISBN} eq 1)
+         {
+             $self->{isISBN} = 2 ;
+         }
+         elsif (($tagname eq 'td') && $self->{isPage} eq 1)
+         {
+             $self->{isPage} = 2 ;
+         }
+         elsif (($tagname eq 'i') && $self->{isDescription} eq 1)
+         {
+             $self->{isDescription} = 2 ;
+         }
+     }
+ }
+
+    # Closing-tag callback of the HTML parser.
+    #
+    # Clears the "found" marker and decrements the nesting counter kept in
+    # $self->{inside} for the tag that has just been closed.
+    sub end
+    {
+        my $self      = shift;
+        my $closedTag = shift;
+
+        $self->{isFound} = 0;
+        $self->{inside}->{$closedTag}--;
+    }
+
+    # Text callback of the HTML parser: receives the content of one text node.
+    #
+    # List mode ($self->{parsingList} true): stores the series name, the
+    # edition and the publication of the current result, driven by the
+    # isSerie / isPublisher / isPublication flags.
+    #
+    # Item mode: fills $self->{curInfo}. Each is* field flag is a small
+    # state counter: 1 = label seen, 2 = value cell opened (the next text is
+    # the value), 3 = value stored.
+    #
+    # Title handling:
+    #   "INT. Title" / "INT1. Title"  -> "<serie> Title" / "<serie> 1 Title"
+    #   "<number>. Title"             -> "<serie> Tome <number> : Title"
+    #   anything else                 -> kept unchanged
+    # The "INT" prefix is only recognised at the start of the text, and the
+    # volume split only happens when a digit really precedes the first dot.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isSerie})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{serie} = $origtext;
+                $self->{isSerie} = 0;
+            }
+            if ($self->{isPublisher} eq 1)
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 2;
+            }
+            if ($self->{isPublication} eq 1)
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{publication} = $origtext;
+                $self->{isPublication} = 0;
+            }
+        }
+        else
+        {
+            # Strip leading whitespace
+            $origtext =~ s/^\s+//;
+            # Drop the placeholder that preProcess() inserts into empty cells
+            $origtext =~ s/#TPFCHAMPSVIDE#//;
+
+            if ($self->{isTitle} eq 2)
+            {
+                my $serie = $self->{itemsList}[$self->{wantedIdx}]->{serie};
+
+                if ($origtext =~ /^INT(\d*)(?:\s*\.\s*|\s+|$)(.*)$/is)
+                {
+                    # Omnibus ("intégrale") edition: no own title, so the
+                    # series name is prepended to what follows the prefix.
+                    my ($volume, $rest) = ($1, $2);
+                    if ($volume ne '')
+                    {
+                        $rest = ($rest ne '') ? $volume . " " . $rest : $volume;
+                    }
+                    $self->{curInfo}->{title} = $serie . " " . $rest;
+                }
+                elsif ($origtext =~ /^([^.]*[0-9][^.]*)\.(.*)$/s)
+                {
+                    # "<volume>. <title>": turn the number into a volume and
+                    # prepend the series name.
+                    my ($tome, $titre) = ($1, $2);
+                    $tome  =~ s/^\s+//;
+                    $titre =~ s/^\s+//;
+                    $self->{curInfo}->{title} = $serie . " Tome " . $tome . " : " . $titre;
+                }
+                else
+                {
+                    $self->{curInfo}->{title} = $origtext;
+                }
+
+                $self->{curInfo}->{web} = "http://www.bedetheque.com/" . $self->{itemsList}[$self->{wantedIdx}]->{url};
+                $self->{curInfo}->{serie} = $serie;
+                $self->{curInfo}->{language} = 'Français';
+                $self->{isTitle} = 0;
+            }
+            elsif (($self->{isAuthor}) && ($self->{nbAuthor} < 3))
+            {
+                # Remove the comma between last name and first name
+                $origtext =~ s/,//g;
+                # The black-and-white marker is not a colourist: skip it
+                if (($origtext ne '') && ($origtext ne '#TPF NOIR ET BLANC TPF#'))
+                {
+                    $self->{curInfo}->{authors} .= $origtext;
+                    $self->{curInfo}->{authors} .= ",";
+                }
+                $self->{isAuthor} = 0;
+                $self->{nbAuthor} = $self->{nbAuthor} + 1;
+            }
+            elsif ($self->{isPublisher} eq 2)
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 3;
+            }
+            elsif ($self->{isPublication} eq 2)
+            {
+                $self->{curInfo}->{publication} = $origtext;
+                $self->{isPublication} = 3;
+            }
+            elsif ($self->{isEdition} eq 2)
+            {
+                $self->{curInfo}->{edition} = $origtext;
+                $self->{isEdition} = 3;
+            }
+            elsif ($self->{isFormatPublication} eq 2)
+            {
+                $self->{curInfo}->{format} = $origtext;
+                $self->{isFormatPublication} = 3;
+            }
+            elsif ($self->{isISBN} eq 2)
+            {
+                $origtext =~ s/978\-//;
+                $self->{curInfo}->{isbn} = $origtext;
+                $self->{isISBN} = 3;
+            }
+            elsif ($self->{isPage} eq 2)
+            {
+                $self->{curInfo}->{pages} = $origtext;
+                $self->{isPage} = 3;
+            }
+            elsif ($self->{isDescription} eq 2)
+            {
+                if ($origtext ne '')
+                {
+                    if ($self->{curInfo}->{description} ne '')
+                    {
+                        $self->{curInfo}->{description} .= "\n\n";
+                    }
+                    $self->{curInfo}->{description} .= "Info sur cette edition : " . $origtext;
+                }
+                $self->{isDescription} = 3;
+            }
+            elsif ($self->{isBook} eq 1)
+            {
+                # Field labels: arm the matching flag unless the field has
+                # already been stored (state 3).
+                if (($origtext eq "Editeur :") && ($self->{isPublisher} ne 3))
+                {
+                    $self->{isPublisher} = 1;
+                }
+                elsif (($origtext eq "Dépot légal :") && ($self->{isPublication} ne 3))
+                {
+                    $self->{isPublication} = 1;
+                }
+                elsif (($origtext eq "Collection :") && ($self->{isEdition} ne 3))
+                {
+                    $self->{isEdition} = 1;
+                }
+                elsif (($origtext eq "Taille :") && ($self->{isFormatPublication} ne 3))
+                {
+                    $self->{isFormatPublication} = 1;
+                }
+                elsif (($origtext eq "ISBN :") && ($self->{isISBN} ne 3))
+                {
+                    $self->{isISBN} = 1;
+                }
+                elsif (($origtext eq "Planches :") && ($self->{isPage} ne 3))
+                {
+                    $self->{isPage} = 1;
+                }
+                elsif (($origtext eq "Info édition : ") && ($self->{isDescription} ne 3))
+                {
+                    $self->{isDescription} = 1;
+                }
+            }
+        }
+    }
+
+    # Constructor. The class is instantiated only once, when the
+    # application starts; per-search state is reset again in preProcess().
+    #
+    # Declares which fields the result list provides (hasField) and sets
+    # every parser state flag to 0. isEdition is initialised here as well,
+    # so the flag set matches the one reset by preProcess().
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            serie       => 1,
+            title       => 1,
+            publication => 1,
+            format      => 0,
+            edition     => 1,
+        };
+
+        my @stateKeys = qw(
+            idPage nbAuthor isFound isSerie isEdition isBook isUrl isTitle
+            isAuthor isFormatPublication isPublisher isISBN isPublication
+            isFormat isPage isDescription isCover
+        );
+        $self->{$_} = 0 foreach @stateKeys;
+
+        return $self;
+    }
+
+    # Hook called with the raw page before it is parsed; returns the HTML
+    # that will actually be fed to the parser.
+    #
+    # Always resets the parser state (the plugin object stays in memory
+    # between two searches). Result-list pages are returned untouched; item
+    # pages get a series of textual clean-ups, applied in the listed order.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        my @stateKeys = qw(
+            idPage nbAuthor isFound isSerie isEdition isBook isUrl isTitle
+            isAuthor isFormatPublication isPublisher isISBN isPublication
+            isFormat isPage isDescription isCover
+        );
+        @{$self}{@stateKeys} = (0) x scalar(@stateKeys);
+
+        return $html if $self->{parsingList};
+
+        # [ pattern, replacement ] pairs; order matters (e.g. the empty-cell
+        # marker must be inserted before the </font> tags are removed).
+        my @rewrites = (
+            [ qr|<u>|i,     ''     ],
+            [ qr|<li>|i,    "\n* " ],
+            [ qr|<br>|i,    "\n"   ],
+            [ qr|<br />|i,  "\n"   ],
+            [ qr|<b>|i,     ''     ],
+            [ qr|</b>|i,    ''     ],
+            [ qr|<p>|i,     "\n"   ],
+            [ qr|</p>|i,    ''     ],
+            [ qr|\x{92}|,   "'"    ],
+            [ qr|’|i,       "'"    ],
+            [ qr|•|i,       '*'    ],
+            [ qr|…|i,       '...'  ],
+            [ qr|\x{8C}|i,  'OE'   ],
+            [ qr|\x{9C}|i,  'oe'   ],
+            # An empty field would confuse the state machine: mark it
+            [ qr|<td><font color="#5C7994"></font></td>|i,
+              '<td><font color="#5C7994"></font>#TPFCHAMPSVIDE#</td>' ],
+            [ qr|<font color="#D19159">|i, '' ],
+            [ qr|</font>|i, '' ],
+            # Black and white is not a colourist: must not end up in the authors
+            [ qr|<N&B>|i,   '#TPF NOIR ET BLANC TPF#' ],
+        );
+
+        foreach my $rewrite (@rewrites)
+        {
+            my ($pattern, $replacement) = @$rewrite;
+            $html =~ s/$pattern/$replacement/g;
+        }
+
+        return $html;
+    }
+
+    # Builds the search URL for $word.
+    #
+    # A word of 10 or 13 characters containing a digit is treated as an ISBN,
+    # anything else as a series title.
+    #  - ISBN that already contains dashes: a leading "978-" is dropped and
+    #    the remainder is searched as is.
+    #  - ISBN without dashes: a 13-digit ISBN loses its "978" prefix, the
+    #    ISBN-10 check digit is recomputed and a dashed pattern with
+    #    wildcards ("%25" is an URL-encoded "%") is built for the site.
+    #
+    # The check digit is (11 - sum mod 11) mod 11, written 'X' when it is 10,
+    # so a weighted sum that is a multiple of 11 yields 0.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        my $length = length($word);
+
+        unless (($length == 13 || $length == 10) && ($word =~ /[0-9]/))
+        {
+            return "http://www.bedetheque.com/index.php?R=1&RechSerie=" . $word;
+        }
+
+        if ($word =~ /\-/)
+        {
+            $word =~ s/^978\-//;
+            return "http://www.bedetheque.com/index.php?R=1&RechISBN=" . $word;
+        }
+
+        # Only a 13-digit ISBN carries the 978 prefix; never touch the
+        # digits of an ISBN-10.
+        $word =~ s/^978// if $length == 13;
+
+        # Weighted sum of the nine significant digits (weights 10 down to 2)
+        my $total = 0;
+        foreach my $pos (0 .. 8)
+        {
+            $total += substr($word, $pos, 1) * (10 - $pos);
+        }
+
+        my $check = (11 - ($total % 11)) % 11;
+        $check = 'X' if $check == 10;
+
+        $word = substr($word, 0, 1) . "-" . substr($word, 1, 2) . "%25-%25" . substr($word, 7, 2) . "-" . $check;
+        return "http://www.bedetheque.com/index.php?R=1&RechISBN=" . $word;
+    }
+
+    # Turns the site-relative URL stored for a result into an absolute one.
+    sub getItemUrl
+    {
+        my $self         = shift;
+        my $relativePath = shift;
+
+        return join('', "http://www.bedetheque.com/", $relativePath);
+    }
+
+    # Display name of this plugin.
+    sub getName
+    {
+        my $pluginName = "BDGest";
+        return $pluginName;
+    }
+
+    # Character set reported for the pages of this site.
+    sub getCharset
+    {
+        my ($self) = @_;
+
+        my $charset = "ISO-8859-1";
+        return $charset;
+    }
+
+    # Name of the plugin's author.
+    sub getAuthor
+    {
+        my $pluginAuthor = 'Rataflo';
+        return $pluginAuthor;
+    }
+
+    # Language code reported for this plugin.
+    sub getLang
+    {
+        my $languageCode = 'FR';
+        return $languageCode;
+    }
+
+    # Search fields offered by this plugin, returned as an array reference.
+    sub getSearchFieldsArray
+    {
+        my @searchFields = ('isbn', 'title');
+        return [@searchFields];
+    }
+
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm b/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm
new file mode 100644
index 0000000..927be0b
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCBibliotekaNarodowa.pm
@@ -0,0 +1,374 @@
+package GCPlugins::GCbooks::GCbooksBibliotekaNarodowa;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+my $searchISBN = "";
+
+{
+    package GCPlugins::GCbooks::GCPluginBibliotekaNarodowa;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Opening-tag callback of the HTML parser.
+    #
+    # List mode: a <frameset> means the search led straight to a single
+    # record, whose URL is taken from the "bib_frame" frame; otherwise each
+    # <tr class="browseEntry"> row is one result whose link, title/author
+    # cell and year cell arm the matching flags.
+    # Item mode: reacts to the <div id="wrg..."> markers that preProcess()
+    # wraps around the interesting table cells.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            # The result is available right away (direct hit)
+            if ($tagname eq 'frameset')
+            {
+                $self->{isBook} = 7;
+                $self->{itemIdx}++;
+            }
+            # Direct hit: the record itself lives in the bib_frame frame
+            if (($tagname eq 'frame') && ($attr->{name} eq 'bib_frame') && $self->{isBook} == 7)
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://alpha.bn.org.pl" . $attr->{src};
+                $self->{isUrl} = 0;
+                $self->{isBook} = 0;
+            }
+
+            if (($tagname eq 'tr') && ($attr->{class} eq 'browseEntry'))
+            {
+                $self->{isBook} = 1;
+                $self->{itemIdx}++;
+            }
+            if (($tagname eq 'td') && ($attr->{class} eq 'browseEntryData') && ($self->{isBook} == 1))
+            {
+                $self->{isAuthor} = 2;
+            }
+            if (($tagname eq 'a') && ($self->{isBook} == 1) && ($self->{isAuthor} > 0))
+            {
+                my $target = "http://alpha.bn.org.pl" . $attr->{href};
+                $target =~ s|frameset|bibframe|;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $target;
+                $self->{isUrl} = 0;
+                $self->{isAuthor} = 0;
+                $self->{isTitle} = 1;
+            }
+            if (($tagname eq 'td') && ($attr->{class} eq 'browseEntryYear') && ($self->{isBook} == 1))
+            {
+                $self->{isPublication} = 1;
+            }
+        }
+        else
+        {
+            # Marker id => flags to arm for the text that follows
+            my %flagsForDiv = (
+                wrgTIAUTR => [qw(isTitle isAuthor isTranslator)],
+                wrgISBN   => [qw(isISBN)],
+                wrgPAGES  => [qw(isPage)],
+                wrgPUBPD  => [qw(isPublisher isPublication)],
+                wrgSERIA  => [qw(isSerie)],
+                wrgEDITI  => [qw(isEdition)],
+            );
+
+            if (($tagname eq 'div') && exists $flagsForDiv{$attr->{id}})
+            {
+                foreach my $flag (@{ $flagsForDiv{$attr->{id}} })
+                {
+                    $self->{$flag} = 1;
+                }
+            }
+        }
+    }
+
+
+    # Closing-tag callback: a closed result row ends the current book in
+    # list mode; the nesting counter of the tag is always decremented.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        if ($self->{parsingList} && ($tagname eq 'tr') && ($self->{isBook} == 1))
+        {
+            $self->{isBook} = 0;
+        }
+
+        $self->{isFound} = 0;
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Text callback. The text is trimmed first, then routed according to
+    # the flags armed by start().
+    #
+    # NOTE(review): several statements read $1 right after a match whose
+    # success is not tested; the statement order is kept exactly as it was
+    # because the result depends on it.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        $origtext =~ s|^\s*||m;
+        $origtext =~ s|\s*$||m;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isBook} == 1)
+            {
+                if ($self->{isTitle} == 1)
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                    $self->{isTitle} = 0;
+                }
+                if ($self->{isAuthor} > 0)
+                {
+                    # Authors are the part between the "/" and the ";"
+                    $origtext =~ s|\s*\/\s*(.*)\s*;|$1|;
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $1;
+                    $self->{isAuthor} = 1;
+                }
+                if ($self->{isPublication} == 1)
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                    $self->{isPublication} = 0;
+                }
+            }
+
+        }
+        else
+        {
+            # "Title / authors ; translation" statement
+            if (($self->{isTitle} == 1) && ($self->{isAuthor} == 1) && ($self->{isTranslator} == 1))
+            {
+                my ($ti, $au, $tr, $bubu);
+                # Drop a trailing full stop
+                $origtext =~ m|.*(\.){1}$|;
+                $bubu = $1;
+                if ($bubu eq '.')
+                {
+                    $origtext =~ s|(.*)\.$|$1|;
+                }
+                $origtext =~ m/([^\/]+)(\/\s+[^;]*)?(;\s*.*(tł|przeł|przekł)\..*)?$/;
+#                $origtext =~ m|([^/]+)(/\s[^;]+)?(;.*)?$|;
+                $ti = $1;
+                $au = $2;
+                $tr = $3;
+                # Title: keep what precedes the first colon
+                $ti =~ s|([^:]*):?.*$|$1|;
+                $ti =~ s|\s*$||;
+                $self->{curInfo}->{title} = $ti;
+                $self->{isTitle} = 0;
+                # Authors: cut the illustrator part, normalise separators
+                $au =~ s|^(.*)il\..*$|$1|;
+                $au =~ s/(\/|tekst)//g;
+                $au =~ s| i |,|g;
+                $au =~ s|, |,|g;
+                $au =~ s|^\s*||;
+                $au =~ s|\s*$||;
+                $self->{curInfo}->{authors} = $au;
+                $self->{isAuthor} = 0;
+                # Translator: keep the name after the "translated by" abbreviation
+                $tr =~ s|[\[\]]||g;
+                $tr =~ s/;\s*.*(tł|przeł|przekł)\. (\[.*\] )?(.*)\.?$/$3/;
+                $tr =~ s|(z \w+\. )?(.*)|$2|;
+                $self->{curInfo}->{translator} = $tr;
+                $self->{isTranslator} = 0;
+            }
+            # "Place : publisher, year" statement
+            if (($self->{isPublisher} == 1) && ($self->{isPublication} == 1))
+            {
+                my ($pu, $pd);
+                $origtext =~ m|(.*)\s:\s(.*),\s(.*)|;
+                $pu = $2;
+                $pd = $3;
+                $pu =~ s|([^"]*")?([^"]*)"?|$2|;
+                $pu =~ s|[\[\]]||g;
+                $self->{curInfo}->{publisher} = $pu;
+                $self->{isPublisher} = 0;
+                $pd =~ s|[^\d]||g;
+                $self->{curInfo}->{publication} = $pd;
+                $self->{isPublication} = 0;
+            }
+            if ($self->{isISBN} eq '1')
+            {
+                my $isbn = $origtext;
+                if ($self->{searchField} eq 'isbn')
+                {
+                    # Prefer the ISBN as typed by the user when it is the
+                    # same number, dashes aside
+                    my $wanted = $self->{searchISBN};
+                    my $seen   = $origtext;
+                    $seen   =~ s|[^\dX]||g;
+                    $wanted =~ s|-||g;
+                    $seen   =~ s|-||g;
+                    $isbn = $self->{searchISBN} if $wanted eq $seen;
+                }
+                $self->{curInfo}->{isbn} = $isbn;
+                $self->{isISBN} = 0;
+            }
+            if ($self->{isPage} eq '1')
+            {
+                $self->{curInfo}->{pages} = $origtext;
+                $self->{isPage} = 0;
+            }
+            if ($self->{isEdition} eq '1')
+            {
+                $origtext =~ s|[\[\]]||g;
+                $origtext =~ s|(.*)\.{1}$|$1|;
+                $self->{curInfo}->{edition} = $origtext;
+                $self->{isEdition} = 0;
+            }
+            if ($self->{isSerie} eq '1')
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{isSerie} = 0;
+            }
+        }
+    }
+
+    # Constructor: declares the fields shown in the result list and sets
+    # every parser state flag to 0.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title       => 1,
+            authors     => 1,
+            publication => 0,
+            format      => 0,
+            edition     => 1,
+        };
+
+        my @stateKeys = qw(
+            isBook isUrl isEditor_Publication_Format_Lang isAnalyse isFound
+            isTitle isAuthor isPublisher isISBN isFormat isEdition isPage
+            isLanguage isPublication isSerie isDescription isCover
+            isTranslator
+        );
+        $self->{$_} = 0 foreach @stateKeys;
+
+        return $self;
+    }
+
+    # Rewrites the raw page before parsing.
+    #
+    # List pages: bold markup is removed and the text following a result
+    # link is moved in front of it. Item pages: the labelled table cells
+    # are replaced by <div id="wrg..."> markers that start() recognises.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $self->{parsingEnded} = 0;
+        $self->{insideResults} = 0;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s|<b>(.*?)</b>|$1|gms;
+            $html =~ s|<td class="browseEntryData">\s*<a(.*)/a>\s*(.*)\s*|<td class="browseEntryData">$2<a$1/a>|gm;
+        }
+        else
+        {
+            $html =~ s|</?strong>||gi;
+            $html =~ s|</?br>||gi;
+            $html =~ s|</?i>||gi;
+
+            $html =~ s|<td.*>ISBN</td>\s*<.*>\s*(\w*)</td>|<div id="wrgISBN">$1</div>|m;
+            $html =~ s|<td.*>Seria</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgSERIA">$1</div>|m;
+            $html =~ s|<div id="wrgSERIA">(.*)( / [^<]*)</div>|<div id="wrgSERIA">$1</div>|;
+            $html =~ s|<td.*>Opis fiz</td>\s*<.*>\s*(\d*)\D.*</td>|<div id="wrgPAGES">$1</div>|m;
+            $html =~ s|<td.*>TytuŁ</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgTIAUTR">$1</div>|m;
+            $html =~ s|<td.*>Adres wyd</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgPUBPD">$1</div>|m;
+            $html =~ s|<td.*>Wydanie</td>\s*<.*>\s*(.*)\s*</td>|<div id="wrgEDITI">$1</div>|m;
+        }
+
+        return $html;
+    }
+
+    # Builds the catalogue search URL: index "i" for an ISBN search, "t"
+    # for a title search. The searched ISBN is remembered for text().
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        my $byIsbn = ($self->{searchField} eq 'isbn');
+        my $index  = $byIsbn ? "i" : "t";
+        $self->{searchISBN} = $byIsbn ? $word : "";
+
+        return "http://alpha.bn.org.pl/search*pol/" . $index . "?SEARCH=" . $word;
+    }
+
+    # Result URLs are already absolute; fall back to the site root.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url ? $url : 'http://alpha.bn.org.pl';
+    }
+
+    # Display name of this plugin.
+    sub getName
+    {
+        return "Biblioteka Narodowa";
+    }
+
+    # Character set reported for the pages of this site.
+    sub getCharset
+    {
+        my $self = shift;
+        #return "UTF-8";
+        return "ISO-8859-2";
+    }
+
+    # Name of the plugin's author.
+    sub getAuthor
+    {
+        return 'WG';
+    }
+
+    # Language code reported for this plugin.
+    sub getLang
+    {
+        return 'PL';
+    }
+
+    # Search fields offered by this plugin.
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm b/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm
new file mode 100644
index 0000000..d32c1f4
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCBokkilden.pm
@@ -0,0 +1,295 @@
+package GCPlugins::GCbooks::GCBokkilden;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    package GCPlugins::GCbooks::GCPluginBokkilden;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+
+    # Opening-tag callback of the HTML parser.
+    #
+    # Once parsing has been declared ended (text() saw a page title that is
+    # not the search page title), the loaded page itself is registered as
+    # the only result if nothing was found yet.
+    # List mode: <h1 class="normal"> opens a result; inside it, product
+    # links carry the URL and title, search links carry an author.
+    # Item mode: locates the facts table, the cover image, the description
+    # block and remembers the style of the <h1>.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingEnded})
+        {
+            if ($self->{itemIdx} < 0)
+            {
+                $self->{itemIdx} = 0;
+                $self->{itemsList}[0]->{url} = $self->{loadedUrl};
+            }
+            return;
+        }
+
+        if ($self->{parsingList})
+        {
+            if (($tagname eq 'h1') && ($attr->{class} eq 'normal'))
+            {
+                $self->{isBook} = 1;
+                $self->{itemIdx}++;
+            }
+            elsif ($self->{isBook} && ($tagname eq 'a'))
+            {
+                # The first product link of a result holds its title
+                if (($attr->{href} =~ /produkt\.do/)
+                 && (!$self->{itemsList}[$self->{itemIdx}]->{title}))
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                    $self->{isTitle} = 1;
+                }
+                elsif ($attr->{href} =~ /sok\.do\?enkeltsok/)
+                {
+                    $self->{isAuthor} = 1;
+                }
+            }
+        }
+        else
+        {
+            if ($tagname eq 'table')
+            {
+                if ($attr->{class} eq 'bokfaktatabell')
+                {
+                    $self->{isBook} = 1;
+                }
+            }
+            elsif ($tagname eq 'div')
+            {
+                if (($attr->{class} eq 'img-ilus') && ($attr->{style} eq 'width:120px;'))
+                {
+                    $self->{isCover} = 1;
+                }
+                if ($attr->{id} eq 'omtale-hidden')
+                {
+                    $self->{is} = 'description';
+                }
+            }
+            elsif ($tagname eq 'img')
+            {
+                if ($self->{isCover})
+                {
+                    $self->{curInfo}->{cover} = 'http://www.bokkilden.no/SamboWeb/'
+                                              . $attr->{src};
+                    $self->{isCover} = 0;
+                }
+            }
+            elsif ($tagname eq 'h1')
+            {
+                $self->{h1Style} = $attr->{style};
+            }
+        }
+    }
+
+    # Closing-tag callback: decrements the nesting counter of the tag.
+    sub end
+    {
+        my ($self, $closedTag) = @_;
+        $self->{inside}->{$closedTag}--;
+    }
+
+    # Text callback.
+    #
+    # List mode: a page <title> that does not look like the search page
+    # title ends the parsing (see start()); otherwise title, authors and the
+    # publication year of the current result are collected.
+    # Item mode: $self->{is} names the curInfo field the next text belongs
+    # to; it is armed by the bold labels of the facts table.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        return if ($self->{parsingEnded});
+
+        if ($self->{parsingList})
+        {
+            if ($self->{inside}->{title})
+            {
+                $self->{parsingEnded} = 1 if $origtext !~ /S..?k p..?/;
+            }
+
+            elsif ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{authors} .= ','
+                    if $self->{itemsList}[$self->{itemIdx}]->{authors};
+                $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isBook})
+            {
+                # The " | "-separated line holds the publication year
+                if ($origtext =~ / \| /)
+                {
+                    $origtext =~ /(\d{4})/;
+                    $self->{itemsList}[$self->{itemIdx}]->{publication} = $1;
+                    $self->{isBook} = 0;
+                }
+            }
+        }
+        else
+        {
+            if ($self->{is})
+            {
+                my $field = $self->{is};
+                $origtext =~ s/^\s*//;
+                $self->{curInfo}->{$field} = $origtext;
+                if ($field eq 'genre')
+                {
+                    $self->{curInfo}->{genre} =~ s/;\s*/,/g;
+                }
+                elsif ($field eq 'pages')
+                {
+                    $self->{curInfo}->{pages} =~ s/[^0-9]//g;
+                }
+                $self->{is} = '';
+            }
+            elsif ($self->{inside}->{title})
+            {
+                $self->{tmpTitle} = $origtext;
+            }
+            elsif ($self->{inside}->{h1})
+            {
+                if (!$self->{curInfo}->{title})
+                {
+                    if ($self->{h1Style})
+                    {
+                        # Styled heading: take title and author from the
+                        # page title ("<title> av <author> »")
+                        $self->{tmpTitle} =~ /\s*(.*?) av (.*?) »/gim;
+                        $self->{curInfo}->{title} = $1;
+                        $self->{curInfo}->{authors} = $2;
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{title} = $origtext;
+                    }
+                }
+            }
+            elsif ($self->{inside}->{author})
+            {
+                $self->{curInfo}->{authors} .= ','
+                    if $self->{curInfo}->{authors};
+                $self->{curInfo}->{authors} .= $origtext;
+            }
+
+            # Separate chain: evaluated for every text, whatever happened above
+            if ($self->{inside}->{translator})
+            {
+                $self->{curInfo}->{translator} .= ', '
+                    if $self->{curInfo}->{translator};
+                $self->{curInfo}->{translator} .= $origtext;
+            }
+            elsif (($self->{isBook}) && $self->{inside}->{b})
+            {
+                # Bold label of the facts table => field of the next text
+                my %fieldForLabel = (
+                    'Utgitt: '  => 'publication',
+                    'Forlag: '  => 'publisher',
+                    'Innb.: '   => 'format',
+                    'Sider: '   => 'pages',
+                    'ISBN: '    => 'isbn',
+                    'Utgave: '  => 'edition',
+                    'Genre:'    => 'genre',
+                );
+
+                if (exists $fieldForLabel{$origtext})
+                {
+                    $self->{is} = $fieldForLabel{$origtext};
+                }
+                elsif ($origtext =~ /Spr..?k:/)
+                {
+                    $self->{is} = 'language';
+                }
+                else
+                {
+                    $self->{is} = '';
+                }
+            }
+        }
+    }
+
+    # Constructor: declares the fields shown in the result list.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        my %listFields = (
+            title       => 1,
+            authors     => 1,
+            publication => 1,
+            format      => 0,
+            edition     => 0,
+        );
+        $self->{hasField} = \%listFields;
+
+
+        return $self;
+    }
+
+    # Resets the parser state and, for item pages, rewrites the HTML:
+    # subject links are unwrapped, author and translator links are turned
+    # into <author> / <translator> elements that text() recognises.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        $self->{parsingEnded} = 0;
+        $self->{isBook} = 0;
+
+        if ($self->{parsingList})
+        {
+            $self->{isTitle} = 0;
+            $self->{isAuthor} = 0;
+            return $html;
+        }
+
+        $self->{is} = '';
+        $self->{isCover} = 0;
+        $html =~ s|<a href="emneliste\.do\?emnekode=[.0-9]*">(.*?)</a>|$1|gim;
+        $html =~ s|<a href="sok\.do\?enkeltsok=[^"]*">([^<]*)</a>|<author>$1</author>|gim;
+        #"
+        $html =~ s|<a href="sok\.do\?.*?rolle1=Oversetter">(.*?)</a>|<translator>$1</translator>|gim;
+
+        return $html;
+    }
+
+    # Builds the search URL for $word.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+        return "http://www.bokkilden.no/SamboWeb/sok.do?rom=MP&enkeltsok=" . $word . "&innsnevre=ja";
+    }
+
+    # Makes a result URL absolute unless it already points to the site.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        if ($url =~ m|http://www.bokkilden.no/|)
+        {
+            return $url;
+        }
+        return "http://www.bokkilden.no/SamboWeb/$url";
+    }
+
+    # Character set reported for the pages of this site.
+    sub getCharset
+    {
+        my $self = shift;
+
+        return 'UTF-8';
+    }
+
+    # Search fields offered by this plugin.
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+
+    # Display name of this plugin.
+    sub getName
+    {
+        return 'Bokkilden';
+    }
+
+    # Language code reported for this plugin.
+    sub getLang
+    {
+        return 'NO';
+    }
+
+    # Name of the plugin's author.
+    sub getAuthor
+    {
+        return 'Tian';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBol.pm b/lib/gcstar/GCPlugins/GCbooks/GCBol.pm
new file mode 100644
index 0000000..6e882b1
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCBol.pm
@@ -0,0 +1,485 @@
+package GCPlugins::GCbooks::GCBol;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    package GCPlugins::GCbooks::GCPluginBol;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Opening-tag callback of the HTML parser.
+    #
+    # List mode: $self->{bookStep} walks through the markup of one result:
+    #   0 -> 1 on <td>, 1 -> 2 on <img>, 2 -> 3 on the title link,
+    #   3 -> 4 on the author link; the <br> seen at step 4 registers the
+    #   result. Any tag that does not fit the expected sequence resets the
+    #   whole state.
+    # Item mode: bookStep 1 = cover seen, 2 = heading seen / author links,
+    #   4 = genre links, 6 = translator links (4 and 6 are entered from
+    #   text() when the matching label is read). The first non-link tag
+    #   leaves a link-collecting step.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if ($tagname eq 'td')
+            {
+                $self->{bookStep} = 1 if $self->{bookStep} == 0;
+            }
+            elsif ($tagname eq 'img')
+            {
+                $self->{bookStep} = 2 if $self->{bookStep} == 1;
+            }
+            elsif ($tagname eq 'a')
+            {
+                if ($self->{bookStep} == 2)
+                {
+                    $self->{url} = "http://www.bol.it" . $attr->{href};
+                    $self->{bookStep} = 3;
+                    $self->{isTitle} = 1;
+                }
+                elsif ($self->{bookStep} == 3)
+                {
+                    $self->{bookStep} = 4;
+                    $self->{isAuthor} = 1;
+                }
+            }
+            elsif ($tagname eq 'br')
+            {
+                if ($self->{bookStep} == 4)
+                {
+                    # URL, title and author were buffered: register the result
+                    $self->{isBook} = 1;
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{url};
+                    $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{title};
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{author};
+
+                    $self->{isFormat} = 1;
+                    #$self->{bookStep} = 0 ;
+                }
+            }
+            else
+            {
+                # <h3>, <p> and <span> are tolerated at the step where the
+                # result markup uses them; anything else breaks the sequence.
+                my $step = $self->{bookStep};
+                my $expected = (($tagname eq 'h3')   && ($step == 2))
+                            || (($tagname eq 'p')    && ($step == 3))
+                            || (($tagname eq 'span') && ($step == 4));
+
+                if (!$expected)
+                {
+                    my @resetKeys = qw(
+                        isTitle isAuthor isAnalyse isDescription isTranslator
+                        isCover isGenre isFormat isPage isLanguage isPublisher
+                        isPublication isISBN isBook bookStep
+                    );
+                    $self->{$_} = 0 foreach @resetKeys;
+                }
+            }
+        }
+        else
+        {
+            if (($tagname eq 'img') && ($attr->{class} eq 'cover'))
+            {
+                $self->{curInfo}->{cover} = "http://www.bol.it" . $attr->{src};
+                $self->{bookStep} = 1;
+            }
+            elsif (($tagname eq 'h1') && ($self->{bookStep} == 1))
+            {
+                # The title is the text of the heading; it is stored by
+                # text(). A heading has no src attribute to build it from.
+                $self->{isTitle} = 1;
+                $self->{bookStep} = 2;
+            }
+            elsif ($self->{bookStep} == 2)
+            {
+                if (($tagname eq 'a') && ($self->{areAuthors} == 0))
+                {
+                    $self->{isAuthor} = 1;
+                    $self->{areAuthors} = 1;
+                }
+                if ($self->{areAuthors} == 1)
+                {
+                    if ($tagname eq 'a')
+                    {
+                        $self->{isAuthor} = 1;
+                    }
+                    else
+                    {
+                        $self->{bookStep} = 3;
+                        $self->{areAuthors} = 0;
+                    }
+                }
+            }
+            elsif ($self->{bookStep} == 4)
+            {
+                if (($tagname eq 'a') && ($self->{areGenres} == 0))
+                {
+                    $self->{isGenre} = 1;
+                    $self->{areGenres} = 1;
+                }
+                if ($self->{areGenres} == 1)
+                {
+                    if ($tagname eq 'a')
+                    {
+                        $self->{isGenre} = 1;
+                    }
+                    else
+                    {
+                        $self->{bookStep} = 5;
+                        $self->{areGenres} = 0;
+                    }
+                }
+            }
+            elsif ($self->{bookStep} == 6)
+            {
+                if (($tagname eq 'a') && ($self->{areTranslators} == 0))
+                {
+                    $self->{isTranslator} = 1;
+                    $self->{areTranslators} = 1;
+                }
+                if ($self->{areTranslators} == 1)
+                {
+                    if ($tagname eq 'a')
+                    {
+                        $self->{isTranslator} = 1;
+                    }
+                    else
+                    {
+                        # Leave the translator step, as the author and genre
+                        # steps do; staying at 6 would collect every later
+                        # link of the page as a translator.
+                        $self->{bookStep} = 7;
+                        $self->{areTranslators} = 0;
+                    }
+                }
+            }
+        }
+    }
+
+    # Closing-tag callback: decrements the nesting counter of the tag.
+    sub end
+    {
+        my ($self, $closedTag) = @_;
+
+        $self->{inside}->{$closedTag}--;
+    }
+
+    # Text callback.
+    #
+    # List mode: title and author are buffered in $self->{title} /
+    # $self->{author} until start() registers the result; format, edition
+    # and publication are then read from the texts that follow.
+    # Item mode: a label arms the flag (or step) of the field that follows;
+    # any other non-empty text is stored into the first armed field.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{title} = $origtext;
+                $self->{isTitle} = 0;
+            }
+            elsif ($self->{isAuthor})
+            {
+                $self->{author} = $origtext;
+                $self->{isAuthor} = 0;
+            }
+            elsif ($self->{isFormat})
+            {
+                # "<format> | ..." : keep the part before the first pipe
+                my @array = split(/\|/,$origtext);
+
+                $self->{itemsList}[$self->{itemIdx}]->{format} = $array[0];
+                $self->{itemsList}[$self->{itemIdx}]->{format} =~ s/^\s+//;
+                $self->{isFormat} = 0;
+                $self->{isPublisher} = 1;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0;
+                $self->{isPublication} = 1;
+            }
+            elsif ($self->{isPublication})
+            {
+                # "... | <publication>" : keep the part after the first pipe
+                my @array = split(/\|/,$origtext);
+
+                $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[1];
+                $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/^\s+//;
+                $self->{isPublication} = 0;
+            }
+        }
+        else
+        {
+            # Strip leading whitespace
+            $origtext =~ s/^\s+//;
+            # Strip trailing whitespace
+            $origtext =~ s/\s+$//g;
+
+            # Label => flag armed for the text that follows
+            my %flagForLabel = (
+                'I contenuti'           => 'isDescription',
+                'Formato:'              => 'isFormat',
+                'Lingua:'               => 'isLanguage',
+                'Editore:'              => 'isPublisher',
+                'Anno di pubblicazione' => 'isPublication',
+                'Codice EAN:'           => 'isISBN',
+            );
+            # Label => link-collecting step entered in start()
+            my %stepForLabel = (
+                'Traduttore:' => 6,
+                'Traduttori:' => 6,
+                'Generi:'     => 4,
+            );
+
+            if (exists $flagForLabel{$origtext})
+            {
+                $self->{ $flagForLabel{$origtext} } = 1;
+            }
+            elsif (substr($origtext,0,7) eq 'Pagine:')
+            {
+                $self->{isPage} = 1;
+            }
+            elsif (exists $stepForLabel{$origtext})
+            {
+                $self->{bookStep} = $stepForLabel{$origtext};
+            }
+            elsif ($origtext ne '')
+            {
+                # [ flag, curInfo field, multi-valued? ] in priority order;
+                # only the first armed flag consumes the text.
+                my @slots = (
+                    [ 'isTitle',       'title',       0 ],
+                    [ 'isAuthor',      'authors',     1 ],
+                    [ 'isDescription', 'description', 0 ],
+                    [ 'isFormat',      'format',      0 ],
+                    [ 'isPage',        'pages',       0 ],
+                    [ 'isLanguage',    'language',    0 ],
+                    [ 'isPublisher',   'publisher',   0 ],
+                    [ 'isPublication', 'publication', 0 ],
+                    [ 'isISBN',        'isbn',        0 ],
+                    [ 'isGenre',       'genre',       1 ],
+                    [ 'isTranslator',  'translator',  1 ],
+                );
+
+                foreach my $slot (@slots)
+                {
+                    my ($flag, $field, $isList) = @$slot;
+                    next unless $self->{$flag};
+
+                    if ($isList && ($self->{curInfo}->{$field} ne ''))
+                    {
+                        $self->{curInfo}->{$field} .= ", " . $origtext;
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{$field} = $origtext;
+                    }
+                    $self->{$flag} = 0;
+                    last;
+                }
+            }
+        }
+    }
+
+    # Constructor: declares the fields shown in the result list and sets
+    # every parser state value to 0.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title       => 1,
+            authors     => 1,
+            publication => 1,
+            format      => 1,
+            edition     => 1,
+            serie       => 0,
+        };
+
+        my @stateKeys = qw(
+            isTitle isAuthor isAnalyse isDescription isTranslator isCover
+            isGenre isFormat isPage isLanguage isPublisher isPublication
+            isISBN areAuthors areGenres areTranslators
+            isBook bookStep title author
+        );
+        $self->{$_} = 0 foreach @stateKeys;
+
+        return $self;
+    }
+
+    # Rewrites the raw page before parsing. Item pages are cut at the
+    # comments anchor and cleaned of markup that would split the texts.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s|<br><i>|<i>|gi;
+            #$html =~ s/[\n\r\t]//g;
+        }
+        else
+        {
+            # Ignore everything from the reader comments on
+            my $found = index($html,'<a name="commenti">');
+            if ( $found >= 0 )
+            {
+                $html = substr($html, 0, $found);
+            }
+
+            $html =~ s|<u>||gi;
+            $html =~ s|<li>|\n* |gi;
+            #$html =~ s|<br>|\n|gi;
+            #$html =~ s|<br />|\n|gi;
+            #$html =~ s|<b>||gi;
+            #$html =~ s|</b>||gi;
+            $html =~ s|<i>||gi;
+            $html =~ s|</i>||gi;
+            $html =~ s|<p>|\n|gi;
+            $html =~ s|</p>||gi;
+            $html =~ s|\x{92}|'|g;
+            $html =~ s|’|'|gi;
+            $html =~ s|•|*|gi;
+        }
+
+        return $html;
+    }
+
+    # Returns the search URL together with the array reference of POST
+    # parameters. For a non-ISBN search the "+" signs of the word are
+    # turned back into spaces first.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        if ($self->{searchField} ne 'isbn')
+        {
+            $word =~ s/\+/ /g;
+        }
+
+        return ('http://www.bol.it/libri/ricerca', ["crc" => "100", "crcselect" => "100", "g" => "$word", "tpr" => "10"] );
+    }
+
+    # Result URLs are already absolute; fall back to the site root.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url;
+        return 'http://www.bol.it';
+    }
+
+    # Display name of this plugin.
+    sub getName
+    {
+        return "Bol";
+    }
+
+    # Character set reported for the pages of this site.
+    sub getCharset
+    {
+        my $self = shift;
+        return "ISO-8859-15";
+    }
+
+    # Names of the plugin's authors.
+    sub getAuthor
+    {
+        return 'TPF, UnclePetros';
+    }
+
+    # Language code reported for this plugin.
+    sub getLang
+    {
+        return 'IT';
+    }
+
+    # Search fields offered by this plugin.
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm b/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm
new file mode 100644
index 0000000..ad46177
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCBuscape.pm
@@ -0,0 +1,479 @@
+package GCPlugins::GCbooks::GCBuscape;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginBuscape; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'a') && ($attr->{class} eq 'xu')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{isTitle} = 1 ; + } + elsif (( $attr->{class} eq 'xj') && ($self->{itemIdx} eq '-1') && ($self->{searchField} eq 'isbn')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + elsif (($tagname eq 'meta') && ($self->{itemIdx} eq '-1') && ($self->{searchField} eq 'isbn')) + { + my $html = $self->loadPage($self->{loadedUrl}, 0, 1); + my $found = index($html,"URL="); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('URL='),length($html)- $found -length('URL=')); + $html = substr($html, 0, index($html,"\"")); + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $html; + } + } + } + else + { + if (( $attr->{class} eq 'xj') && ($self->{isAnalyse} eq 0)) + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{onerror} ne '') && ($self->{curInfo}->{title} eq '')) + { + # Attention il y a 2 formats differents pour ce site + if ($attr->{alt} ne '') + { + $self->{curInfo}->{title} = $attr->{alt}; + } + if ($attr->{title} ne '') + { + my @array = split(/\(/,reverse($attr->{title})); + my @array2; + if ($array[1] ne '') + { + $self->{curInfo}->{isbn} = reverse($array[0]); + $self->{curInfo}->{isbn} =~ s/\)//; + # J enleve le premier champs 
qui est sense etre le code ISBN + shift(@array); + my $element1; + my $element2; + foreach $element1 (@array) + { + if ($element2 eq '') + { + $element2 = $element1; + } + else + { + $element2 .= "(" .$element1; + } + } + @array2 = split(/-/,$element2); + } + else + { + @array2 = split(/-/,$array[0]); + } + + if ($array2[1] ne '') + { + # J enleve le dernier champs qui est l auteur + shift(@array2); + } + my $element; + foreach $element (@array2) + { + if ($self->{curInfo}->{title} eq '') + { + $self->{curInfo}->{title} = $element; + } + else + { + $self->{curInfo}->{title} .= "-" .$element; + } + } + $self->{curInfo}->{title} = reverse($self->{curInfo}->{title}); + } + + $self->{curInfo}->{cover} = $attr->{src}; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + my @array = split(/\(/,reverse($origtext)); + my @array2; + if ($array[1] ne '') + { + # J enleve le premier champs qui est sense etre le code ISBN + shift(@array); + my $element1; + my $element2; + foreach $element1 (@array) + { + if ($element2 eq '') + { + $element2 = $element1; + } + else + { + $element2 .= "(" .$element1; + } + } + @array2 = split(/-/,$element2); + } + else + { + @array2 = split(/-/,$array[0]); + } + + if ($array2[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = reverse($array2[0]); + my $found = index($self->{itemsList}[$self->{itemIdx}]->{authors}," Cod:"); + if ( $found >= 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = substr($self->{itemsList}[$self->{itemIdx}]->{authors}, 0, $found); + } + # Enleve les blancs en debut de chaine + $self->{itemsList}[$self->{itemIdx}]->{authors} =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{authors} =~ s/\s+$//g; + shift(@array2); + } + my $element; + foreach $element (@array2) + { + if 
($self->{itemsList}[$self->{itemIdx}]->{title} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $element; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{title} .= "-" .$element; + } + } + $self->{itemsList}[$self->{itemIdx}]->{title} = reverse($self->{itemsList}[$self->{itemIdx}]->{title}); + $self->{isTitle} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isAnalyse} eq 1) + { + if ($origtext =~ m/Autor/i) + { + $self->{isAuthor} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/Editora/i) + { + $self->{isPublisher} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/Ano de edi/i) + { + $self->{isPublication} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/N.* de p.*ginas/i) + { + $self->{isPage} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/ISBN/i) + { + $self->{isISBN} = 1 ; + $self->{isAnalyse} = 2 ; + } + elsif ($origtext =~ m/Encaderna/i) + { + $self->{isFormat} = 1 ; + $self->{isAnalyse} = 2 ; + } + else + { + $self->{isAnalyse} = 0 ; + } + + } + elsif ($self->{isAuthor} eq 1) + { + $self->{isAuthor} = 2 ; + } + elsif ($self->{isAuthor} eq 2) + { + if ($origtext =~ m/N.*o Cadastrado/i) + { + } + else + { + my @nom_prenom = split(/,/,$origtext); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s+$//; + $nom_prenom[1] =~ s/\s+$//; + if ($self->{curInfo}->{authors} eq '') + { + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} = $nom_prenom[0]; + } + } + else + { + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} .= ", " . $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} .= ", " . 
$nom_prenom[0]; + } + } + } + + $self->{isAuthor} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isISBN} eq 2) + { + $self->{curInfo}->{isbn} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isISBN} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isPublisher} eq 1) + { + $self->{isPublisher} = 2 ; + } + elsif ($self->{isPublisher} eq 2) + { + $self->{curInfo}->{publisher} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isPublisher} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isPublication} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isPage} eq 1) + { + $self->{isPage} = 2 ; + } + elsif ($self->{isPage} eq 2) + { + $self->{curInfo}->{pages} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isPage} = 0 ; + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isFormat} eq 2) + { + $self->{curInfo}->{format} = $origtext if ( !($origtext =~ m/N.*o Cadastrado/i) && !($origtext =~ m/n.*o dispon.*vel/i)); + $self->{isFormat} = 0 ; + $self->{isAnalyse} = 0 ; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 0, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isISBN} = 0; + 
$self->{isFormat} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<br><i>|<i>|gi; + } + else + { + my $found = index($html,'<a name="commenti">'); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|<u>||gi; + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + + $html =~ s|<!--||gi; + $html =~ s|<strong>||gi; + $html =~ s|</strong>|<tpfnesertarien>TPFNESERTARIEN</tpfnesertarien><tpfnesertarien></tpfnesertarien>|gi; + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://compare.buscape.com.br/proc_unico?id=3482&Carac1000000000=" .$word; + } + else + { + return "http://compare.buscape.com.br/proc_unico?id=3482&Carac1000000000=" .$word; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + # Attention il y a 2 formats differents pour ce site + if ($url =~ m/counter_livro.asp/i) + { + my $html = $self->loadPage($url, 0, 1); + my $found = index($html,"URL="); + if ( $found >= 0 ) + { + $html = substr($html, $found +length('URL='),length($html)- $found -length('URL=')); + $html = substr($html, 0, index($html,"\"")); + } + return $html; + } + + return $url; + } + + sub getName + { + return "Buscape"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'PT'; + } + + sub getSearchFieldsArray + { + return ['ISBN', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm b/lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm new file mode 100644 index 0000000..5d8f7e4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCCasadelibro.pm @@ -0,0 +1,420 
@@ +package GCPlugins::GCbooks::GCbooksCasadelibro; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginCasadelibro; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'p') && ($attr->{class} eq 'tit')) + { + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + } + elsif (($tagname eq 'a') && ($self->{isBook}) && ($self->{isUrl})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.casadelibro.com" . 
$attr->{href}; + $self->{isUrl} = 0 ; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'p') && ($attr->{class} eq 'liz')) + { + $self->{isBook} = 0 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'autor') && ($self->{isBook})) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{class} =~ m/autor/i) && ($self->{isBook})) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'p') && ($attr->{class} eq 'puestoEditorial') && ($self->{isBook})) + { + $self->{isEditionPublication} = 1 ; + } + } + else + { + if ($self->{isLanguage} eq 1) + { + $self->{isLanguage} = 2 ; + } + elsif ($self->{isEdition} eq 1) + { + $self->{isEdition} = 2 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isSerie} eq 1) + { + $self->{isSerie} = 2 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'tit_ficha')) + { + $self->{isTitle} = 1 ; + } + elsif ($tagname eq 'tpfnoauthortpf') + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{class} eq 'autor2')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'edicion_ficha')) + { + $self->{isPublisher} = 1 ; + } + elsif ($tagname eq 'tpfstarttagtpf') + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'txt_resumen')) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{id} eq 'imgFicha') && ($attr->{src} ne '/l/grande.gif')) + { + $self->{curInfo}->{cover} = "http://www.casadelibro.com" . 
$attr->{src} ; + } + + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @nom_prenom = split(/,/,$origtext); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s+$//; + $nom_prenom[1] =~ s/\s+$//; + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + if ($nom_prenom[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[0]; + } + } + else + { + if ($nom_prenom[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . 
$nom_prenom[0]; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isEditionPublication}) + { + $_= $origtext; + if (/(.*),\s([0-9][0-9][0-9][0-9]$)/) + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $1; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + } + + $_= $origtext; + if (/(.*)\s([0-9][0-9][0-9][0-9]$)/) + { + $self->{itemsList}[$self->{itemIdx}]->{publication} = $2; + } + + $self->{isEditionPublication} = 0 ; + } + } + else + { + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isLanguage} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{language} = $array[1]; + $self->{curInfo}->{language} =~ s/^\s//; + $self->{curInfo}->{language} =~ s/\s+$//; + $self->{isLanguage} = 0 ; + } + elsif ($self->{isEdition} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{edition} = $array[1]; + $self->{curInfo}->{edition} =~ s/^\s//; + $self->{curInfo}->{edition} =~ s/\s+$//; + $self->{isEdition} = 0 ; + } + elsif ($self->{isFormat} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{format} = $array[1]; + $self->{curInfo}->{format} =~ s/^\s//; + $self->{curInfo}->{format} =~ s/\s+$//; + $self->{isFormat} = 0 ; + } + elsif ($self->{isSerie} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{serie} = $array[1]; + $self->{curInfo}->{serie} =~ s/^\s//; + $self->{curInfo}->{serie} =~ s/\s+$//; + $self->{isSerie} = 0 ; + } + elsif ($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext; + $self->{curInfo}->{publication} =~ s/^\s//; + $self->{curInfo}->{publication} =~ s/\s+$//; + $self->{isPublication} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isLanguage} = 1 if ($origtext =~ m/Lengua/i); + $self->{isEdition} = 1 
if ($origtext =~ m/^n(.*)\sEdici/i); + $self->{isFormat} = 1 if ($origtext =~ m/Encuadernaci/i); + $self->{isSerie} = 1 if ($origtext =~ m/Colecci/i); + $self->{isPublication} = 1 if ($origtext =~ m/^A(.*)o de Edici/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @nom_prenom = split(/,/,$origtext); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s+$//; + $nom_prenom[1] =~ s/\s+$//; + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} .= $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} .= $nom_prenom[0]; + } + $self->{curInfo}->{authors} .= ","; + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isISBN} eq 2) + { + my @array = split(/:/,$origtext); + $self->{curInfo}->{isbn} = $array[1]; + $self->{curInfo}->{isbn} =~ s/^\s//; + $self->{isISBN} = 0 ; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{curInfo}->{description} =~ s/\t//g; + $self->{curInfo}->{description} =~ s/^\s//; + $self->{curInfo}->{description} =~ s/\s+$//; + $self->{isDescription} = 0 ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditionPublication} = 0 ; + $self->{isAnalyse} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isLanguage} = 0; + $self->{isEdition} = 0; + $self->{isFormat} = 0; + $self->{isSerie} = 0; + $self->{isPublication} = 0; + $self->{isISBN} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|'| |gi; + } + else + { + my $found = 
index($html,"<div class=\"azul3\">"); + if ( $found >= 0 ) + { + $html = substr($html, 0, $found); + } + + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|"tit_ficha"><strong>|"tit_ficha">|gi; + $html =~ s|de </span>|<TPFNOAUTHORTPF>|gi; + $html =~ s|<strong>|<TPFSTARTTAGTPF>|gi; + $html =~ s|</strong>|<TPFSTOPTAGTPF>|gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + + return "http://www.casadellibro.com/busquedas/resultados2?titbus=&autorbus=&isbnbus=" . $word. "&editbus=&idibus=0&encbus=0&sl1=-1"; +# return "http://www.casadellibro.com/busquedas/quickResults/0,,1-i-" . $word. ",00.html?tBusq=t&tValueForSearch=" .$word. "&cFo=true&rOd=&NotQueryAgain=false"; + } + else + { + my $word2 = $word; + $word2 =~ s|\+|%20|gi; + return "http://www.casadellibro.com/busquedas/quickResults2/0,," . $word2. ",00.html?Buscar=" .$word; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url; + return 'http://www.casadellibro.com/'; + } + + sub getName + { + return "Casadelibro"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'ES'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm b/lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm new file mode 100644 index 0000000..242871c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCChapitre.pm @@ -0,0 +1,430 @@ +package GCPlugins::GCbooks::GCChapitre;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginChapitre;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+    # Start-tag callback (presumably invoked by the HTML parser behind the
+    # plugin base class -- confirm against GCbooksPluginsBase).
+    #
+    #   $tagname  - name of the tag being opened
+    #   $attr     - hash ref of the tag attributes (id, class, href, src are read)
+    #   $attrseq  - unused here
+    #   $origtext - unused here
+    #
+    # The handler only arms state flags in $self (isTitle, isAuthor, ...);
+    # the text() handler consumes them.  The tpf...tpf pseudo tags tested
+    # below are the markers injected by preProcess().
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            # --- Search-result page ---
+            if (($tagname eq 'a') && ( $attr->{id} =~ m/ctl00_PHCenter_SearchResult1_rpResult_ctl.._searchResultTitle_hlProduct/))
+            {
+                # A product link opens a new result entry; its text is the title.
+                my $idx = ++$self->{itemIdx};
+                $self->{itemsList}[$idx]->{url} = "http://www.chapitre.com" . $attr->{href};
+                $self->{isTitle} = 1;
+            }
+            elsif ($tagname eq 'tpfauthortpf')
+            {
+                $self->{isAuthor} = 1;
+            }
+            elsif ($tagname eq 'strong')
+            {
+                # The next text node is a label ("Editeur :", "Date :", ...).
+                $self->{isAnalyse} = 1;
+            }
+
+            return;
+        }
+
+        # --- Item (detail) page ---
+
+        # While the author links are being read, the first tag that is not
+        # an <a> closes the author section; nothing else is examined then.
+        if ($self->{isAuthor} eq 2)
+        {
+            $self->{isAuthor} = 0 if ($tagname ne 'a');
+            return;
+        }
+
+        if ($tagname eq 'div')
+        {
+            if ($attr->{class} eq 'clear')
+            {
+                $self->{isDescription} = 0;
+            }
+            elsif ($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductTitle1_pnlAuthor')
+            {
+                $self->{isAuthor} = 1;
+            }
+            elsif ($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductTitle1_pnlTranslator')
+            {
+                $self->{isTranslator} = 1;
+            }
+            elsif ($attr->{class} eq 'presentation')
+            {
+                $self->{isDescription} = 1;
+            }
+        }
+        elsif ($tagname eq 'td')
+        {
+            # A value cell follows a recognised label: promote the first
+            # pending flag (state 1) to "read the next text" (state 2).
+            foreach my $flag (qw(isPublisher isPublication isISBN isLanguage isCollection isGenre))
+            {
+                next unless ($self->{$flag} eq 1);
+                $self->{$flag} = 2;
+                last;
+            }
+        }
+        elsif ($tagname eq 'a')
+        {
+            if ($attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductTitle1_linkTitleProduct')
+            {
+                $self->{isTitle} = 1;
+            }
+            elsif ($attr->{href} =~ m|/CHAPITRE/fr/search/Default.aspx\?collection=|i)
+            {
+                $self->{isCollection} = 2;
+            }
+            elsif ($attr->{href} =~ m|/CHAPITRE/fr/search/Default.aspx\?themeId=|i)
+            {
+                $self->{isGenre} = 2;
+            }
+        }
+        elsif ($tagname eq 'h2')
+        {
+            $self->{isAuthor} = 2 if ($self->{isAuthor} eq 1);
+        }
+        elsif ($tagname eq 'tpftraducteurtpf')
+        {
+            $self->{isTranslator} = 2 if ($self->{isTranslator} eq 1);
+        }
+        elsif ($tagname eq 'img')
+        {
+            # Keep the cover unless it is the "unavailable" placeholder image.
+            if (( $attr->{id} eq 'ctl00_PHCenter_ProductFile1_ProductPicture1_imgProduct') && ( index($attr->{src},"http://images.chapitre.com/indispo") eq -1 ))
+            {
+                $self->{curInfo}->{cover} = $attr->{src};
+            }
+        }
+        elsif ($tagname eq 'tpfdescriptiontpf')
+        {
+            $self->{isDescription} = 2 if ($self->{isDescription} eq 1);
+        }
+        elsif ($tagname eq 'th')
+        {
+            # The next text node is a row label ("Editeur", "EAN13", ...).
+            $self->{isAnalyse} = 1;
+        }
+
+        return;
+    }
+
+    # End-tag callback: decrements the per-tag nesting counter that start()
+    # increments in $self->{inside}.
+    sub end
+    {
+        my $self    = shift;
+        my $tagname = shift;
+
+        $self->{inside}{$tagname}--;
+    }
+
+    # Text callback: stores the text node into the field selected by the
+    # state flags that start() (and earlier text nodes) armed, then resets
+    # the flag that was consumed.
+    #
+    #   $origtext - raw text of the node
+    #
+    # In list mode the values go to $self->{itemsList}[itemIdx]; in item
+    # mode they go to $self->{curInfo}.
+    #
+    # Author names on the item page arrive as "Last, First; Last, First"
+    # and are rewritten as "First Last, First Last".  Every name part is
+    # fully trimmed (the previous code removed a single leading whitespace
+    # character only), a missing first name is handled without touching an
+    # undefined value, and fragments that are empty after trimming are
+    # skipped instead of producing empty authors.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor})
+            {
+                # Strip leading and trailing whitespace.
+                $origtext =~ s/^\s+//;
+                $origtext =~ s/\s+$//;
+                if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+                }
+                else
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ' . $origtext;
+                }
+                $self->{isAuthor} = 0 ;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # Label text: decide which field the following text belongs to.
+                $self->{isPublisher} = 1 if ($origtext =~ m/Editeur :/i);
+                $self->{isSerie} = 1 if ($origtext =~ m/Collection :/i);
+                $self->{isPublication} = 1 if ($origtext =~ m/Date :/i);
+
+                $self->{isAnalyse} = 0 ;
+            }
+            elsif ($self->{isPublisher})
+            {
+                # Only the first line of the text is kept; the publisher is
+                # stored in the list's "edition" column.
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = (split(/\n/, $origtext))[0];
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isPublication})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{publication} = (split(/\n/, $origtext))[0];
+                $self->{isPublication} = 0 ;
+            }
+            elsif ($self->{isSerie})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{serie} = (split(/\n/, $origtext))[0];
+                $self->{isSerie} = 0 ;
+            }
+        }
+        else
+        {
+            # Strip leading and trailing whitespace.
+            $origtext =~ s/^\s+//;
+            $origtext =~ s/\s+$//;
+
+            if ($self->{isTitle})
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor} eq 2)
+            {
+                # Whitespace-only nodes are ignored and leave the flag armed.
+                if ( $origtext ne '')
+                {
+                    my @names;
+                    foreach my $element (split(/;/, $origtext))
+                    {
+                        my ($lastName, $firstName) = split(/,/, $element);
+                        foreach my $part ($lastName, $firstName)
+                        {
+                            $part = '' unless defined $part;
+                            $part =~ s/^\s+//;
+                            $part =~ s/\s+$//;
+                        }
+                        my $name = join(' ', grep { $_ ne '' } ($firstName, $lastName));
+                        push(@names, $name) if ($name ne '');
+                    }
+
+                    if (@names)
+                    {
+                        my $current = $self->{curInfo}->{authors};
+                        my $prefix = (defined($current) && ($current ne '')) ? $current . ', ' : '';
+                        $self->{curInfo}->{authors} = $prefix . join(', ', @names);
+                    }
+                    $self->{isAuthor} = 0 ;
+                }
+            }
+            elsif ($self->{isTranslator} eq 2)
+            {
+                $self->{curInfo}->{translator} = $origtext;
+                $self->{isTranslator} = 0 ;
+            }
+            elsif ($self->{isPublisher} eq 2)
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isDescription} eq 2)
+            {
+                $self->{curInfo}->{description} = $origtext;
+                $self->{isDescription} = 0 ;
+            }
+            elsif ($self->{isPublication} eq 2)
+            {
+                $self->{curInfo}->{publication} = $origtext;
+                $self->{isPublication} = 0 ;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # Row label: arm the flag for the matching field (state 1);
+                # start() promotes it to state 2 on the following <td>.
+                $self->{isPublication} = 1 if ($origtext =~ m/parution/i);
+                $self->{isISBN} = 1 if ($origtext =~ m/EAN13/i);
+                $self->{isPublisher} = 1 if ($origtext =~ m/Editeur/i);
+                $self->{isLanguage} = 1 if ($origtext =~ m/Langue/i);
+                $self->{isCollection} = 1 if ($origtext =~ m/Collection/i);
+                $self->{isGenre} = 1 if ($origtext =~ m/Genre/i);
+
+                $self->{isAnalyse} = 0 ;
+            }
+            elsif ($self->{isISBN} eq 2)
+            {
+                $self->{curInfo}->{isbn} = $origtext;
+                $self->{isISBN} = 0 ;
+            }
+            elsif ($self->{isLanguage} eq 2)
+            {
+                $self->{curInfo}->{language} = $origtext;
+                $self->{isLanguage} = 0 ;
+            }
+            elsif ($self->{isCollection} eq 2)
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{isCollection} = 0 ;
+            }
+            elsif ($self->{isGenre} eq 2)
+            {
+                # Genres are separated by "/" on the page; store them comma-separated.
+                $origtext =~ s|/|,|gi;
+                $self->{curInfo}->{genre} = $origtext;
+                $self->{isGenre} = 0 ;
+            }
+        }
+    }
+
+    # Constructor: builds the object through the parent class, declares
+    # which columns the result list provides (hasField) and clears every
+    # parser state flag used by start() and text().
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self = $class->SUPER::new();
+        bless ($self, $class);
+
+        # 1 = the column is filled in for search results, 0 = it is not.
+        my %listColumns = (
+            title => 1,
+            authors => 1,
+            publication => 1,
+            format => 0,
+            edition => 1,
+            serie => 1,
+        );
+        $self->{hasField} = \%listColumns;
+
+        # All state flags start cleared.
+        foreach my $flag (qw(
+            isTitle isAuthor isPublisher isSerie isPublication isAnalyse
+            isDescription isISBN isLanguage isCollection isTranslator isGenre
+        ))
+        {
+            $self->{$flag} = 0;
+        }
+
+        return $self;
+    }
+
+    # Rewrites the downloaded HTML before it is parsed and returns the
+    # modified copy.
+    #
+    # List pages: bold tags are dropped and a <tpfauthortpf> marker is
+    # inserted after every "</a>,".
+    # Item pages: <tpftraducteurtpf> / <tpfdescriptiontpf> markers are
+    # inserted after </strong> and </h3>, simple formatting tags are removed
+    # or turned into line breaks, and a few typographic characters are
+    # replaced by ASCII equivalents.
+    #
+    # The rules are applied one after the other, in the order listed.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        my @rules;
+        if ($self->{parsingList})
+        {
+            @rules = (
+                [ qr|<b>|i, '' ],
+                [ qr|</b>|i, '' ],
+                [ qr|</a>,|i, '</a>,<tpfauthortpf>' ],
+            );
+        }
+        else
+        {
+            @rules = (
+                # Marker tags picked up by start().
+                [ qr|</strong>|i, '</strong><tpftraducteurtpf>' ],
+                [ qr|</h3>|i, '</h3><tpfdescriptiontpf>' ],
+
+                # Formatting tags.
+                [ qr|<u>|i, '' ],
+                [ qr|<li>|i, "\n* " ],
+                [ qr|<br>|i, "\n" ],
+                [ qr|<br />|i, "\n" ],
+                [ qr|<b>|i, '' ],
+                [ qr|</b>|i, '' ],
+                [ qr|<i>|i, '' ],
+                [ qr|</i>|i, '' ],
+                [ qr|<p>|i, "\n" ],
+                [ qr|</p>|i, '' ],
+
+                # Typographic characters.
+                [ qr|\x{92}|, "'" ],
+                [ qr|’|i, "'" ],
+                [ qr|•|i, '*' ],
+                [ qr|…|i, '...' ],
+                [ qr|\x{85}|i, '...' ],
+                [ qr|\x{8C}|i, 'OE' ],
+                [ qr|\x{9C}|i, 'oe' ],
+            );
+        }
+
+        foreach my $rule (@rules)
+        {
+            my ($pattern, $replacement) = @{$rule};
+            $html =~ s/$pattern/$replacement/g;
+        }
+
+        return $html;
+    }
+
+    # Builds the search request for $word.
+    #
+    # Returns a two-element list: the search URL and an array ref holding
+    # the form field "quicksearch" with the query, in which every "+" has
+    # been turned back into a space.
+    sub getSearchUrl
+    {
+        my $self = shift;
+        my $word = shift;
+
+        $word =~ s/\+/ /g;
+
+        my @formData = ("quicksearch" => "$word");
+
+        return ('http://www.chapitre.com/CHAPITRE/fr/search/Default.aspx?search=true', \@formData);
+    }
+
+    # Returns the URL of an item page.  The URLs collected by start() are
+    # already absolute, so the value is handed back unchanged.
+    sub getItemUrl
+    {
+        my $self = shift;
+        my ($url) = @_;
+
+        return $url;
+    }
+
+    # Name of this plugin.
+    sub getName
+    {
+        return 'Chapitre.com';
+    }
+
+    # Character set name reported for this site.
+    sub getCharset
+    {
+        return 'ISO-8859-15';
+    }
+
+    # Author of this plugin.
+    sub getAuthor
+    {
+        my $pluginAuthor = 'TPF';
+        return $pluginAuthor;
+    }
+
+    # Language code of the site's content.
+    sub getLang
+    {
+        my $language = 'FR';
+        return $language;
+    }
+
+    # Fields this plugin can search on, as an array ref of field names.
+    #
+    # The names are lower-case ('isbn', not 'ISBN') to agree with the other
+    # book plugins, which return 'isbn' and compare the selected search
+    # field against the lower-case name.
+    # NOTE(review): assumes the framework matches these names
+    # case-sensitively -- confirm against the plugin base class.
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm b/lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm
new file mode 100644
index 0000000..927e099
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCDoubanbook.pm
@@ -0,0 +1,238 @@
+package GCPlugins::GCbooks::GCDoubanbook;
+
+###################################################
+#
+#  Copyright 2005-2010 Bai Wensimi
+#
+#  This file is part of GCstar.
+#
+#  GCstar is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  GCstar is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCstar; if not, write to the Free Software
+#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin that reads the XML returned by the api.douban.com book
+    # endpoints (see getSearchUrl) instead of scraping HTML.
+    package GCPlugins::GCbooks::GCPluginDoubanbook;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use XML::Simple;
+    use Encode;
+    use LWP::Simple qw($ua);
+
+    # Parses one downloaded page.
+    #   $page - raw text of the reply.
+    # When $self->{parsingList} is true, the page is a search result and one
+    # entry per book is appended to $self->{itemsList} (url, title, authors).
+    # Otherwise the page describes a single book and $self->{curInfo} is filled.
+    # Returns nothing useful; non-XML replies are ignored.
+    sub parse
+    {
+        my ($self, $page) = @_;
+
+        # XML::Simple treats a string without markup as a file name, so
+        # anything that is not XML must never reach XMLin.
+        return if !defined $page || $page !~ /</;
+        # Plain-text error replies. The two patterns are mutually exclusive,
+        # so they have to be OR-ed (a bitwise AND could never be true).
+        return if ($page =~ /^bad isbn/) || ($page =~ /^The/);
+
+        my $xs = XML::Simple->new;
+
+        if ($self->{parsingList})
+        {
+            # 'entry' is forced to an array so that a feed holding a single
+            # result does not collapse into a hash reference.
+            my $xml = $xs->XMLin(
+                $page,
+                ForceArray => ['author', 'entry'],
+                KeyAttr    => [],
+            );
+
+            # A page ending in "feed>" carries several entries; otherwise the
+            # document root is itself the only entry.
+            my @entries = ($page =~ /feed>$/)
+                        ? @{ $xml->{'entry'} || [] }
+                        : ($xml);
+
+            foreach my $entry (@entries)
+            {
+                $self->{itemIdx}++;
+                my $item = $self->{itemsList}[ $self->{itemIdx} ] ||= {};
+                $item->{'url'}   = $entry->{'id'};
+                $item->{'title'} = $entry->{'title'};
+
+                # Comma separated, without a dangling separator.
+                my @names = grep { defined $_ && !ref $_ && $_ ne '' }
+                            map  { $_->{'name'} }
+                            @{ $entry->{'author'} || [] };
+                $item->{'authors'} = join(',', @names);
+            }
+        }
+        else
+        {
+            # Repeated elements are forced to arrays so that a book with a
+            # single attribute or a single link is handled like any other.
+            my $xml = $xs->XMLin(
+                $page,
+                ForceArray => ['author', 'db:attribute', 'link'],
+                KeyAttr    => { 'db:tag' => 'name', 'link' => 'rel' },
+            );
+            my $info = $self->{curInfo} ||= {};
+
+            # Each author is followed by a comma, as in the original code.
+            foreach my $author (@{ $xml->{'author'} || [] })
+            {
+                $info->{authors} .= $author->{'name'};
+                $info->{authors} .= ',';
+            }
+
+            $info->{title}       = $xml->{'title'};
+            $info->{description} = $xml->{'summary'};
+
+            # <link> elements are folded into a hash keyed by their rel
+            # attribute; fall back to an empty hash if folding did not happen.
+            my $links = (ref $xml->{'link'} eq 'HASH') ? $xml->{'link'} : {};
+            my $alternate = $links->{'alternate'};
+            $info->{web} = (ref $alternate eq 'HASH') ? $alternate->{'href'} : undef;
+
+            # db:attribute names that map one-to-one onto a GCstar field.
+            my %fieldFor = (
+                'publisher' => 'publisher',
+                'pubdate'   => 'publication',
+                'pages'     => 'pages',
+                'isbn13'    => 'isbn',
+                'binding'   => 'format',
+            );
+
+            foreach my $attribute (@{ $xml->{'db:attribute'} || [] })
+            {
+                my $name    = $attribute->{'name'};
+                my $content = $attribute->{'content'};
+                next if !defined $name;
+
+                if (exists $fieldFor{$name})
+                {
+                    $info->{ $fieldFor{$name} } = $content;
+                }
+                elsif ($name eq 'translator')
+                {
+                    # Several translators are joined with commas.
+                    $info->{translator} .= ','
+                        if defined $info->{translator} && $info->{translator} ne '';
+                    $info->{translator} .= $content;
+                }
+                elsif ($name eq 'author-intro')
+                {
+                    # The author introduction is appended to the summary.
+                    $info->{description} .= "\n\n" . $content;
+                }
+            }
+
+            # Swap the "spic" path component for "lpic" in the image URL
+            # (presumably small vs. large picture - TODO confirm).
+            my $image = $links->{'image'};
+            my $cover = (ref $image eq 'HASH') ? $image->{'href'} : undef;
+            $cover =~ s/spic/lpic/ if defined $cover;
+            $info->{cover} = $cover;
+        }
+    }
+
+    # Builds the plugin on top of the base class and declares which fields
+    # are provided in the search result list.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+        };
+        return $self;
+    }
+
+    # The reply is XML and needs no clean-up before parsing.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html;
+    }
+
+    # Returns the API URL for a search on $word; ISBN searches use a
+    # dedicated endpoint.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+        if ($self->{searchField} eq 'isbn')
+        {
+            return "http://api.douban.com/book/subject/isbn/" .$word;
+        }
+        else
+        {
+            return "http://api.douban.com/book/subjects?q=" .$word;
+        }
+
+    }
+
+    # Item URLs stored by parse are used unchanged.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+        return $url;
+    }
+
+    sub changeUrl
+    {
+        my ($self, $url) = @_;
+        # Make sure the url is for the api, not the main book page
+        return $self->getItemUrl($url);
+    }
+
+    sub getNumberPasses
+    {
+        return 1;
+    }
+
+    sub getName
+    {
+        return "豆瓣";
+    }
+
+
+    # Compares the "lid" query parameter of $url with the site language code.
+    # NOTE(review): siteLanguageCode is not defined in this package; it is
+    # presumably inherited from the base class - confirm before relying on it.
+    sub testURL
+    {
+        my ($self, $url) = @_;
+        # List assignment yields undef when nothing matched, instead of
+        # whatever $1 still held from an earlier, unrelated match.
+        my ($id) = $url =~ /[\?&]lid=([0-9]+)*/;
+        $id = 0 if !defined $id;
+        return ($id == $self->siteLanguageCode());
+    }
+
+    # Resets the list of fields shown in the result list.
+    sub getReturnedFields
+    {
+        my $self = shift;
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+        };
+    }
+
+    sub getAuthor
+    {
+        return 'BW';
+    }
+
+    sub getLang
+    {
+        return 'ZH';
+    }
+
+    sub isPreferred
+    {
+        return 1;
+    }
+
+    sub getSearchCharset
+    {
+        my $self = shift;
+
+        # Need urls to be double character encoded
+        return "UTF-8";
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+
+        return "UTF-8";
+    }
+
+    sub decodeEntitiesWanted
+    {
+        return 0;
+    }
+
+    sub siteLanguage
+    {
+        my $self = shift;
+
+        return 'ZH';
+    }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCFnac.pm b/lib/gcstar/GCPlugins/GCbooks/GCFnac.pm
new file mode 100644
index 0000000..9c0e804
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCFnac.pm
@@ -0,0 +1,462 @@
+package GCPlugins::GCbooks::GCFnac;
+
+###################################################
+#
+#  Copyright 2005-2006 Tian
+#
+#  This file is part of GCstar.
+#
+#  GCstar is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  GCstar is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCstar; if not, write to the Free Software
+#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin for fnac.com. The start/end/text subs form a small state
+    # machine driven by the is* flags stored in $self; presumably they are
+    # called by the base class's HTML parser for each event - TODO confirm.
+    package GCPlugins::GCbooks::GCPluginFnac;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Handles an opening tag.
+    #   $tagname - tag name
+    #   $attr    - hash reference of the tag's attributes
+    # In list mode it locates result rows and arms the flags read by text();
+    # in item mode it promotes flags from 1 to 2 on the tag following a label
+    # and records cover URLs.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            return if $self->{isFound};
+            if (($tagname eq 'h3') && ($attr->{class} eq 'hStyle1'))
+            {
+                # The loaded page is recorded as the only result, using the
+                # URL that was loaded.
+                $self->{isFound} = 1 ;
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl};
+                return;
+            }
+            elsif ($tagname eq 'td')
+            {
+                if (($attr->{width} eq '254') && (!exists $attr->{bgcolor}))
+                {
+                    # First cell of a result row: reset the column counter.
+                    $self->{isBook} = 1 ;
+                    $self->{isUrl} = 1 ;
+                    $self->{isColonne} = 0 ;
+                }
+                else
+                {
+                    $self->{isColonne} ++ ;
+                    $self->{isTitle} = 2 ;
+                }
+            }
+            elsif (($attr->{class} eq 'subTitre') && (!exists $attr->{color}) && ($self->{isTitle} eq '0'))
+            {
+                $self->{isTitle} = 1 ;
+            }
+            elsif (($tagname eq 'tpfpublicationtpf') && ($self->{isBook}))
+            {
+                # Marker tag inserted by preProcess in list mode.
+                $self->{isPublication} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($self->{isBook}))
+            {
+                if ($attr->{href} =~ m|/advanced/book.do\?category=book|i)
+                {
+                    $self->{isBook} = 0 ;
+                    $self->{isUrl} = 0 ;
+                }
+                elsif ($self->{isUrl})
+                {
+                    # First link of the row: the item URL; its text is the title.
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                    $self->{isTitle} = 1 ;
+                    $self->{isUrl} = 0 ;
+                }
+                elsif ($self->{isColonne} eq 2)
+                {
+                    $self->{isAuthor} = 1 ;
+                }
+                elsif ($self->{isColonne} eq 4)
+                {
+                    $self->{isPublisher} = 1 ;
+                }
+            }
+        }
+        else
+        {
+            if ($tagname eq 'tr')
+            {
+                # A new table row clears every pending field flag.
+                $self->{isAuthor} = 0 ;
+                $self->{isISBN} = 0 ;
+                $self->{isPublisher} = 0 ;
+                $self->{isFormat} = 0 ;
+                $self->{isSerie} = 0 ;
+                $self->{isPublication} = 0 ;
+                $self->{isPage} = 0 ;
+                $self->{isTranslator} = 0 ;
+            }
+            # A flag set to 1 by text() (label seen) becomes 2 on the next
+            # opening tag, which tells text() that the value follows.
+            elsif ($self->{isAuthor} eq 1)
+            {
+                $self->{isAuthor} = 2 ;
+            }
+            elsif ($self->{isISBN} eq 1)
+            {
+                $self->{isISBN} = 2 ;
+            }
+            elsif ($self->{isPublisher} eq 1)
+            {
+                $self->{isPublisher} = 2 ;
+            }
+            elsif ($self->{isFormat} eq 1)
+            {
+                $self->{isFormat} = 2 ;
+            }
+            elsif ($self->{isSerie} eq 1)
+            {
+                $self->{isSerie} = 2 ;
+            }
+            elsif ($self->{isPublication} eq 1)
+            {
+                $self->{isPublication} = 2 ;
+            }
+            elsif ($self->{isPage} eq 1)
+            {
+                $self->{isPage} = 2 ;
+            }
+            elsif ($self->{isTranslator} eq 1)
+            {
+                $self->{isTranslator} = 2 ;
+            }
+            elsif (($tagname eq 'h3') && ($attr->{class} eq 'hStyle1'))
+            {
+                $self->{isTitle} = 1 ;
+            }
+            elsif (($tagname eq 'strong') && (($self->{isTitle}) || $attr->{class} eq 'titre dispeblock'))
+            {
+                $self->{isTitle} = 2 ;
+            }
+            elsif (($tagname eq 'th') && ($attr->{scope} eq 'row'))
+            {
+                # Row header: its text is inspected by text() to pick a field.
+                $self->{isAnalyse} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{class} eq 'expandimg') && ($self->{bigPics}))
+            {
+                $self->{curInfo}->{cover} = $attr->{href} ;
+            }
+            elsif (($attr->{class} eq 'activeimg') && ((!$self->{bigPics}) || ($self->{curInfo}->{cover} eq '')))
+            {
+                $self->{isCover} = 1 ;
+            }
+            elsif (($tagname eq 'img') && ($self->{isCover}))
+            {
+                $self->{curInfo}->{cover} = $attr->{src} ;
+                $self->{isCover} = 0 ;
+            }
+            elsif (($tagname eq 'div') && ($attr->{class} =~ /^lireLaSuite/))
+            {
+                $self->{isDescription} = 1 ;
+            }
+        }
+    }
+
+    # Handles a closing tag: clears isFound, decrements the nesting counter
+    # and stops description capture when a div closes.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{isFound} = 0 ;
+        $self->{inside}->{$tagname}--;
+        $self->{isDescription} = 0 if $tagname eq 'div';
+    }
+
+    # Handles a text node, storing it in the field selected by the flags
+    # that start() armed.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle} eq 1)
+            {
+                # Strip leading whitespace
+                $origtext =~ s/^\s+//;
+                # Strip trailing whitespace
+                $origtext =~ s/\s+$//g;
+                if (($self->{itemsList}[$self->{itemIdx}]->{title} eq '') && ($origtext ne ''))
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                }
+                elsif ($origtext ne '')
+                {
+                    # Further title parts are appended after " - ".
+                    $self->{itemsList}[$self->{itemIdx}]->{title} .= ' - ';
+                    $self->{itemsList}[$self->{itemIdx}]->{title} .= $origtext;
+                }
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor})
+            {
+                # Strip leading whitespace
+                $origtext =~ s/^\s+//;
+                # Strip trailing whitespace
+                $origtext =~ s/\s+$//g;
+                if (($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') && ($origtext ne ''))
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+                }
+                elsif ($origtext ne '')
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ';
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
+                }
+                $self->{isAuthor} = 0 ;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isPublication})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{publication} = $origtext;
+                $self->{isPublication} = 0 ;
+            }
+        }
+        else
+        {
+            # Strip leading whitespace
+            $origtext =~ s/^\s+//;
+            # Strip trailing whitespace
+            $origtext =~ s/\s+$//g;
+            if ($self->{isTitle} eq '2')
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # Text of a row header: choose the field from its (French) label.
+                $self->{isAuthor} = 1 if ($origtext =~ m/Auteur/i);
+                $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i);
+                $self->{isPublisher} = 1 if ($origtext =~ m/Editeur/i);
+                $self->{isFormat} = 1 if ($origtext =~ m/Format/i);
+                $self->{isSerie} = 1 if ($origtext =~ m/Collection/i);
+                $self->{isPublication} = 1 if ($origtext =~ m/Date de parution/i);
+                $self->{isPage} = 1 if ($origtext =~ m/pages/i);
+                $self->{isTranslator} = 1 if ($origtext =~ m/Traduction/i);
+
+                $self->{isAnalyse} = 0 ;
+            }
+            elsif ($self->{isAuthor} eq 2)
+            {
+                # Remove the (first) comma
+                $origtext =~ s/,//;
+                if ($origtext ne '')
+                {
+                    # The flag is not cleared here, so every following text
+                    # node is appended until start() resets it on a new row.
+                    $self->{curInfo}->{authors} .= $origtext;
+                    $self->{curInfo}->{authors} .= ",";
+                }
+            }
+            elsif ($self->{isISBN} eq 2)
+            {
+                $self->{curInfo}->{isbn} = $origtext;
+                $self->{isISBN} = 0 ;
+            }
+            elsif ($self->{isPublisher} eq 2)
+            {
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{publisher} = $origtext;
+                    $self->{isPublisher} = 0 ;
+                }
+            }
+            elsif ($self->{isFormat} eq 2)
+            {
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{format} = $origtext;
+                    $self->{isFormat} = 0 ;
+                }
+            }
+            elsif ($self->{isSerie} eq 2)
+            {
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{serie} = $origtext;
+                    $self->{isSerie} = 0 ;
+                }
+            }
+            elsif ($self->{isPublication} eq 2)
+            {
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{publication} = $origtext;
+                    $self->{isPublication} = 0 ;
+                }
+            }
+            elsif (($self->{isPage} eq 2))
+            {
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{pages} = $origtext;
+                    $self->{isPage} = 0 ;
+                }
+            }
+            # NOTE(review): unlike the other fields this tests for any true
+            # value rather than "eq 2" - confirm whether that is intended.
+            elsif ($self->{isTranslator})
+            {
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{translator} = $origtext;
+                    $self->{isTranslator} = 0 ;
+                }
+            }
+            elsif ($self->{isDescription})
+            {
+                $self->{curInfo}->{description} .= $origtext;
+            }
+
+        }
+    }
+
+    # Builds the plugin, declares the fields shown in the result list and
+    # clears the parser state flags.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 1,
+            format => 0,
+            edition => 1,
+            serie => 0,
+        };
+
+        $self->{isFound} = 0;
+        $self->{isColonne} = 0;
+        $self->{isBook} = 0;
+        $self->{isUrl} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        $self->{isPublisher} = 0;
+        $self->{isISBN} = 0;
+        $self->{isPublication} = 0;
+        $self->{isFormat} = 0;
+        $self->{isSerie} = 0;
+        $self->{isPage} = 0;
+        $self->{isDescription} = 0;
+        $self->{isCover} = 0;
+        $self->{isTranslator} = 0;
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML before parsing and returns it.
+    # In list mode "</a><br>" is replaced by a marker tag that start()
+    # recognises as the beginning of the publication date.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s|</a><br>|</a><tpfpublicationtpf>|gmi;
+        }
+        else
+        {
+            # The description may contain HTML tags, so it is located now
+            # NOTE(review): $html2 and $html3 are computed and cleaned below
+            # but never stored or written back into $html, so this branch
+            # currently has no effect on the returned page - confirm intent.
+            my $found = index($html,"<strong>Mot de l'");
+            if ( $found >= 0 )
+            {
+                my $html2 = substr($html, $found +length('<strong>Mot de l\''),length($html)- $found -length('<strong>Mot de l\''));
+                my $found2 = index($html2,"<h4 ");
+                my $html3 = $html2;
+                if ( $found2 >= 0 )
+                {
+                    $html3 = substr($html2, $found2 +length('<h4 '),length($html2)- $found2 -length('<h4 '));
+                    $html2 = substr($html2, 0, $found2);
+                }
+
+                $found2 = index($html2,"</strong>");
+                if ( $found2 >= 0 )
+                {
+                    $html2 = substr($html2, $found2 +length('</strong>'),length($html2)- $found2 -length('</strong>'));
+                }
+
+                # Flatten simple markup and replace some special characters.
+                $html2 =~ s|<li>|\n* |gi;
+                $html2 =~ s|<br>|\n|gi;
+                $html2 =~ s|<br />|\n|gi;
+                $html2 =~ s|<b>||gi;
+                $html2 =~ s|</b>||gi;
+                $html2 =~ s|<i>||gi;
+                $html2 =~ s|</i>||gi;
+                $html2 =~ s|<p>|\n|gi;
+                $html2 =~ s|</p>||gi;
+                $html2 =~ s|</h4>||gi;
+                $html2 =~ s|\x{92}|'|g;
+                $html2 =~ s|’|'|gi;
+                $html2 =~ s|•|*|gi;
+                $html2 =~ s|…|...|gi;
+                $html2 =~ s|\x{85}|...|gi;
+                $html2 =~ s|\x{8C}|OE|gi;
+                $html2 =~ s|\x{9C}|oe|gi;
+
+            }
+
+        }
+
+        return $html;
+    }
+
+    # Returns the quick-search URL for $word, restricted to books.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://www3.fnac.com/search/quick.do?filter=-3&text=". $word ."&category=book";
+    }
+
+    # Returns $url when set, otherwise the site's home page.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url;
+        return 'http://www.fnac.com/';
+    }
+
+    sub getName
+    {
+        return "Fnac (FR)";
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+        return "ISO-8859-15";
+#        return "UTF-8";
+    }
+
+    sub getAuthor
+    {
+        return 'TPF';
+    }
+
+    sub getLang
+    {
+        return 'FR';
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm b/lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm
new file mode 100644
index 0000000..eb119a7
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCFnacPT.pm
@@ -0,0 +1,390 @@
+package GCPlugins::GCbooks::GCFnacPT;
+
+###################################################
+#
+#  Copyright 2005-2006 Tian
+#
+#  This file is part of GCstar.
+#
+#  GCstar is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  GCstar is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCstar; if not, write to the Free Software
+#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin for fnac.pt. The start/end/text subs form a small state
+    # machine driven by the is* flags stored in $self; presumably they are
+    # called by the base class's HTML parser for each event - TODO confirm.
+    package GCPlugins::GCbooks::GCPluginFnacPT;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Private helper (plain function, not a method).
+    # Turns one "Surname, Forename" entry into "Forename Surname", trimming
+    # both parts; an entry without a forename yields the surname alone.
+    # Returns '' for an empty entry.
+    sub _formatAuthorName
+    {
+        my ($entry) = @_;
+        return '' if !defined $entry;
+
+        my ($surname, $forename) = split(/,/, $entry);
+        foreach my $part ($surname, $forename)
+        {
+            next if !defined $part;
+            $part =~ s/^\s+//;
+            $part =~ s/\s+$//;
+        }
+        $surname  = '' if !defined $surname;
+        $forename = '' if !defined $forename;
+
+        return $surname if $forename eq '';
+        return $forename if $surname eq '';
+        return $forename . " " . $surname;
+    }
+
+    # Private helper (plain function, not a method).
+    # Splits $text on "&" and returns the formatted, non-empty author names.
+    sub _splitAuthors
+    {
+        my ($text) = @_;
+        return grep { $_ ne '' }
+               map  { _formatAuthorName($_) }
+               split(/&/, $text);
+    }
+
+    # Handles an opening tag.
+    #   $tagname - tag name
+    #   $attr    - hash reference of the tag's attributes
+    # In list mode it starts a new result on each result link; in item mode
+    # it promotes field flags from 1 to 2 on the tag following a label and
+    # records the cover URL.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+
+            if (($tagname eq 'a') && ($attr->{class} eq 'txtpretoarial11'))
+            {
+                # Result link: hrefs are prefixed with the site root.
+                $self->{isBook} = 1 ;
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.fnac.pt" . $attr->{href};
+            }
+            elsif (($tagname eq 'strong') && ($self->{isBook}))
+            {
+                $self->{isTitle} = 1 ;
+            }
+            elsif (($tagname eq 'span') && ($self->{isBook}))
+            {
+                $self->{isAuthor} = 1 ;
+                $self->{isBook} = 0 ;
+            }
+        }
+        else
+        {
+            if (($tagname eq 'span') && ($attr->{class} eq 'txtpretoarial11'))
+            {
+                $self->{isAnalyse} = 1 ;
+            }
+            # A flag set to 1 by text() (label seen) becomes 2 on the next
+            # opening tag, which tells text() that the value follows.
+            elsif ($self->{isISBN} eq 1)
+            {
+                $self->{isISBN} = 2 ;
+            }
+            elsif ($self->{isPublisher} eq 1)
+            {
+                $self->{isPublisher} = 2 ;
+            }
+            elsif ($self->{isFormat} eq 1)
+            {
+                $self->{isFormat} = 2 ;
+            }
+            elsif ($self->{isSerie} eq 1)
+            {
+                $self->{isSerie} = 2 ;
+            }
+            elsif ($self->{isPublication} eq 1)
+            {
+                $self->{isPublication} = 2 ;
+            }
+            elsif ($self->{isPage} eq 1)
+            {
+                $self->{isPage} = 2 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{class} eq 'txt_arial14'))
+            {
+                $self->{isTitle} = 1 ;
+            }
+            elsif (($tagname eq 'strong') && ($self->{isTitle}))
+            {
+                $self->{isTitle} = 2 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{class} eq 'txt_arial10') && ( index($attr->{href},"param=autor") >= 0))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'tabfundo_branco'))
+            {
+                $self->{isAnalyse} = 1 ;
+            }
+            elsif (($tagname eq 'img') && ($attr->{src} =~ m/Images\/catalogo\/livros/i))
+            {
+                $self->{curInfo}->{cover} = "http://www.fnac.pt" . $attr->{src};
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'txtpretoarial11') && ($attr->{colspan} eq '2'))
+            {
+                $self->{isDescription} = 1 ;
+            }
+            elsif ($tagname eq 'object')
+            {
+                $self->{isDescription} = 1 ;
+            }
+            elsif ($tagname eq 'param')
+            {
+                $self->{isDescription} = 1 ;
+            }
+            elsif ($tagname eq 'embed')
+            {
+                $self->{isDescription} = 1 ;
+            }
+        }
+    }
+
+    # Handles a closing tag: clears isFound and decrements the nesting counter.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{isFound} = 0 ;
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Handles a text node, storing it in the field selected by the flags
+    # that start() armed.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor})
+            {
+                # Authors are separated by "&" and written "Surname, Forename".
+                my $item = $self->{itemsList}[$self->{itemIdx}] ||= {};
+                foreach my $name (_splitAuthors($origtext))
+                {
+                    if (!defined $item->{authors} || $item->{authors} eq '')
+                    {
+                        $item->{authors} = $name;
+                    }
+                    else
+                    {
+                        $item->{authors} .= ", " . $name;
+                    }
+                }
+
+                $self->{isAuthor} = 0 ;
+            }
+        }
+        else
+        {
+            # Strip leading whitespace
+            $origtext =~ s/^\s+//;
+            # Strip trailing whitespace
+            $origtext =~ s/\s+$//g;
+            if ($self->{isTitle} eq '1')
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor} eq 1)
+            {
+                if ($origtext ne '')
+                {
+                    foreach my $name (_splitAuthors($origtext))
+                    {
+                        if (!defined $self->{curInfo}->{authors} || $self->{curInfo}->{authors} eq '')
+                        {
+                            $self->{curInfo}->{authors} = $name;
+                        }
+                        else
+                        {
+                            # Always append: a name without a forename used
+                            # to overwrite the authors collected so far.
+                            $self->{curInfo}->{authors} .= ", " . $name;
+                        }
+                    }
+
+                }
+                $self->{isAuthor} = 0 ;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # Text of a label: choose the field from its wording.
+                $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i);
+                $self->{isPublisher} = 1 if ($origtext =~ m/Editora/i);
+                $self->{isFormat} = 1 if ($origtext =~ m/Encaderna/i);
+                $self->{isSerie} = 1 if ($origtext =~ m/Colec/i);
+                $self->{isPublication} = 1 if ($origtext =~ m/Ano/i);
+                # NOTE(review): every other label is Portuguese; confirm that
+                # the site really labels the page count with "pages".
+                $self->{isPage} = 1 if ($origtext =~ m/pages/i);
+
+                $self->{isAnalyse} = 0 ;
+            }
+            elsif ($self->{isISBN} eq 2)
+            {
+                $self->{curInfo}->{isbn} = $origtext;
+                $self->{isISBN} = 0 ;
+            }
+            elsif ($self->{isPublisher} eq 2)
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isFormat} eq 2)
+            {
+                $self->{curInfo}->{format} = $origtext;
+                $self->{isFormat} = 0 ;
+            }
+            elsif ($self->{isSerie} eq 2)
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{isSerie} = 0 ;
+            }
+            elsif ($self->{isPublication} eq 2)
+            {
+                $self->{curInfo}->{publication} = $origtext;
+                $self->{isPublication} = 0 ;
+            }
+            elsif (($self->{isPage} eq 2))
+            {
+                $self->{curInfo}->{pages} = $origtext;
+                $self->{isPage} = 0 ;
+            }
+            elsif ($self->{isDescription})
+            {
+                $self->{curInfo}->{description} .= $origtext;
+                $self->{isDescription} = 0 ;
+            }
+
+        }
+    }
+
+    # Builds the plugin, declares the fields shown in the result list and
+    # clears the parser state flags.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 0,
+        };
+
+        $self->{isFound} = 0;
+        $self->{isBook} = 0;
+        $self->{isUrl} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        $self->{isAnalyse} = 0;
+        $self->{isPublisher} = 0;
+        $self->{isISBN} = 0;
+        $self->{isPublication} = 0;
+        $self->{isFormat} = 0;
+        $self->{isSerie} = 0;
+        $self->{isPage} = 0;
+        $self->{isDescription} = 0;
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML before parsing and returns it.
+    # In list mode the page is cut down to the part between the two marker
+    # strings; in item mode simple markup is flattened to plain text.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            # Drop everything up to and including the first marker.
+            my $found = index($html,'"listagem de resultados"');
+            if ( $found >= 0 )
+            {
+                $html = substr($html, $found +length('"listagem de resultados"'),length($html)- $found -length('"listagem de resultados"'));
+            }
+
+            # Drop everything from the second marker onwards.
+            $found = index($html,'"tabela de estrutura do cart');
+            if ( $found >= 0 )
+            {
+                $html = substr($html, 0, $found);
+            }
+
+        }
+        else
+        {
+            $html =~ s|<u>||gi;
+            $html =~ s|<li>|\n* |gi;
+            $html =~ s|<br>|\n|gi;
+            $html =~ s|<br />|\n|gi;
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+            $html =~ s|<i>||gi;
+            $html =~ s|</i>||gi;
+            $html =~ s|<p>|\n|gi;
+            $html =~ s|</p>||gi;
+            $html =~ s|</h4>||gi;
+            $html =~ s|\x{92}|'|g;
+            $html =~ s|’|'|gi;
+            $html =~ s|•|*|gi;
+            $html =~ s|<center>||gi;
+            $html =~ s|</center>||gi;
+            $html =~ s|</embed>||gi;
+            $html =~ s|</object>||gi;
+
+        }
+
+        return $html;
+    }
+
+    # Returns the search URL for $word in the books catalogue.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        return "http://www.fnac.pt/pt/Search/Search.aspx?categoryN=&cIndex=&catalog=livros&str=". $word;
+    }
+
+    # Returns $url when set, otherwise the site's home page.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url if $url;
+        return 'http://www.fnac.pt/';
+    }
+
+    sub getName
+    {
+        return "Fnac (PT)";
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+        return "UTF-8";
+    }
+
+    sub getAuthor
+    {
+        return 'TPF';
+    }
+
+    sub getLang
+    {
+        return 'PT';
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm b/lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm
new file mode 100644
index 0000000..2b7256c
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCISBNdb.pm
@@ -0,0 +1,370 @@
+package GCPlugins::GCbooks::GCISBNdb;
+
+###################################################
+#
+#  Copyright 2005-2006 Tian
+#
+#  This file is part of GCstar.
+#
+#  GCstar is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  GCstar is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with GCstar; if not, write to the Free Software
+#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+    # Book plugin for isbndb.com. The start/end/text subs form a small state
+    # machine driven by the is* flags stored in $self; presumably they are
+    # called by the base class's HTML parser for each event - TODO confirm.
+    package GCPlugins::GCbooks::GCPluginISBNdb;
+
+    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+    use URI::Escape;
+
+    # Handles an opening tag.
+    #   $tagname - tag name
+    #   $attr    - hash reference of the tag's attributes
+    # In list mode it recognises result blocks and their book/person/publisher
+    # links; in item mode it arms the flags read by text() and fetches the
+    # cover from an embedded iframe.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+
+            if (($tagname eq 'div') && ($attr->{class} eq 'bookInfo') && ($self->{searchField} ne 'isbn'))
+            {
+                $self->{isBook} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{href},"/d/book/") >= 0) && ($self->{isBook}))
+            {
+                # Book link: hrefs are prefixed with the site root.
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://isbndb.com" . $attr->{href};
+                $self->{isTitle} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{href},"/d/person/") >= 0) && ($self->{isBook}))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{href},"/d/publisher/") >= 0) && ($self->{isBook}))
+            {
+                $self->{isPublisher} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{onclick},"isbndbTrackBuy") >= 0) && ($self->{itemIdx} eq '-1'))
+            {
+                # No result recorded yet: the loaded page itself is recorded
+                # as the only result, using the URL that was loaded.
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl} ;
+            }
+            elsif (($tagname eq 'span') && ($attr->{class} eq 'inactive'))
+            {
+                $self->{isBook} = 0 ;
+            }
+        }
+        else
+        {
+            if ($tagname eq 'title')
+            {
+                $self->{isTitle} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{href},"/d/person/") >= 0))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{href},"/d/publisher/") >= 0))
+            {
+                $self->{isPublisher} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ( index($attr->{href},"/c/Library_Shelves/Dewey") >= 0))
+            {
+                $self->{isGenre} = 1 ;
+            }
+            elsif ($tagname eq 'h2')
+            {
+                # Section heading: its text is inspected by text().
+                $self->{isAnalyse} = 1 ;
+            }
+            elsif (($tagname eq 'iframe') && ($self->{curInfo}->{cover} eq ''))
+            {
+                # Load the iframe's page and take the first <img src="..."> as cover.
+                my $html = $self->loadPage( $attr->{src}, 0, 1 );
+                my $found = index($html,"<img src=\"");
+                if ( $found >= 0 )
+                {
+                    $html = substr($html, $found +length('<img src="'),length($html)- $found -length('<img src="'));
+
+                    my @array = split(/"/,$html);
+                    $self->{curInfo}->{cover} = $array[0];
+                }
+            }
+        }
+    }
+
+    # Handles a closing tag: only the nesting counter is updated.
+    sub end
+    {
+        my ($self, $tagname) = @_;
+
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Handles a text node, storing it in the field selected by the flags
+    # that start() armed.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor})
+            {
+                # Authors are joined with ", ".
+                if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext;
+                }
+                else
+                {
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', ';
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
+                }
+                $self->{isAuthor} = 0 ;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+        }
+        else
+        {
+            # Strip leading whitespace
+            $origtext =~ s/^\s+//;
+            # Strip trailing whitespace
+            $origtext =~ s/\s+$//g;
+            if ($self->{isTitle})
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor})
+            {
+                # Each author is followed by a comma.
+                $self->{curInfo}->{authors} .= $origtext;
+                $self->{curInfo}->{authors} .= ",";
+                $self->{isAuthor} = 0 ;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isAnalyse})
+            {
+                # Heading text decides how the following text node is read.
+                $self->{isFormat} = 1 if ($origtext =~ m/Book Details:/i);
+                $self->{isDescription} = 1 if ($origtext =~ m/Notes:/i);
+                $self->{isDescription} = 1 if ($origtext =~ m/Summary:/i);
+
+                $self->{isAnalyse} = 0 ;
+            }
+            elsif ($self->{isFormat})
+            {
+                # The details block is read line by line as "Label: value".
+                my @array = split(/\n/,$origtext);
+                my @array2;
+                my @array3;
+                my $element;
+                my $element2;
+                foreach $element (@array)
+                {
+                    @array2 = split(/:/,$element);
+                    # Strip leading whitespace
+                    $array2[1] =~ s/^\s+//;
+                    # Strip trailing whitespace
+                    $array2[1] =~ s/\s+$//g;
+                    if ($array2[0] =~ m/Language/i)
+                    {
+                        $self->{curInfo}->{language} = $array2[1];
+                    }
+                    elsif ($array2[0] =~ m/Physical Description/i)
+                    {
+                        # Look in each ";" separated part for a number followed by " p".
+                        @array3 = split(/;/,$array2[1]);
+                        foreach $element2 (@array3)
+                        {
+                            # Strip leading whitespace
+                            $element2 =~ s/^\s+//;
+                            $_= $element2;
+                            if (/(^[0-9]+)(\s[p])(.*)/)
+                            {
+                                $self->{curInfo}->{pages} = $1;
+                            }
+                            elsif (/(.*)(\s)([0-9]+)(\s[p])(.*)/)
+                            {
+                                $self->{curInfo}->{pages} = $3;
+                            }
+                        }
+                    }
+                    elsif ($array2[0] =~ m/Edition Info/i)
+                    {
+                        # First part is the format; the second is kept as the
+                        # publication date when it contains four digits.
+                        @array3 = split(/;/,$array2[1]);
+                        $self->{curInfo}->{format} = $array3[0];
+                        $_= $array3[1];
+                        if (/(.*)([0-9][0-9][0-9][0-9])(.*)/)
+                        {
+                            $self->{curInfo}->{publication} = $array3[1];
+                            # Strip leading whitespace
+                            $self->{curInfo}->{publication} =~ s/^\s+//;
+                        }
+                    }
+                }
+                $self->{isFormat} = 0 ;
+            }
+            elsif ($self->{isDescription})
+            {
+                $origtext =~ s/\n\n/\n/g;
+                $self->{curInfo}->{description} = $origtext;
+                $self->{isDescription} = 0 ;
+            }
+            elsif ($self->{isGenre})
+            {
+                # The genre is the part after the first "--".
+                my @array = split(/--/,$origtext);
+
+                $self->{curInfo}->{genre} = $array[1];
+                # Strip leading whitespace
+                $self->{curInfo}->{genre} =~ s/^\s+//;
+                $self->{isGenre} = 0 ;
+            }
+            elsif (($origtext =~ m/ISBN:/i) && ($self->{curInfo}->{isbn} eq ''))
+            {
+                # Keep the first space-separated word after the first colon.
+                my @array = split(/:/,$origtext);
+
+                # Strip leading whitespace
+                $array[1] =~ s/^\s+//;
+                # Strip trailing whitespace
+                $array[1] =~ s/\s+$//g;
+                my @array2 = split(/ /,$array[1]);
+
+                $self->{curInfo}->{isbn} = $array2[0];
+                # Strip leading whitespace
+                $self->{curInfo}->{isbn} =~ s/^\s+//;
+                # Strip trailing whitespace
+                $self->{curInfo}->{isbn} =~ s/\s+$//g;
+            }
+        }
+    }
+
+    # Builds the plugin, declares the fields shown in the result list and
+    # clears the parser state flags.
+    # NOTE(review): isPublisher and isFormat are used by start()/text() but
+    # are not initialised here.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless ($self, $class);
+
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 1,
+            serie => 0,
+        };
+
+        $self->{isBook} = 0;
+        $self->{isTitle} = 0;
+        $self->{isAuthor} = 0;
+        $self->{isAnalyse} = 0;
+        $self->{isDescription} = 0;
+        $self->{isGenre} = 0;
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML before parsing and returns it: bold tags are
+    # removed in list mode, and simple markup is flattened in item mode.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+        }
+        else
+        {
+            $html =~ s|<u>||gi;
+            $html =~ s|<li>|\n* |gi;
+            $html =~ s|<br>|\n|gi;
+            $html =~ s|<br />|\n|gi;
+            $html =~ s|<b>||gi;
+            $html =~ s|</b>||gi;
+            $html =~ s|<i>||gi;
+            $html =~ s|</i>||gi;
+            $html =~ s|<p>|\n|gi;
+            $html =~ s|</p>||gi;
+            $html =~ s|\x{92}|'|g;
+            $html =~ s|’|'|gi;
+            $html =~ s|•|*|gi;
+            $html =~ s|…|...|gi;
+            $html =~ s|\x{8C}|OE|gi;
+            $html =~ s|\x{9C}|oe|gi;
+
+        }
+
+        return $html;
+    }
+
+    # Returns the search URL for $word; ISBN searches use the "search-all"
+    # page, other searches the title search page.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        if ($self->{searchField} eq 'isbn')
+        {
+            return "http://isbndb.com/search-all.html?kw=" .$word;
+        }
+        else
+        {
+            return "http://isbndb.com/search-title.html?kw=" .$word ."&isn=";
+        }
+
+    }
+
+    # Item URLs stored by start() are used unchanged.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url;
+    }
+
+    sub getName
+    {
+        return "ISBNdb";
+    }
+
+    sub getCharset
+    {
+        my $self = shift;
+        return "ISO-8859-15";
+    }
+
+    sub getAuthor
+    {
+        return 'TPF';
+    }
+
+    sub getLang
+    {
+        return 'EN';
+    }
+
+    sub getSearchFieldsArray
+    {
+        return ['isbn', 'title'];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm b/lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm
new file mode 100644
index 0000000..3b553e9
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCbooks/GCInternetBokHandeln.pm
@@ -0,0 +1,464 @@
+package GCPlugins::GCbooks::GCbooksInternetBokHandeln;
+
+###################################################
+#
+#  Copyright 2005-2010 Christian Jodar
+#
+#  This file is part of GCstar.
+#
+#  GCstar is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  GCstar is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#  GNU General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginInternetBokHandeln; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'span') && ($attr->{class} eq 'title1')) + { + $self->{isFound} = 1 ; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + elsif (($tagname eq 'td') && ($attr->{rowspan} eq '4') && ($self->{isBook} eq '0') && ($self->{isFound} eq 0)) + { + # En fait la sequence est un peu tordue. Je cherche le deuxieme passage dans la sequence + # rowspan/a + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + } + elsif (($tagname eq 'img') && ($self->{isBook} eq '1') && ($self->{isUrl})) + { + $self->{isBook} = 2 ; + } + elsif (($tagname eq 'a') && ($self->{isBook} eq '2') && ($self->{isUrl})) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.internetbokhandeln.se" . 
$attr->{href}; + $self->{isUrl} = 0 ; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'author') && ($self->{isFound} eq 0)) + { + $self->{isAuthor} = 1 ; + $self->{isBook} = 0 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'shaded') && ($self->{isFound} eq 0)) + { + $self->{isEditor_Publication_Format_Lang} = 1 ; + $self->{isBook} = 0 ; + } + } + else + { + if ($self->{isAuthor} eq 1) + { + $self->{isAuthor} = 2 ; + } + elsif ($self->{isPublisher} eq 1) + { + $self->{isPublisher} = 2 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isEdition} eq 1) + { + $self->{isEdition} = 2 ; + } + elsif ($self->{isPage} eq 1) + { + $self->{isPage} = 2 ; + } + elsif ($self->{isLanguage} eq 1) + { + $self->{isLanguage} = 2 ; + } + elsif ($self->{isPublication} eq 1) + { + $self->{isPublication} = 2 ; + } + elsif ($self->{isSerie} eq 1) + { + $self->{isSerie} = 2 ; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'title1')) + { + $self->{isTitle} = 1 ; + # On initialise la variable ( sinon d une fiche sur l autre est n est pas reinitialisee ) + $self->{isDescription} = 0; + } + elsif (($tagname eq 'span') && ($attr->{class} eq 'font5')) + { + $self->{isAnalyse} = 1 ; + } + elsif (($tagname eq 'p') && ($self->{curInfo}->{isbn} ne '') && ($self->{curInfo}->{description} eq '') && ($self->{isDescription} ne 2)) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'largebook')) + { + # Pour etre sur s il n y a pas de commentaire de ne pas prendre n importe quoi + $self->{isDescription} = 2 ; + } + elsif (($tagname eq 'td') && ($attr->{class} eq 'pricecolumn')) + { + $self->{isCover} = 1 ; + } + elsif (($tagname eq 'img') && ($self->{isCover} eq 1)) + { + # le but est de determiner s il y a une couverture ou non, et s il y en a une, on recuperera + # la version grand format qui est bien plus tard + 
if ($attr->{onclick} eq 'return showBig();') + { + $self->{isCover} = 2 ; + } + else + { + if ($attr->{src} eq '/i/dummy.gif') + { + # Il n y a pas d image + $self->{isCover} = 3 ; + } + else + { + $self->{curInfo}->{cover} = $attr->{src} ; + $self->{isCover} = 3 ; + } + } + } + elsif (($tagname eq 'img') && ($attr->{onclick} eq 'return hideBig();') && ($self->{isCover} eq 2)) + { + $self->{curInfo}->{cover} = $attr->{src} ; + $self->{isCover} = 3 ; + } + + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @array = split(/;/,$origtext); + my $element; + foreach $element (@array) + { + my @nom_prenom = split(/,/,$element); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s$//; + $nom_prenom[1] =~ s/\s$//; + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + if ($nom_prenom[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[0]; + } + } + else + { + if ($nom_prenom[1] ne '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . 
$nom_prenom[0]; + } + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isEditor_Publication_Format_Lang}) + { + my @Editor_Publication_Format_Lang = split(/\|/,$origtext); + + $self->{itemsList}[$self->{itemIdx}]->{publication} = $Editor_Publication_Format_Lang[1]; + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/^\s+//; + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/\s$+//; + + $self->{itemsList}[$self->{itemIdx}]->{format} = $Editor_Publication_Format_Lang[2]; + $self->{itemsList}[$self->{itemIdx}]->{format} =~ s/^\s+//; + $self->{itemsList}[$self->{itemIdx}]->{format} =~ s/\s$+//; + + $self->{isEditor_Publication_Format_Lang} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isAuthor} = 1 if ($origtext =~ m/F.*rfattare/i); + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isPublisher} = 1 if ($origtext =~ m/F.*rlag/i); + $self->{isFormat} = 1 if ($origtext =~ m/Band/i); + $self->{isEdition} = 1 if ($origtext =~ m/Upplagenr/i); + $self->{isPage} = 1 if ($origtext =~ m/Sidor/i); + $self->{isLanguage} = 1 if ($origtext =~ m/Spr.*k/i); + $self->{isPublication} = 1 if ($origtext =~ m/Utgivning/i); + $self->{isSerie} = 1 if ($origtext =~ m/Serie/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isAuthor} eq 2) + { + my @array = split(/;/,$origtext); + my $element; + foreach $element (@array) + { + my @nom_prenom = split(/,/,$element); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s//; + $nom_prenom[1] =~ s/^\s//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s$//; + $nom_prenom[1] =~ s/\s$//; + if ($nom_prenom[1] ne '') + { + $self->{curInfo}->{authors} .= $nom_prenom[1] ." " . 
$nom_prenom[0]; + } + else + { + $self->{curInfo}->{authors} .= $nom_prenom[0]; + } + $self->{curInfo}->{authors} .= ","; + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isISBN} eq 2) + { + # Il y a 2 ISBN sur le site, seul le premier m interesse + if ($self->{curInfo}->{isbn} eq '') + { + $self->{curInfo}->{isbn} = $origtext; + } + $self->{isISBN} = 0 ; + } + elsif ($self->{isPublisher} eq 2) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isFormat} eq 2) + { + my @array = split(/\n/,$origtext); + + $self->{curInfo}->{format} = $array[0]; + $self->{isFormat} = 0 ; + } + elsif ($self->{isEdition} eq 2) + { + # There is some trouble on the site with this field : it is not accurrate. For example for ISBN 9113014528 + # this field is set to 7000. So for instance this field isn't taken. +# $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0 ; + } + elsif ($self->{isPage} eq 2) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0 ; + } + elsif ($self->{isLanguage} eq 2) + { + $self->{curInfo}->{language} = $origtext; + $self->{isLanguage} = 0 ; + } + elsif ($self->{isPublication} eq 2) + { + $self->{curInfo}->{publication} = $origtext; + $self->{curInfo}->{publication} =~ s|([0-9]*) ([A-Za-z]*) ([0-9]*)|$1.'/'.$self->{monthNumber}->{$2}.'/'.$3|e; + $self->{curInfo}->{publication} =~ s|([A-Za-z]*) ([0-9]*)|$self->{monthNumber}->{$1}.'/'.$2|e; + $self->{isPublication} = 0 ; + } + elsif ($self->{isSerie} eq 2) + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0 ; + } + elsif ($self->{isDescription} eq 1) + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 2 ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{monthNumber} = { + Januari => '01', + Februari => '02', + Mars => '03', + April => '04', + Maj => '05', + Juni => '06', + Juli => '07', 
+ Augusti => '08', + September => '09', + Oktober => '10', + November => '11', + December => '12' + }; + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 0, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + } + else + { + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.internetbokhandeln.se/results.html?new_search=1&all_search=" . $word. "&search_media=all"; + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url if $url; + return 'http://www.internetbokhandeln.se/'; + } + + sub getName + { + return "InternetBokHandeln"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'SV'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm b/lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm new file mode 100644 index 0000000..713646c --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCInternetBookShop.pm @@ -0,0 +1,376 @@ +package GCPlugins::GCbooks::GCInternetBookShop;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginInternetBookShop;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+    # Start-tag callback. Receives the tag name and a hash ref of its
+    # attributes ($attrseq and $origtext are accepted but unused here).
+    # NOTE(review): presumably driven by the HTML parser behind
+    # GCbooksPluginsBase; the base class is not visible from here - confirm.
+    #
+    # Every call first bumps the open-tag counter $self->{inside}->{$tagname}.
+    # The rest depends on $self->{parsingList}.
+    #
+    # Result-list mode: $self->{bookStep} tracks progress through a
+    # td -> img -> a -> br -> i tag sequence:
+    #   td  at step 0 -> step 1
+    #   img at step 1 -> step 2
+    #   a   at step 2 -> step 3, href saved in $self->{url}, isTitle armed
+    #   br  at step 3 -> step 4, isAuthor armed
+    #   i   at step 4 -> a new entry is appended to $self->{itemsList} from
+    #                    the buffered url/title/author, isPublisher armed
+    # A td/img/a seen at any other step is ignored. Any other tag except
+    # 'b' (including br/i at the wrong step) resets the whole state machine.
+    #
+    # Detail mode: picks up the ISBN from <input name="isbn">, the cover from
+    # an <img> whose src contains both the ISBN and "cop", and, while
+    # bookStep is 1 (set by text() on the 'Autore' label), arms isAuthor for
+    # each <a>; the first non-<a> tag after an author link ends the run.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            if ($tagname eq 'td')
+            {
+                if ($self->{bookStep} == 0)
+                {
+                    $self->{bookStep} = 1;
+                }
+            }
+            elsif ($tagname eq 'img')
+            {
+                if ($self->{bookStep} == 1)
+                {
+                    $self->{bookStep} = 2;
+                }
+            }
+            elsif ($tagname eq 'a')
+            {
+                if ($self->{bookStep} == 2)
+                {
+                    $self->{url} = $attr->{href};
+                    $self->{bookStep} = 3;
+                    $self->{isTitle} = 1;
+                }
+            }
+            elsif (($tagname eq 'br') && ($self->{bookStep} == 3))
+            {
+                $self->{bookStep} = 4;
+                $self->{isAuthor} = 1;
+            }
+            elsif (($tagname eq 'i') && ($self->{bookStep} == 4))
+            {
+                $self->{isBook} = 1;
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{url};
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{title};
+
+                if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '')
+                {
+                    # Only the part before the first comma of the buffered
+                    # text is kept as the author.
+                    my @fields = split /,/, $self->{authorAndYear};
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $fields[0];
+                }
+                $self->{isPublisher} = 1;
+            }
+            elsif ($tagname ne 'b')
+            {
+                # Unexpected tag: abandon the current candidate entry.
+                $self->{bookStep} = 0;
+                $self->{url} = '';
+                $self->{isBook} = 0;
+                $self->{isUrl} = 0;
+                $self->{isTitle} = 0;
+                $self->{isAuthor} = 0;
+                $self->{isPublisher} = 0;
+                $self->{isPage} = 0;
+                $self->{isSerie} = 0;
+                $self->{isTranslator} = 0;
+                $self->{isDescription} = 0;
+            }
+        }
+        else
+        {
+            if (($tagname eq 'input') && ($attr->{name} eq 'isbn') && ($self->{curInfo}->{isbn} eq ''))
+            {
+                # Only the first isbn input on the page is used.
+                $self->{curInfo}->{isbn} = $attr->{value};
+            }
+            # The ISBN is matched as a literal, case-insensitive substring and
+            # only once it is known. Interpolating it into a regex was unsafe:
+            # an empty value yields an empty pattern (which Perl replaces with
+            # the last successfully matched pattern), and metacharacters in
+            # the page-supplied value would be interpreted as regex syntax.
+            elsif (($tagname eq 'img')
+                && ($self->{curInfo}->{isbn} ne '')
+                && (index(lc($attr->{src}), lc($self->{curInfo}->{isbn})) >= 0)
+                && ($attr->{src} =~ m/cop/i))
+            {
+                $self->{curInfo}->{cover} = $attr->{src};
+            }
+            elsif ($self->{bookStep} == 1)
+            {
+                if (($tagname eq 'a') && ($self->{areAuthors} == 0))
+                {
+                    $self->{isAuthor} = 1;
+                    $self->{areAuthors} = 1;
+                }
+                if ($self->{areAuthors} == 1)
+                {
+                    if ($tagname eq 'a')
+                    {
+                        $self->{isAuthor} = 1;
+                    }
+                    else
+                    {
+                        # First non-link tag after the author links: the
+                        # author run is over.
+                        $self->{bookStep} = 2;
+                        $self->{areAuthors} = 0;
+                    }
+                }
+            }
+        }
+    }
+
+    # End-tag callback: takes one off the per-tag counter in
+    # $self->{inside} that start() increments.
+    sub end
+    {
+        my $self    = shift;
+        my $tagname = shift;
+
+        $self->{inside}{$tagname}--;
+    }
+
+ # Text-node callback. Stores $origtext into whichever field is currently
+ # "armed" by an is* flag, then clears that flag, so each armed flag consumes
+ # one text node. The flags are set by start() or, in detail mode, by a
+ # label seen earlier in this same handler.
+ sub text
+ {
+ my ($self, $origtext) = @_;
+ if ($self->{parsingList})
+ {
+ # Result-list mode: title and author text are buffered on $self; start()
+ # copies them into the item entry when it reaches the <i> tag.
+ if ($self->{isTitle})
+ {
+ $self->{title} = $origtext;
+ $self->{isTitle} = 0 ;
+ }
+ elsif ($self->{isAuthor})
+ {
+ $self->{authorAndYear} = $origtext;
+ $self->{isAuthor} = 0 ;
+ }
+ elsif ($self->{isPublisher})
+ {
+ # The text following <i> is stored in the item's 'edition' field.
+ $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+ $self->{isPublisher} = 0 ;
+ }
+ }
+ else
+ {
+ # Detail mode: a text node equal to one of the labels below arms the
+ # matching flag; any other text node is stored according to the flag
+ # that is currently armed.
+ # Strip leading whitespace
+ $origtext =~ s/^\s+//;
+ # Strip trailing whitespace
+ $origtext =~ s/\s+$//g;
+ if ($origtext eq 'Titolo')
+ {
+ $self->{isTitle} = 1;
+ }
+ elsif ($origtext eq 'Autore')
+ {
+ # Authors are links: bookStep 1 tells start() to arm isAuthor on each <a>.
+ $self->{bookStep} = 1;
+ }
+ elsif ($origtext eq 'Dati')
+ {
+ $self->{isPage} = 1;
+ }
+ elsif ($origtext eq 'Editore')
+ {
+ $self->{isPublisher} = 1;
+ }
+ elsif ($origtext eq 'Traduttore')
+ {
+ $self->{isTranslator} = 1;
+ }
+ elsif ($origtext eq '(collana')
+ {
+ $self->{isSerie} = 1;
+ }
+ elsif ($origtext eq 'Descrizione')
+ {
+ $self->{isDescription} = 1;
+ }
+ else
+ {
+ if ($self->{isTitle})
+ {
+ $self->{curInfo}->{title} = $origtext;
+ $self->{isTitle} = 0;
+ }
+ elsif ($self->{isAuthor})
+ {
+ # Several authors are joined with ", ".
+ if ($self->{curInfo}->{authors} eq '')
+ {
+ $self->{curInfo}->{authors} = $origtext;
+ }
+ else
+ {
+ $self->{curInfo}->{authors} .= ", " . $origtext;
+ }
+ $self->{isAuthor} = 0 ;
+ }
+ elsif ($self->{isPage})
+ {
+ # Comma-separated record: [0] publication, [1] pages, [2] (and [3] if
+ # present) format.
+ my @array = split(/,/,$origtext);
+
+ $self->{curInfo}->{publication} = $array[0];
+ $self->{curInfo}->{pages} = $array[1];
+ # Strip leading whitespace
+ $self->{curInfo}->{pages} =~ s/^\s+//;
+ # NOTE(review): the '.' is an unescaped regex metacharacter, so this
+ # removes the first 'p' plus any one following character, not only a
+ # literal "p."; whitespace left before it is kept - confirm intent.
+ $self->{curInfo}->{pages} =~ s/p.//;
+ if ($array[3] ne '')
+ {
+ $self->{curInfo}->{format} = $array[2] . "," .$array[3];
+ }
+ else
+ {
+ $self->{curInfo}->{format} = $array[2];
+ }
+ # Strip leading whitespace
+ $self->{curInfo}->{format} =~ s/^\s+//;
+
+ $self->{isPage} = 0 ;
+ }
+ elsif ($self->{isPublisher})
+ {
+ $self->{curInfo}->{publisher} = $origtext;
+ $self->{isPublisher} = 0 ;
+ }
+ elsif ($self->{isTranslator})
+ {
+ $self->{curInfo}->{translator} = $origtext;
+ $self->{isTranslator} = 0 ;
+ }
+ elsif ($self->{isDescription})
+ {
+ # Appends, but the flag is cleared right away, so only the first text
+ # node after the 'Descrizione' label is taken.
+ $self->{curInfo}->{description} .= $origtext;
+ $self->{isDescription} = 0 ;
+ }
+ elsif ($self->{isSerie})
+ {
+ $self->{curInfo}->{serie} = $origtext;
+ $self->{isSerie} = 0 ;
+ }
+ }
+ }
+ }
+
+    # Constructor. Builds the object through the parent class, re-blesses it
+    # into the requested class, then installs this plugin's field table and
+    # zeroes/empties every piece of parser state used by start() and text().
+    # Can be called on a class name or on an existing instance.
+    sub new
+    {
+        my ($proto) = @_;
+        my $class = ref($proto) ? ref($proto) : $proto;
+        my $self  = bless($class->SUPER::new(), $class);
+
+        # NOTE(review): presumably tells the caller which result-list fields
+        # this plugin fills in - the consumer is not visible here, confirm.
+        my %fields = (
+            title       => 1,
+            authors     => 1,
+            publication => 0,
+            format      => 0,
+            edition     => 1,
+        );
+        $self->{hasField} = \%fields;
+
+        # Parser flags and the step counter all start at 0.
+        foreach my $flag (qw(isBook isUrl isTitle isAuthor isPublisher isPage
+                             isSerie isTranslator isDescription areAuthors
+                             bookStep))
+        {
+            $self->{$flag} = 0;
+        }
+
+        # Buffers filled while scanning one search result start empty.
+        foreach my $buffer (qw(url authorAndYear title))
+        {
+            $self->{$buffer} = '';
+        }
+
+        return $self;
+    }
+
+    # Rewrites the raw HTML before it is parsed and returns the new string.
+    #
+    # Result-list mode: collapses every "<br><i>" into "<i>", so start() sees
+    # a single <br> before the <i> of each result.
+    #
+    # Detail mode: cuts the page at the '<a name="commenti">' anchor when it
+    # is present, then applies the substitutions below, in order, to remove
+    # inline markup and normalise a few characters.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        if ($self->{parsingList})
+        {
+            $html =~ s{<br><i>}{<i>}gi;
+            return $html;
+        }
+
+        my $cut = index($html, '<a name="commenti">');
+        $html = substr($html, 0, $cut) unless $cut < 0;
+
+        # [ pattern, replacement ] pairs, applied globally in this order.
+        my @rewrites = (
+            [ qr{<u>}i,       ''     ],
+            [ qr{<li>}i,      "\n* " ],
+            [ qr{<br>}i,      "\n"   ],
+            [ qr{<br />}i,    "\n"   ],
+            [ qr{<b>}i,       ''     ],
+            [ qr{</b>}i,      ''     ],
+            [ qr{<i>}i,       ''     ],
+            [ qr{</i>}i,      ''     ],
+            [ qr{<p>}i,       "\n"   ],
+            [ qr{</p>}i,      ''     ],
+            [ qr{</h4>}i,     ''     ],
+            [ qr{\x{92}},     "'"    ],
+            [ qr{’}i,         "'"    ],
+            [ qr{•}i,         '*'    ],
+            [ qr{<center>}i,  ''     ],
+            [ qr{</center>}i, ''     ],
+            [ qr{</embed>}i,  ''     ],
+            [ qr{</object>}i, ''     ],
+        );
+
+        foreach my $rule (@rewrites)
+        {
+            my ($pattern, $replacement) = @{$rule};
+            $html =~ s/$pattern/$replacement/g;
+        }
+
+        return $html;
+    }
+
+    # Returns the keyword-search URL for $word on internetbookshop.it.
+    # $word is appended exactly as received.
+    # NOTE(review): presumably the caller has already URL-escaped it - confirm.
+    sub getSearchUrl
+    {
+        my $self = shift;
+        my $word = shift;
+
+        my $endpoint = "http://www.internetbookshop.it/ser/serpge.asp?type=keyword&x=";
+        return $endpoint . $word;
+    }
+
+    # Returns $url unchanged when it is a true value; otherwise falls back to
+    # the site's home page.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+        return $url ? $url : 'http://www.internetbookshop.it/';
+    }
+
+    # Returns the name string of this plugin.
+    sub getName
+    {
+        my $name = "InternetBookShop";
+        return $name;
+    }
+
+    # Returns the charset name "ISO-8859-1".
+    # NOTE(review): presumably the encoding of the pages fetched from the
+    # site - the consumer is not visible here, confirm.
+    sub getCharset
+    {
+        my ($self) = @_;
+        my $charset = "ISO-8859-1";
+        return $charset;
+    }
+
+    # Returns the author string reported for this plugin.
+    sub getAuthor
+    {
+        my $author = 'TPF';
+        return $author;
+    }
+
+    # Returns the language code reported for this plugin.
+    sub getLang
+    {
+        my $lang = 'IT';
+        return $lang;
+    }
+
+    # Returns a fresh array ref naming the search fields of this plugin;
+    # only 'title' is listed.
+    sub getSearchFieldsArray
+    {
+        my @fields = ('title');
+        return [@fields];
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm b/lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm new file mode 100644 index 0000000..ff4d6c4 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCLeLivre.pm @@ -0,0 +1,334 @@ +package GCPlugins::GCbooks::GCLeLivre; + +################################################### +# +# Copyright 2005-2006 Tian +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginLeLivre; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'font') && ( $attr->{size} eq '-1') && ( $attr->{face} eq 'Courier New, Courier, mono') && ( $attr->{color} eq '#990000')) + { + $self->{itemIdx}++; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'input') && ( $attr->{name} eq 'add')) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.le-livre.com/index.php?fich=fiche_info.php3&ref=" . 
$attr->{value}; + } + elsif (($tagname eq 'font') && ( $attr->{size} eq '-1') && ( $attr->{face} eq 'Courier New, Courier, mono') && ( $attr->{color} eq '#0000CC')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'font') && ( $attr->{size} eq '-1') && ( $attr->{face} eq 'Times New Roman, Times, serif')) + { + $self->{isPublisher} = 1 ; + } + } + else + { + if ($self->{isTitle} eq 3) + { + $self->{isTitle} = 0 ; + $self->{isAuthor} = 1 ; + } + elsif ($self->{isISBN} eq 1) + { + $self->{isISBN} = 2 ; + } + elsif ($self->{isISBN} eq 2) + { + $self->{isISBN} = 3 ; + } + elsif ($self->{isFormat} eq 1) + { + $self->{isFormat} = 2 ; + } + elsif ($self->{isFormat} eq 2) + { + $self->{isFormat} = 3 ; + } + elsif (($tagname eq 'font') && ( $attr->{color} eq '#990000') && ($self->{curInfo}->{title} eq '')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'font') && ( $attr->{size} eq '2') && ( $attr->{face} eq 'Arial, Helvetica, sans-serif') && ($self->{isTitle} eq 1)) + { + $self->{isTitle} = 2 ; + } + elsif (($tagname eq 'img') && ( index($attr->{src},"/photos/") >= 0) && ($self->{curInfo}->{cover} eq '')) + { + $self->{curInfo}->{cover} = $attr->{src}; + } + elsif (($tagname eq 'font') && ( $attr->{color} eq '#000099')) + { + $self->{isAnalyse} = 1 ; + } + elsif ($tagname eq 'tpftraducteurtpf') + { + $self->{isTranslator} = 1 ; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ', '; + $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext; + } + $self->{isAuthor} = 0 ; + } + elsif 
($self->{isPublisher}) + { + $origtext =~ s|\.\.|\.|gi; + my @array = split(/\./,$origtext); + $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0]; + $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[1]; + $self->{itemsList}[$self->{itemIdx}]->{format} = $array[2]; + $self->{isPublisher} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isTitle} eq 2) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 3 ; + } + elsif ($self->{isAuthor}) + { + $origtext =~ s|/ ||g; + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isTranslator}) + { + $self->{curInfo}->{translator} = $origtext; + $self->{isTranslator} = 0 ; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/ISBN/i); + $self->{isFormat} = 1 if ($origtext =~ m/Descriptif/i); + + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isFormat} eq 3) + { + $origtext =~ s|\.\.|\.|gi; + my @array = split(/\./,$origtext); + $self->{curInfo}->{publisher} = $array[0]; + + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[1] =~ s/\s+$//g; + $_= $array[1]; + if (/(.*)([0-9][0-9][0-9][0-9])(.*)/) + { + $self->{curInfo}->{publication} = $array[1]; + } + + # Enleve les blancs en debut de chaine + $array[2] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[2] =~ s/\s+$//g; + $self->{curInfo}->{format} = $array[2]; + + my $element; + foreach $element (@array) + { + $element =~ s/^\s+//; + $_= $element; + if (/(^[0-9]+)(\s[p])(.*)/) + { + $self->{curInfo}->{pages} = $1; + } + elsif (/(^[Oo][u][v][r][a][g][e])(\s[e][n]\s)(.*)/) + { + $self->{curInfo}->{language} = $3; + } + } + + $self->{isFormat} = 0 ; + + } + elsif ($self->{isISBN} eq 3) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0 ; + } + } + 
} + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 1, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + } + else + { + + $html =~ s|: </font>|<tpfpourfaireunebalisetpf>|gi; + $html =~ s|Traduction de |<tpftraducteurtpf>|gi; + + $html =~ s|<u>||gi; + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|\n|gi; + $html =~ s|<br />|\n|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + $html =~ s|…|...|gi; + $html =~ s|\x{85}|...|gi; + $html =~ s|\x{8C}|OE|gi; + $html =~ s|\x{9C}|oe|gi; + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + return "http://www.le-livre.com/index.php?page=1&Categ=0&mot=". 
$word; + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Le-Livre"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'FR'; + } + + sub getSearchFieldsArray + { + return ['ISBN', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm b/lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm new file mode 100644 index 0000000..1b219aa --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCLiberOnWeb.pm @@ -0,0 +1,418 @@ +package GCPlugins::GCbooks::GCLiberOnWeb; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginLiberOnWeb; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + use Encode; + use HTML::Entities; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + + if (($tagname eq 'font') && ($attr->{color} eq '#E7E4D8') && ($attr->{face} eq 'Arial')) + { + $self->{itemIdx}++; + $self->{isBook} = 1 ; + $self->{isUrl} = 1 ; + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#D90000') && ($attr->{size} eq '3') && ($self->{isBook})) + { + $self->{isAuthor} = 0 ; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#FFFFFF') && ($attr->{size} eq '2') && ($attr->{face} eq 'Arial') && ($self->{isBook})) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{href} =~ m|libro.asp|i) && ($self->{isBook}) && ($self->{isUrl})) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.liberonweb.com/asp/" . 
$attr->{href}; + $self->{isUrl} = 0 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#D90000') && ($attr->{size} eq '5') && ($self->{searchField} eq 'isbn')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + } + else + { + if (($tagname eq 'font') && ($attr->{color} eq '#E7E4D8') && ($attr->{size} eq '4')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#D90000') && ($attr->{size} eq '5')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{face} eq 'Arial') && ($attr->{size} eq '2')) + { + $self->{isGenre} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{face} eq 'Verdana, Arial, Helvetica') && ($attr->{size} eq '2') && ($attr->{color} eq '')) + { + $self->{isFormat} = 1 ; + } + elsif (($tagname eq 'font') && ($attr->{color} eq '#6F6948') && ($attr->{size} eq '4')) + { + $self->{isAnalyse} = 0 ; + $self->{isDescription} = 1 ; + } + elsif ($tagname eq 'tpfserie') + { + $self->{isSerie} = 1 ; + } + elsif ($tagname eq 'tpfanalysecarac') + { + $self->{isSerie} = 0 ; + $self->{isAnalyse} = 1 ; + } + elsif ($tagname eq 'tpffindesc') + { + $self->{isDescription} = 0 ; + } + elsif (($tagname eq 'tpfsautdeligne') && ($self->{isDescription})) + { + $self->{curInfo}->{description} .= "\n"; + } + elsif (($tagname eq 'img') && ($attr->{src} =~ m|/images/books/|i)) + { + $self->{curInfo}->{cover} = 'http://www.liberonweb.com/asp/' .$attr->{src}; + + my $isbn = reverse($attr->{src}); + my $found = index($isbn,"/"); + if ( $found >= 0 ) + { + $isbn = substr($isbn, 0,$found); + $isbn = reverse($isbn); + $found = index($isbn,"."); + if ( $found >= 0 ) + { + $self->{curInfo}->{isbn} = substr($isbn, 0,$found); + } + } + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + 
$self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isPublisher}) + { + if (($origtext =~ m/Collana:/i) && ($self->{itemsList}[$self->{itemIdx}]->{edition} eq '')) + { + my @array = split(/-/,$origtext); + # Enleve les blancs en debut de chaine + $array[0] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[0] =~ s/\s+$//g; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $array[0]; + } + elsif (($origtext =~ m/Anno /i) && ($self->{itemsList}[$self->{itemIdx}]->{publication} eq '')) + { + my $found = index($origtext,"Anno "); + if ( $found >= 0 ) + { + $origtext = substr($origtext, $found +length('Anno '),length($origtext)- $found -length('Anno ')); + my @array = split(/,/,$origtext); + $self->{itemsList}[$self->{itemIdx}]->{publication} = $array[0]; + # Enleve les blancs en fin de chaine + $self->{itemsList}[$self->{itemIdx}]->{publication} =~ s/\s+$//g; + } + } + $self->{isPublisher} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $origtext =~ s/\s+$//g; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isGenre}) + { + if ($origtext =~ m/Argomenti:/i) + { + my @array = split(/:/,$origtext); + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + $array[1] =~ s|, |,|gi; + $self->{curInfo}->{genre} = $array[1]; + } + $self->{isGenre} = 0 ; + } + elsif ($self->{isFormat}) + { + if ($origtext =~ m/Caratteristiche:/i) + { + my @array = split(/:/,$origtext); + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + $self->{curInfo}->{format} = $array[1]; + } + $self->{isFormat} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @array = split(/-/,$origtext); + my $element; + + foreach $element (@array) + { + my @array 
= split(/\(/,$element); + # Enleve les blancs en debut de chaine + $array[0] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $array[0] =~ s/\s+$//; + + if ($array[0] ne '') + { + $self->{curInfo}->{authors} .= $array[0]; + $self->{curInfo}->{authors} .= ","; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isSerie}) + { + if ($origtext =~ m/Collana:/i) + { + my @array = split(/:/,$origtext); + # Enleve les blancs en debut de chaine + $array[1] =~ s/^\s+//; + $self->{curInfo}->{serie} = $array[1]; + } + elsif (($origtext ne '') && ($self->{curInfo}->{serie} eq '')) + { + $self->{curInfo}->{publisher} = $origtext; + } + } + elsif ($self->{isAnalyse}) + { + + my @array = split(/ - /,$origtext); + my $element; + + foreach $element (@array) + { + # Enleve les blancs en debut de chaine + $element =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $element =~ s/\s+$//; + + if ($element =~ m/Pagine/i) + { + $element =~ s/Pagine //i; + $element =~ s/-/,/i; + my @array2 = split(/,/,$element); + if ($array2[1] eq '') + { + $self->{curInfo}->{pages} = $array2[0]; + } + else + { + $self->{curInfo}->{pages} = $array2[1]; + } + # Enleve les blancs en debut de chaine + $self->{curInfo}->{pages} =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $self->{curInfo}->{pages} =~ s/\s+$//; + } + elsif ($element =~ m/Anno/i) + { + my @array2 = split(/ /,$element); + $self->{curInfo}->{publication} = $array2[1]; + } + } + + } + elsif ($self->{isDescription}) + { + if ($origtext ne '') + { + $self->{curInfo}->{description} .= $origtext; + $self->{curInfo}->{description} .= "\n"; + } + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 0, + edition => 1, + }; + + $self->{isTitle} = 0; + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isAuthor} = 0; + $self->{isSerie} = 0; + 
$self->{isGenre} = 0; + $self->{isFormat} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + } + else + { + $html =~ s|\n||gi; + $html =~ s|\r||gi; + $html =~ s|\t||gi; + + $html =~ s|<li>|\n* |gi; + $html =~ s|<br>|<tpfsautdeligne>|gi; + $html =~ s|<br />|<tpfsautdeligne>|gi; + $html =~ s|<br clear=all>|<tpfsautdeligne>|gi; + $html =~ s|<b>||gi; + $html =~ s|</b>||gi; + $html =~ s|<!--Visualizzazione delle Note del libro-->|<tpfanalysecarac>|gi; + $html =~ s|<!--Visualizzazione dell'Editore e della Collana-->|<tpfserie>|gi; + $html =~ s|<font face=Verdana, Arial, Helvetica size=2>|<font face="Verdana, Arial, Helvetica" size=2>|gi; + $html =~ s|<!--mstheme-->|<tpffindesc>|gi; + $html =~ s|<i>||gi; + $html =~ s|</i>||gi; + $html =~ s|<p>|\n|gi; + $html =~ s|</p>||gi; + $html =~ s|\x{92}|'|g; + $html =~ s|’|'|gi; + $html =~ s|•|*|gi; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.liberonweb.com/asp/libro.asp?ISBN=" . $word; + } + else + { + return "http://www.liberonweb.com/asp/lista.asp?D1=Titolo&T1=" . $word. 
"&I1=1"; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "LiberOnWeb"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-15"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'IT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm b/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm new file mode 100644 index 0000000..1afdc67 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMareno.pm @@ -0,0 +1,365 @@ +package GCPlugins::GCbooks::GCbooksMareno; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchURL = ""; + +{ + package GCPlugins::GCbooks::GCPluginMareno; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'title') #od razu mamy wynik? 
+ { + $self->{isBook} = 7; + } + + if (($tagname eq 'table') && ($attr->{class} eq 'bookData')) + { + $self->{itemIdx}++; + $self->{isBook} = 1; + } + if (($tagname eq 'a') && ($self->{isBook} == 1)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.mareno.pl".$attr->{href}; + $self->{isUrl} = 0; + $self->{isTitle} = 1; + } + if (($tagname eq 'div') && ($attr->{class} eq 'bookAuthor') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 1; + $self->{isFormat} = 1; + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'div') && ($attr->{id} eq 'wrgISBN')) + { + $self->{isISBN} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPDATE')) + { + $self->{isPublication} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgFORMAT')) + { + $self->{isFormat} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgDESCR')) + { + $self->{isDescription} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgTITLE')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgAUTOR')) + { + $self->{isAuthor} = 2; + } + if ($tagname eq 'a') + { + if ($self->{isAuthor} eq '1') + { + $self->{isAuthor} = 2; + } + elsif ($self->{isAuthor} eq '2') + { + $self->{isAuthor} = 1; + } + } + if (($tagname eq 'a') && ($attr->{href} =~ /okladki\/big/)) + { + $self->{isCover} = 1; + $self->{curInfo}->{cover} = "http://www.mareno.pl".$attr->{href}; + $self->{isCover} = 0; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + if ($tagname eq 'table') + { + $self->{isBook} = 0; + } + if ($tagname eq 'div') + { + $self->{isAuthor} = 0; + } + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if 
($self->{isBook} == 7) #od razu mamy wynik? + { + $origtext =~ s|^\s*||gs; + $origtext =~ s|\s*$||gs; + if (($origtext ne '') && ($origtext !~ /wyszukiwanie/)) + { + $self->{isUrl} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $searchURL; + $self->{isUrl} = 0; + } + $self->{isBook} = 0; + } + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + if ($self->{isAuthor} == 1) + { + my ($au, $fo, $pu, $pd); + $origtext =~ m|(#\^#- [^#]+#\^#)?(okładka\s*[^,]+,\s*)?([^,]+,\s*)?(\d*)?|s; + $au = $1; + $fo = $2; + $pu = $3; + $pd = $4; + $au =~ s|#\^#- ([^#]+)#\^#|$1|g; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $au; + $self->{isAuthor} = 0; + $fo =~ s|okładka\s*([^,]+),\s*|$1|g; + $self->{itemsList}[$self->{itemIdx}]->{format} = $fo; + $self->{isFormat} = 0; + $pu =~ s|([^,]+),\s*|$1|g; + $self->{itemsList}[$self->{itemIdx}]->{publisher} = $pu; + $self->{isPublisher} = 0; + $self->{itemsList}[$self->{itemIdx}]->{publication} = $pd; + $self->{isPublication} = 0; + } + if ($self->{isTitle} == 1) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isTitle} eq '1') + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} == 1) + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($origtext ne '') + { + if ($self->{curInfo}->{authors} ne '') + { + $self->{curInfo}->{authors} .= ","; + } + $self->{curInfo}->{authors} .= $origtext; + } + $self->{isAuthor} = 2; + } + if ($self->{isFormat} == 1) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + if ($self->{isDescription} == 1) + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0; + } + if ($self->{isISBN} eq '1') + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 
0; + } + if ($self->{isPublisher} eq '1') + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + if ($self->{isPublication} eq '1') + { + $origtext =~ s|(\S*)\s*(\S{4})|$2|; + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 1, + format => 1, + edition => 0, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s/<\/?(b|strong)>//gi; + $html =~ s|</?font[^>]*>||gi; + $html =~ s|<br>|#\^#|gi; + $html =~ s|<TABLE border="0">\s*<tr>\s*<td valign=top>\s*</td>|<table border="0" class="bookData">|gs; + $html =~ s|<td valign=top align=center><a href="[^"]*" class="left-menulink">\s*<IMG SRC[^>]*></a></td>||gs; + $html =~ s|<td valign=top align=left><A HREF([^>]*)>\s*|<a href$1>|gm; + $html =~ s|</a> \s*|</a>\n<div class="bookAuthor">|gm; + $html =~ s|</td></tr>|</div>|g; + } + else + { + $html =~ s/<\/?(i|br|strong)>//gi; + + $html =~ s|<h1>([^<]*)</h1>|<div id="wrgTITLE">$1</div>|s; + $html =~ s|<h2><A(.*)</A></h2>|<div id="wrgAUTOR"><A$1</A></div>|s; + $html =~ s|<span class=textsmall>\s*ISBN:\s*([\dX]*)\s*</span>|<div id="wrgISBN">$1</div>|s; + $html =~ s|<span 
class=textsmall>\s*okładka:\s*([^,]*),?\s*(\d*)[^<]*</span>|<div id="wrgFORMAT">$1</div><div id="wrgPAGES">$2</div>|s; + $html =~ s|<span class=textsmall>\s*wydawnictwo:\s*([^,]*),\s*(\d*)\s*</span>|<div id="wrgPUBLI">$1</div><div id="wrgPDATE">$2</div>|s; + $html =~ s|opis produktu:\s*([^<]*)<hr>|<div id="wrgDESCR">$1</div><hr>|; +# $html =~ s|<dt>Seria:</dt>$*\s*<dd>(.*)</dd>|<div id="wrgSERIA">$1</div>|; +# $html =~ s|<dt>Wydanie:</dt><dd>(.*)</dd>|<div id="wrgEDITI">$1</div>|; +# $html =~ s|<dt>Tłumaczenie:\s*</dt>$*\s*<dd>|<dd id="wrgTRANS">|; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + $searchURL = "http://www.mareno.pl/rezultat.php?tytul=".$word; + return $searchURL; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.mareno.pl/'; + } + + sub getName + { + return "Mareno"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm b/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm new file mode 100644 index 0000000..6b5f41b --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMediabooks.pm @@ -0,0 +1,333 @@ +package GCPlugins::GCbooks::GCMediabooks;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginMediabooks;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+ use Encode;
+ use HTML::Entities;
+
+ sub start
+ {
+     # Opening-tag callback. Counts the tag in $self->{inside} and raises the
+     # state flags that text() later uses to decide where a text chunk goes.
+     #   $tagname  - name of the tag being opened
+     #   $attr     - hash ref of the tag attributes (class, href, src, type are read)
+     #   $attrseq  - not used by this plugin
+     #   $origtext - raw text of the tag; only inspected for <img> tags
+     my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+     $self->{inside}->{$tagname}++;
+
+     my $isLink = ($tagname eq 'a');
+
+     if ($self->{parsingList})
+     {
+         if (($tagname eq 'font') && ($attr->{class} eq 'font4Copy'))
+         {
+             # Beginning of one entry of the result list
+             $self->{isBook} = 1;
+             $self->{isUrl} = 1;
+             $self->{isDescription} = 0;
+         }
+         elsif ($isLink && ($attr->{href} =~ m|/artigos/popUp_detalhe.jsp|i) && ($self->{isBook}) && ($self->{isUrl}))
+         {
+             # Link to the detail page. When the href contains a single quote,
+             # the site-relative path is whatever follows the first quote, up
+             # to the next quote (or the end of the href), and the host is
+             # prepended. Without any quote the href is stored unchanged.
+             my $link = $attr->{href};
+             if ($link =~ m/'([^']*)/)
+             {
+                 $link = "http://www.mediabooks.pt" . $1;
+             }
+
+             $self->{itemIdx}++;
+             $self->{itemsList}[$self->{itemIdx}]->{url} = $link;
+
+             # The next text chunk is the title of this entry
+             $self->{isTitle} = 1;
+             $self->{isUrl} = 0;
+         }
+         elsif ($isLink && ($attr->{href} =~ m|/autores/index.jsp|i) && ($self->{isBook}))
+         {
+             $self->{isAuthor} = 1;
+         }
+         elsif ($isLink && ($attr->{href} =~ m|/editores/index.jsp|i) && ($self->{isBook}))
+         {
+             $self->{isPublisher} = 1;
+         }
+         elsif (($tagname eq 'input') && ($attr->{type} eq 'hidden'))
+         {
+             # A hidden input closes the current entry
+             $self->{isBook} = 0;
+         }
+     }
+     else
+     {
+         if ($isLink && ($attr->{href} =~ m|/autores/index.jsp|i))
+         {
+             $self->{isAuthor} = 1;
+         }
+         elsif ($isLink && ($attr->{href} =~ m|/editores/index.jsp|i))
+         {
+             $self->{isPublisher} = 1;
+         }
+         elsif ($self->{isISBN} eq 1)
+         {
+             # An "ISBN" label was seen by text(); any tag opened after it
+             # arms the capture of the ISBN value itself.
+             $self->{isISBN} = 2;
+         }
+         elsif ($tagname eq 'span')
+         {
+             if ($self->{isTitle})
+             {
+                 # First <span> after the cover image: its text is the title
+                 $self->{isTitle} = 2;
+             }
+             elsif ($attr->{class} eq 'font4Copy')
+             {
+                 # The text of this span is a field label to be analysed
+                 $self->{isAnalyse} = 1;
+             }
+         }
+         elsif (($tagname eq 'img') && ($attr->{src} =~ m|/artigos/imagens/|i))
+         {
+             # Images whose tag text contains /artigos/imagens/livros are not
+             # kept as cover.
+             # NOTE(review): presumably a generic placeholder picture - confirm.
+             unless ($origtext =~ m|/artigos/imagens/livros|i)
+             {
+                 $self->{curInfo}->{cover} = 'http://www.mediabooks.pt' . $attr->{src};
+             }
+
+             $self->{isTitle} = 1;
+         }
+     }
+ }
+
+ sub end
+ {
+     # Closing-tag callback: clears the isFound flag and decrements the
+     # nesting counter that start() incremented for this tag name.
+     my $self = shift;
+     my $tagname = shift;
+
+     $self->{isFound} = 0;
+     $self->{inside}{$tagname}--;
+ }
+
+ sub text
+ {
+     # Text callback: files the current text chunk under the field designated
+     # by the state flags (raised by start() or by a label seen earlier) and
+     # then clears the flag that was consumed.
+     #   $origtext - the text chunk found between two tags
+     my ($self, $origtext) = @_;
+
+     if ($self->{parsingList})
+     {
+         my $idx = $self->{itemIdx};
+
+         if ($self->{isTitle})
+         {
+             $self->{itemsList}[$idx]{title} = $origtext;
+             $self->{isTitle} = 0;
+         }
+         elsif ($self->{isAuthor} eq 1)
+         {
+             # Remove line feeds and carriage returns
+             for ($origtext)
+             {
+                 s/\n//g;
+                 s/\r//g;
+             }
+
+             # Authors of one entry are joined with ', '
+             my $hasName = ($origtext ne '');
+             if (($self->{itemsList}[$idx]{authors} eq '') && $hasName)
+             {
+                 $self->{itemsList}[$idx]{authors} = $origtext;
+             }
+             elsif ($hasName)
+             {
+                 $self->{itemsList}[$idx]{authors} .= ', ' . $origtext;
+             }
+             $self->{isAuthor} = 0;
+         }
+         elsif ($self->{isPublisher})
+         {
+             # In the result list the publisher name is stored under 'edition'
+             $self->{itemsList}[$idx]{edition} = $origtext;
+             $self->{isPublisher} = 0;
+         }
+     }
+     else
+     {
+         for ($origtext)
+         {
+             # Strip leading whitespace
+             s/^\s+//;
+             # Strip trailing whitespace
+             s/\s+$//g;
+         }
+
+         if ($self->{isTitle} eq '2')
+         {
+             $self->{curInfo}{title} = $origtext;
+             $self->{isTitle} = 0;
+         }
+         elsif ($self->{isAuthor} eq 1)
+         {
+             # Every author name is followed by a comma
+             $self->{curInfo}{authors} .= $origtext . "," if ($origtext ne '');
+             $self->{isAuthor} = 0;
+         }
+         elsif ($self->{isAnalyse})
+         {
+             # The chunk is a field label: arm the flag of each field whose
+             # label it contains, so a later chunk is stored in that field.
+             my @labels = (
+                 [ qr/ISBN/i         => 'isISBN' ],
+                 [ qr/Formato/i      => 'isFormat' ],
+                 [ qr/Breve Descri/i => 'isDescription' ],
+                 [ qr/Ano de Edi/i   => 'isPublication' ],
+                 [ qr/P.ginas/i      => 'isPage' ],
+             );
+             foreach my $label (@labels)
+             {
+                 my ($pattern, $flag) = @$label;
+                 $self->{$flag} = 1 if ($origtext =~ $pattern);
+             }
+
+             $self->{isAnalyse} = 0;
+         }
+         elsif ($self->{isISBN} eq 2)
+         {
+             $self->{curInfo}{isbn} = $origtext;
+             $self->{isISBN} = 0;
+         }
+         else
+         {
+             # Single-value fields, in priority order: the first armed flag
+             # receives the chunk and is cleared.
+             my @fields = (
+                 [ isPublisher   => 'publisher' ],
+                 [ isFormat      => 'format' ],
+                 [ isPublication => 'publication' ],
+                 [ isPage        => 'pages' ],
+             );
+             my ($armed) = grep { $self->{ $_->[0] } } @fields;
+
+             if ($armed)
+             {
+                 my ($flag, $field) = @$armed;
+                 $self->{curInfo}{$field} = $origtext;
+                 $self->{$flag} = 0;
+             }
+             elsif ($self->{isDescription})
+             {
+                 # The description flag is not cleared here: chunks keep
+                 # being appended while no other flag is armed.
+                 $self->{curInfo}{description} .= $origtext;
+             }
+         }
+     }
+ }
+
+ sub new
+ {
+     # Constructor: builds the object through the parent class, declares
+     # which fields are flagged in hasField, and resets the parser state flags.
+     my ($proto) = @_;
+     my $class = ref($proto) || $proto;
+     my $self = $class->SUPER::new();
+     bless $self, $class;
+
+     # 1 = field flagged as available, 0 = not
+     # NOTE(review): presumably the columns shown in the result list - confirm
+     # against the base class.
+     my %hasField = (
+         title       => 1,
+         authors     => 1,
+         publication => 0,
+         format      => 0,
+         edition     => 1,
+     );
+     $self->{hasField} = \%hasField;
+
+     # State flags start lowered
+     foreach my $flag (qw(
+         isFound isBook isUrl isTitle isAuthor isFormatPublication
+         isPublisher isISBN isPublication isFormat isPage isDescription
+     ))
+     {
+         $self->{$flag} = 0;
+     }
+
+     return $self;
+ }
+
+ sub preProcess
+ {
+     # Cleans the raw HTML of a detail page before it is parsed; result-list
+     # pages are returned untouched.
+     #   $html - page content
+     # Returns the (possibly rewritten) page content.
+     my ($self, $html) = @_;
+
+     return $html if ($self->{parsingList});
+
+     # Rewrites are applied one after the other, in this exact order.
+     # NOTE(review): the last two patterns are non-ASCII literals while no
+     # 'use utf8' is visible in this file - confirm they match the encoding
+     # of the fetched page.
+     my @rewrites = (
+         # Flatten the page on a single line
+         [ qr|\n|i     => ''     ],
+         [ qr|\r|i     => ''     ],
+         [ qr|\t|i     => ''     ],
+         # List items and line breaks become plain-text line breaks
+         [ qr|<li>|i   => "\n* " ],
+         [ qr|<br>|i   => "\n"   ],
+         [ qr|<br />|i => "\n"   ],
+         # Inline formatting tags are dropped
+         [ qr|<b>|i    => ''     ],
+         [ qr|</b>|i   => ''     ],
+         [ qr|<i>|i    => ''     ],
+         [ qr|</i>|i   => ''     ],
+         [ qr|<p>|i    => "\n"   ],
+         [ qr|</p>|i   => ''     ],
+         [ qr|</h4>|i  => ''     ],
+         # Typographic apostrophes and bullets become ASCII
+         [ qr|\x{92}|  => "'"    ],
+         [ qr|’|i      => "'"    ],
+         [ qr|•|i      => '*'    ],
+     );
+
+     foreach my $rewrite (@rewrites)
+     {
+         my ($from, $to) = @$rewrite;
+         $html =~ s/$from/$to/g;
+     }
+
+     return $html;
+ }
+
+ sub getSearchUrl
+ {
+     # Builds the search request for $word.
+     # Returns a two-element list: the address of the search page and an
+     # array ref of form field name/value pairs.
+     # NOTE(review): presumably the pairs are sent as POST data by the
+     # caller - confirm against the base class.
+     my ($self, $word) = @_;
+
+     # ISBN and title searches only differ by the v_pes_id form value
+     my $searchType = ($self->{searchField} eq 'isbn') ? "2" : "1";
+
+     my @form = (
+         "v_sec_id"      => "1",
+         "v_prev_sec_id" => "",
+         "v_pes_id"      => $searchType,
+         "v_pesquisa"    => "$word",
+         "image.x"       => "5",
+         "image.y"       => "7",
+     );
+
+     return ('http://www.mediabooks.pt/pesquisa/result_pesq.jsp', \@form);
+ }
+
+ sub getItemUrl
+ {
+     # Returns $url when it is a true value, otherwise the site home page.
+     my ($self, $url) = @_;
+
+     return $url || 'http://www.mediabooks.pt/';
+ }
+
+ sub getName
+ {
+     # Name of this plugin
+     my $name = 'Mediabooks';
+     return $name;
+ }
+
+ sub getAuthor
+ {
+     # Identifier of the plugin's author
+     my $author = 'TPF';
+     return $author;
+ }
+
+ sub getLang
+ {
+     # Language code reported by this plugin
+     my $lang = 'PT';
+     return $lang;
+ }
+
+ sub getSearchFieldsArray
+ {
+     # Search fields handled by getSearchUrl(), as an array ref
+     return [ qw(isbn title) ];
+ }
+
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm b/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm new file mode 100644 index 0000000..5c5129a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCMerlin.pm @@ -0,0 +1,389 @@ +package GCPlugins::GCbooks::GCbooksMerlin; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginMerlin; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'li') && ($attr->{class} eq 'tytul')) + { + $self->{isBook} = 1; + $self->{isUrl} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'li') && ($attr->{class} eq 'wydawca')) + { + $self->{isPublisher} = 1; + } + if (($tagname eq 'a') + && ($self->{isUrl} eq '1')) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.merlin.com.pl".$attr->{href}; + $self->{isUrl} = 0; + } + } + else + { + if (($tagname eq 'div') && ($attr->{id} eq 'wrgISBN')) + { + $self->{isISBN} = 1; + } + if 
(($tagname eq 'div') && ($attr->{id} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgPDATE')) + { + $self->{isPublication} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgSERIA')) + { + $self->{isSerie} = 2; + } + if (($tagname eq 'a') && ($self->{isSerie} eq '2')) + { + $self->{isSerie} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + if (($tagname eq 'div') && ($attr->{id} eq 'prodHead')) + { + $self->{isCover} = 2; + $self->{isTitle} = 2; + $self->{isFormat} = 2; + } + if (($tagname eq 'h1') && ($attr->{class} eq 'prodTitle') && ($self->{isTitle} eq '2')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'h2') && ($attr->{class} eq 'prodPerson')) + { + $self->{isAuthor} = 2; + } + if ($tagname eq 'a') + { + if ($self->{isAuthor} eq '1') + { + $self->{isAuthor} = 2; + } + elsif ($self->{isAuthor} eq '2') + { + $self->{isAuthor} = 1; + } + } + if (($tagname eq 'dd') && ($attr->{id} eq 'wrgTRANS')) + { + $self->{isTranslator} = 2; + } + if ($tagname eq 'a') + { + if ($self->{isTranslator} eq '1') + { + $self->{isTranslator} = 2; + } + elsif ($self->{isTranslator} eq '2') + { + $self->{isTranslator} = 1; + } + } + if (($tagname eq 'div') && ($attr->{id} eq 'prodImg') && ($self->{isCover} eq '2')) + { + $self->{isCover} = 1; + } + if (($tagname eq 'img') && ($self->{isCover} eq '1')) + { + $self->{curInfo}->{cover} = "http://www.merlin.com.pl".$attr->{src}; + $self->{isCover} = 0; + } + if (($tagname eq 'div') && ($attr->{class} eq 'prodFeatureSpec') && ($self->{isFormat} eq '2')) + { + $self->{isFormat} = 1; + } + if (($tagname eq 'div') && ($attr->{class} eq 'productDesc')) + { + $self->{isDescription} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + if ($tagname eq 'h2') + { + $self->{isAuthor} = 0; + } + if ($tagname eq 'dd') + { + 
$self->{isTranslator} = 0; + } + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isBook} eq '1') + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + $self->{isBook} = 0; + if ($self->{inside}->{a}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isBook} = 1; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + } + } + if ($self->{isPublisher} eq '1') + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublisher} = 0; + } + + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isTitle} eq '1') + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} eq '1') + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($origtext ne '') + { + $self->{curInfo}->{authors} .= $origtext; + } + $self->{isAuthor} = 2; + } + if ($self->{isTranslator} eq '1') + { + $origtext =~ s|^\s*||; + $origtext =~ s|\s*$||; + if ($self->{curInfo}->{translator} eq '') + { + $self->{curInfo}->{translator} = $origtext; + } + else + { + $self->{curInfo}->{translator} .= ", ".$origtext; + } + $self->{isTranslator} = 2; + } + if ($self->{isFormat} eq '1') + { + $origtext =~ s|okładka: ||m; + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + if ($self->{isDescription} eq '1') + { + $self->{curInfo}->{description} = $origtext; + $self->{isDescription} = 0; + } + + if ($self->{isISBN} eq '1') + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + if ($self->{isPage} eq '1') + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isPublisher} eq '1') + { + $self->{curInfo}->{publisher} = $origtext; + 
$self->{isPublisher} = 0; + } + if ($self->{isPublication} eq '1') + { + $origtext =~ s|(\S*)\s*(\S{4})|$2|; + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + if ($self->{isSerie} eq '1') + { + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s|<b>(.*?)</b>|$1|gms; + $html =~ s|<li class="tytul">(.*)</li>\s*<li>|<li class="tytul">$1</li><li class="wydawca">|gm; + } + else + { + $html =~ s|</?strong>||gi; + $html =~ s|</?i>||gi; + $html =~ s|</?br>||gi; + $html =~ s|<dfn>(.*?)</dfn>||gs; + + $html =~ s|<dt>ISBN:</dt><dd>(.*)</dd>|<div id="wrgISBN">$1</div>|; + $html =~ s|<dt>Liczba stron:</dt><dd>(.*)</dd>|<div id="wrgPAGES">$1</div>|; + $html =~ s|<dt>Seria:</dt>\s*<dd>(.*)</dd>|<div id="wrgSERIA">$1</div>|m; + $html =~ s|<dt>Wydanie:</dt><dd>(.*)</dd>|<div id="wrgEDITI">$1</div>|; + $html =~ s|<dt>Wydawnictwo:</dt>\s*<dd>\s*(.*)\s*,*\s*(.*)\s*</dd>|<div id="wrgPUBLI">$1</div><div id="wrgPDATE">$2</div>|m; + $html =~ s|<dt>Tłumaczenie:\s*</dt>\s*<dd>|<dd id="wrgTRANS">|m; + } + + return 
$html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + return "http://www.merlin.com.pl/frontend/browse/search/1.html?phrase=$word"; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.merlin.com.pl/'; + } + + sub getName + { + return "Merlin"; + } + + sub getCharset + { + my $self = shift; + #return "UTF-8"; + return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm b/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm new file mode 100644 index 0000000..6bc22eb --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCNUKat.pm @@ -0,0 +1,447 @@ +package GCPlugins::GCbooks::GCbooksNUKat; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +my $searchURL = ""; +my $searchISBN = ""; + +{ + package GCPlugins::GCbooks::GCPluginNUKat; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ($tagname eq 'title') #od razu mamy wynik + { + $self->{isBook} = 7; + } + + if (($tagname eq 'tr') && ($attr->{class} eq 'intrRow')) + { + $self->{isBook} = 1; + $self->{itemIdx}++; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrRowCell1') && ($self->{isBook} == 1)) + { + $self->{isUrl} = 2; + } + if (($tagname eq 'a') && ($self->{isUrl} == 2) && ($origtext =~ /.*function=CARDSCR.*/)) + { + $self->{isUrl} = 1; + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{itemsList}[$self->{itemIdx}]->{url} =~ s|skin=portal&||; + $self->{isUrl} = 0; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrAutor') && ($self->{isBook} == 1)) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrTytul') && ($self->{isBook} == 1)) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'intrWydaw') && ($self->{isBook} == 1)) + { + $self->{isPublication} = 1; + } + } + else + { + if (($tagname eq 'td') && ($attr->{class} eq 'wrgTITLE')) + { + $self->{isTitle} = 1; + $self->{isAuthor} = 1; + $self->{isTranslator} = 1; + $self->{isArtist} = 1; + $self->{isISBN} = 2; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgPAGES')) + { + $self->{isPage} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgSERIA')) + { + $self->{isSerie} = 1; + } + if 
(($tagname eq 'td') && ($attr->{class} eq 'wrgPUBLI')) + { + $self->{isPublisher} = 1; + $self->{isPublication} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgEDITI')) + { + $self->{isEdition} = 1; + } + if (($tagname eq 'td') && ($attr->{class} eq 'wrgISBN') && ($self->{isISBN} == 2)) + { + $self->{isISBN} = 1; + } + + if (($tagname eq 'div') && ($attr->{class} eq 'prodFeatureSpec') && ($self->{isFormat} eq '2')) + { + $self->{isFormat} = 1; + } + } + } + + + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0; + $self->{inside}->{$tagname}--; + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isBook} == 7) #od razu mamy wynik? + { + if ($origtext =~ /Pełny opis/) + { + $self->{isUrl} = 1; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $searchURL; + $self->{isUrl} = 0; + $self->{isBook} = 0; + } + } + if ($self->{isBook} == 1) + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + if ($self->{isTitle} == 1) + { + $origtext =~ s|^\s*([^/]*)/?|$1|m; + $origtext =~ s|^\s*([^:]*):?|$1|m; + $origtext =~ s|\s*$||m; + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0; + } + if ($self->{isAuthor} == 1) + { + $origtext =~ s|\s*\/\s*(.*)\s*|$1|; + $origtext =~ s|^\s*([^\.]*)\.?|$1|m; + $origtext =~ s|([^\(]*)(\([^\)]*\))?|$1|; + $origtext =~ s|\s*$||m; + $origtext =~ s|([^,]*), (.*)|$2 $1|m; + $self->{itemsList}[$self->{itemIdx}]->{authors} = $origtext; + $self->{isAuthor} = 0; + } + if ($self->{isPublication} == 1) + { + $origtext =~ s|(.*)(\d{4})\D*|$2|s; + $origtext =~ s|^\s*([^\.]*)\.?|$1|m; + $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext; + $self->{isPublication} = 0; + $self->{isBook} = 0; + } + } + + } + else + { + $origtext =~ s/^\s*//m; + $origtext =~ s/\s*$//m; + + if ($self->{isFormat} eq '1') + { + $origtext =~ s|okładka: ||m; + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + + + + if 
($self->{isISBN} eq '1') + { + my ($pom1, $pom2); + if ($self->{searchField} eq 'isbn') + { + $pom1 = $self->{searchISBN}; + $pom2 = $origtext; + $pom2 =~ s|[^\dX]||g; + $pom1 =~ s|-||g; + $pom2 =~ s|-||g; + if ($pom1 eq $pom2) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + else + { + $self->{isISBN} = 2; + } + } + else + { + $origtext =~ s|[^\dX]||g; + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + } + if ($self->{isTitle} eq '1') + { + my ($pom1, $pom2, $ti, $au, $tr, $il); + $origtext =~ m|([^/]*)/\s*([^;]*)(; )?([^;]*)(; )?([^;]*)$|; + $ti = $1; + $au = $2; + $pom1 = $4; + $pom2 = $6; + $ti =~ s|^\s*||; + $ti =~ s|\s*$||; + $self->{curInfo}->{title} = $ti; + $self->{isTitle} = 0; + $au =~ s| i |,|g; + $au =~ s|, |,|g; + $au =~ s|[\[\]]||g; + $au =~ s|tekst||g; + $au =~ s|^\s*||; + $au =~ s|\s*$||; + $au =~ s|(.*)(\.{1})|$1|; + $self->{curInfo}->{authors} = $au; + $self->{isAuthor} = 0; + $pom1 =~ s|[\[\]]||g; + $pom1 =~ m|(.*)(.{1})|; + if ($2 eq '.') + { + $pom1 = $1; + } + $pom2 =~ s|[\[\]]||g; + $pom2 =~ m|(.*)(.{1})|; + if ($2 eq '.') + { + $pom2 = $1; + } + if ($pom2 =~ /(przeł\.|przekł\.|tł\.|tłum\.)/) + { + $tr = $pom2; + } + if ($pom2 =~ /(il\.|oprac\. graf\.)/) + { + $il = $pom2; + } + if ($pom1 =~ /(przeł\.|przekł\.|tł\.|tłum\.)/) + { + $tr = $pom1; + } + if ($pom1 =~ /(il\.|oprac\. graf\.)/) + { + $il = $pom1; + } + $tr =~ s/(przeł\.|przekł\.|tł\.|tłum\.)//; + $tr =~ s|z \w+\.||; + $tr =~ s|^\s*||; + $tr =~ s|\s*$||; + $tr =~ s| i |,|g; + $tr =~ s|, |,|g; + $self->{curInfo}->{translator} = $tr; + $self->{isTranslator} = 0; + $il =~ s/(il\.|oprac\. 
graf\.)//; + $il =~ s|^\s*||; + $il =~ s|\s*$||; + $il =~ s| i |,|g; + $il =~ s|, |,|g; + $self->{curInfo}->{artist} = $il; + $self->{isArtist} = 0; + } + if ($self->{isPage} eq '1') + { + $origtext =~ s|(\d*)\D.*|$1|; + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + if ($self->{isEdition} eq '1') + { + $origtext =~ s|\D*(\d*)\D.*|$1|; + $self->{curInfo}->{edition} = $origtext; + $self->{isEdition} = 0; + } + if ($self->{isPublisher} eq '1') + { + my $pom = $origtext; + $origtext =~ s|[^:]*:\s*(.*),.*|$1|; + $origtext =~ s|^\s*||; + $origtext =~ s|"(.*)"|$1|; + $self->{curInfo}->{publisher} = $origtext; + $pom =~ s|(.*)(\d{4})(\D*)|$2|; + $self->{curInfo}->{publication} = $pom; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + } + if ($self->{isSerie} eq '1') + { + $origtext =~ s|([^;]*)(;.*)|$1|; + $origtext =~ s|\s*$||; + $self->{curInfo}->{serie} = $origtext; + $self->{isSerie} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 1, + }; + + $self->{isBook} = 0; + $self->{isUrl} = 0; + $self->{isEditor_Publication_Format_Lang} = 0 ; + $self->{isAnalyse} = 0; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isEdition} = 0; + $self->{isPage} = 0; + $self->{isLanguage} = 0; + $self->{isPublication} = 0; + $self->{isSerie} = 0; + $self->{isDescription} = 0; + $self->{isCover} = 0; + $self->{isTranslator} = 0; + $self->{isArtist} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + $self->{parsingEnded} = 0; + $self->{insideResults} = 0; + $self->{actorsCounter} = 0; + + if ($self->{parsingList}) + { + $html =~ s|<b>(.*?)</b>|$1|gms; + $html =~ s|<img .*/book.gif">||g; + $html =~ s|<font.*</font>||g; + 
$html =~ s|<span class="highlight[^>]+>||g; + $html =~ s|</?span[^>]*>||g; + $html =~ s|<th[^>]*>Autor</th>\s*<td><a[^>]*>([^<]*)</a>|<td class="intrAutor">$1|gs; + $html =~ s|<th[^>]*>Tytuł</th>\s*<td><a[^>]*>([^<]*)</a>|<td class="intrTytul">$1|gs; + $html =~ s|<th[^>]*>Adres wyd.</th>\s*<td>|<td class="intrWydaw">|gs; + } + else + { + $html =~ s|</?strong>||gi; + $html =~ s|</?i>||gi; + $html =~ s|</?br>||gi; + + $html =~ s|<th[^>]*>Tytuł</th>\s*<td>\s*<a[^>]*>([^<]*)</a>|<td class="wrgTITLE">$1|gs; + $html =~ s|<th[^>]*>Strefa serii</th>\s*<td>\s*<a[^>]*>([^<]*)</a>|<td class="wrgSERIA">$1|gs; + $html =~ s|<th[^>]*>Adres wydawniczy</th>\s*<td>|<td class="wrgPUBLI">|gs; + $html =~ s|<th[^>]*>Opis fizyczny</th>\s*<td>|<td class="wrgPAGES">|gs; + $html =~ s|<th[^>]*>Oznaczenie wydania</th>\s*<td>|<td class="wrgEDITI">|gs; + $html =~ s|<th[^>]*>ISBN</th>\s*<td>|<td class="wrgISBN">|gs; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + my $bubu; + if ($self->{searchField} eq 'isbn') + { + $bubu = "7"; + $self->{searchISBN} = $word; + } + else + { + $bubu = "4"; + $self->{searchISBN} = ""; + } + $searchURL = "http://www.nukat.edu.pl/cgi-bin/gw_43_3/chameleon?host=193.0.118.2%2b1111%2bDEFAULT&search=KEYWORD&function=INITREQ&conf=.%2fchameleon.conf&lng=pl&u1=".$bubu."&t1=".$word; + return $searchURL; + } + + sub getItemUrl + { + my ($self, $url) = @_; + return $url if $url; + return 'http://www.nukat.edu.pl/'; + } + + sub getName + { + return "NUKat"; + } + + sub getCharset + { + my $self = shift; + return "UTF-8"; + #return "ISO-8859-2"; + } + + sub getAuthor + { + return 'WG'; + } + + sub getLang + { + return 'PL'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm b/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm new file mode 100644 index 0000000..c878af9 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCNooSFere.pm @@ -0,0 +1,462 @@ +package 
GCPlugins::GCbooks::GCNooSFere;
+
+###################################################
+#
+# Copyright 2005-2006 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCbooks::GCbooksCommon;
+
+{
+ package GCPlugins::GCbooks::GCPluginNooSFere;
+
+ use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);
+ use URI::Escape;
+
+    # Start-tag callback (presumably driven by the HTML parser of the base
+    # class -- confirm against GCPluginParser).
+    #
+    # Parameters:
+    #   $tagname  - name of the tag being opened
+    #   $attr     - hash ref of the tag attributes
+    #   $attrseq, $origtext - accepted but not used here
+    #
+    # The sub keeps a per-tag nesting counter in $self->{inside} and raises
+    # the is* flags that text() consumes on the next text chunk. While a
+    # result list is parsed ($self->{parsingList}) it also appends entries to
+    # $self->{itemsList}; otherwise it prepares the fields of $self->{curInfo}.
+    sub start
+    {
+        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+        $self->{inside}->{$tagname}++;
+
+        if ($self->{parsingList})
+        {
+            # text() sets isFound to 2 once the loaded page turned out to be
+            # a book sheet; nothing more is collected from start tags then.
+            return if ( $self->{isFound} eq 2 );
+            if (($tagname eq 'td') && ($attr->{class} eq 'onglet_bleu'))
+            {
+                # Tab cell: text() checks whether its label is 'Fiche livre'.
+                $self->{isFound} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m/editionslivre.asp\?numitem=/i) && !($attr->{href} =~ m/numediteur=/i) && !($attr->{href} =~ m/tri=/i))
+            {
+                # Link to a work (no publisher, no sort key): its text is the title.
+                $self->{isTitle} = 1 ;
+                $self->{isAuthor} = 0 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isAuthor} eq 0))
+            {
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|./editeur.asp\?numediteur=|i))
+            {
+                $self->{isPublisher} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|./serie.asp\?NumSerie=|i))
+            {
+                $self->{isSerie} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m/editionslivre.asp\?numitem=/i) && ($attr->{href} =~ m/numediteur=/i))
+            {
+                # Link to the editions of a work for one publisher: load the
+                # target page to find out whether it lists several editions.
+                my $html = $self->loadPage( "http://www.noosfere.org/icarus/livres/" . $attr->{href}, 0, 1 );
+                my $found = index($html,"Fiche livre : les éditions");
+                if ( $found >= 0 )
+                {
+                    # One result entry per edition link found in the page.
+                    my $marker = './niourf.asp?numlivre=';
+
+                    # index() yields -1 when the marker is absent and 0 when it
+                    # sits at the very start, so the position has to be compared
+                    # explicitly instead of being used as a boolean.
+                    while (($found = index($html, $marker)) >= 0)
+                    {
+                        # Drop everything up to and including the marker; the
+                        # book number then runs up to the closing quote.
+                        $html = substr($html, $found + length($marker));
+                        my $quote = index($html, "\"");
+
+                        # Unterminated attribute: no usable book number left.
+                        last if ($quote < 0);
+
+                        $self->{itemIdx}++;
+                        $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle};
+                        $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor};
+                        $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/niourf.asp?numlivre=" . substr($html, 0, $quote);
+                    }
+                }
+                else
+                {
+                    # Not an editions list: the link itself is the result.
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle};
+                    $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor};
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/" . $attr->{href};
+                }
+            }
+            elsif ($tagname eq 'h1')
+            {
+                $self->{isTitle} = 1 ;
+                $self->{isAuthor} = 0 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|./niourf.asp\?numlivre=|i))
+            {
+                # Direct link to a book sheet.
+                $self->{itemIdx}++;
+                $self->{itemsList}[$self->{itemIdx}]->{title} = $self->{saveTitle};
+                $self->{itemsList}[$self->{itemIdx}]->{authors} = $self->{saveAuthor};
+                $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.noosfere.org/icarus/livres/" . $attr->{href};
+            }
+            elsif (($tagname eq 'td') && ($attr->{class} eq 'onglet_biblio1'))
+            {
+                # Any value other than 0 keeps the author-link branch above
+                # from firing, and text() only reacts to 1 in list mode.
+                $self->{isAuthor} = 2 ;
+            }
+            elsif (($tagname eq 'table') && ($attr->{class} eq 'piedpage'))
+            {
+                $self->{isAuthor} = 0 ;
+            }
+        }
+        else
+        {
+            if (($tagname eq 'mytpf') && ($attr->{id} eq 'TPFENDCOMMENTTPF'))
+            {
+                # Marker tag inserted by preProcess() after the description.
+                $self->{isDescription} = 0 ;
+            }
+            elsif (($tagname eq 'font') && ($attr->{class} eq 'TitreNiourf'))
+            {
+                $self->{isAnalyse} = 0 ;
+                $self->{isTitle} = 1 ;
+                $self->{isAuthor} = 0 ;
+            }
+            elsif (($tagname eq 'font') && ($attr->{class} eq 'AuteurNiourf'))
+            {
+                # Author block opened; each author link inside bumps the flag to 2.
+                $self->{isAuthor} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isAuthor} eq 1))
+            {
+                $self->{isAuthor} = 2 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|actu_mois.asp\?|i))
+            {
+                $self->{isPublication} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|editeur.asp\?numediteur=|i) && ($self->{curInfo}->{publisher} eq ''))
+            {
+                # Only the first publisher link is taken into account.
+                $self->{isPublisher} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|collection.asp\?NumCollection=|i) && ($self->{curInfo}->{serie} eq ''))
+            {
+                # Only the first collection link is taken into account.
+                $self->{isSerie} = 1 ;
+            }
+            elsif (($tagname eq 'a') && ($attr->{href} =~ m|/icarus/livres/auteur.asp\?NumAuteur=|i) && ($self->{isTranslator} eq 1))
+            {
+                $self->{isTranslator} = 2 ;
+            }
+            elsif ($tagname eq 'br')
+            {
+                # The text following a line break is inspected by text() for
+                # the 'Traduction' label.
+                $self->{isAnalyseTrans} = 1 ;
+            }
+            elsif (($tagname eq 'font') && ($attr->{style} eq 'font-size:12px;') && ($self->{isAnalyse} eq 0))
+            {
+                # text() reads the page count and ISBN out of this block.
+                $self->{isAnalyse} = 1 ;
+            }
+            elsif (($tagname eq 'img') && ($attr->{name} eq 'couverture'))
+            {
+                $self->{curInfo}->{cover} = "http://www.noosfere.org/icarus/livres/" . $attr->{src} ;
+            }
+            elsif (($tagname eq 'mytpf') && ($attr->{id} eq 'TPFSTARTCOMMENTTPF'))
+            {
+                # Marker tag inserted by preProcess() before the description.
+                $self->{isDescription} = 1 ;
+            }
+        }
+    }
+
+    # End-tag callback: clears the isFound flag and decrements the nesting
+    # counter kept for the tag in $self->{inside}.
+    sub end
+    {
+        my $self    = shift;
+        my $tagname = shift;
+
+        # Reset on every closing tag, whatever the tag is.
+        $self->{isFound} = 0 ;
+        $self->{inside}->{$tagname}--;
+    }
+
+    # Text callback: stores the received text chunk into the field selected
+    # by the flags that start() raised, then lowers the flag.
+    #
+    # In list mode ($self->{parsingList}) the title and authors are buffered
+    # in saveTitle / saveAuthor and the publisher and series go straight into
+    # the current itemsList entry. Otherwise the fields of $self->{curInfo}
+    # are filled.
+    sub text
+    {
+        my ($self, $origtext) = @_;
+
+        # Returns its argument without leading and trailing whitespace.
+        my $trim = sub {
+            my ($string) = @_;
+            $string =~ s/^\s+//;
+            $string =~ s/\s+$//g;
+            return $string;
+        };
+
+        if ($self->{parsingList})
+        {
+            if ($self->{isTitle})
+            {
+                # A new title also starts a fresh author list.
+                $self->{saveTitle} = $trim->($origtext);
+                $self->{saveAuthor} = '';
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAuthor} eq 1)
+            {
+                # Authors are accumulated as a comma separated list.
+                my $name   = $trim->($origtext);
+                my $sofar  = $self->{saveAuthor};
+                if ($name ne '')
+                {
+                    $self->{saveAuthor} = ($sofar eq '') ? $name : $sofar . ', ' . $name;
+                }
+                $self->{isAuthor} = 0 ;
+            }
+            elsif ($self->{isPublisher})
+            {
+                # The publisher name is stored in the 'edition' field of the entry.
+                $self->{itemsList}[$self->{itemIdx}]->{edition} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isSerie})
+            {
+                $self->{itemsList}[$self->{itemIdx}]->{serie} = $origtext;
+                $self->{isSerie} = 0 ;
+            }
+            elsif ($self->{isFound} eq 1)
+            {
+                if ($trim->($origtext) eq 'Fiche livre')
+                {
+                    # The loaded page is itself a book sheet: it becomes the result.
+                    $self->{itemIdx}++;
+                    $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl};
+                    $self->{isFound} = 2 ;
+                }
+                else
+                {
+                    $self->{isFound} = 0 ;
+                }
+            }
+        }
+        else
+        {
+            # Every field of the book sheet is stored without surrounding whitespace.
+            $origtext = $trim->($origtext);
+
+            if ($self->{isTitle} eq '1')
+            {
+                $self->{curInfo}->{title} = $origtext;
+                $self->{isTitle} = 0 ;
+            }
+            elsif ($self->{isAnalyse} eq 1)
+            {
+                # Page count: whatever precedes ' pages'.
+                my $pagesAt = index($origtext," pages");
+                $self->{curInfo}->{pages} = substr($origtext, 0, $pagesAt) if ( $pagesAt >= 0 );
+
+                # ISBN: whatever follows the 'ISBN : ' label.
+                my $isbnLabel = 'ISBN : ';
+                my $isbnAt = index($origtext, $isbnLabel);
+                $self->{curInfo}->{isbn} = substr($origtext, $isbnAt + length($isbnLabel)) if ( $isbnAt >= 0 );
+
+                # 2 keeps both this branch and the matching one in start() from firing again.
+                $self->{isAnalyse} = 2 ;
+            }
+            elsif ($self->{isAnalyseTrans})
+            {
+                # Text after a line break: arm the translator capture on the label.
+                $self->{isTranslator} = 1 if ($origtext =~ m/Traduction/i);
+
+                $self->{isAnalyseTrans} = 0 ;
+            }
+            elsif ($self->{isAuthor} eq 2)
+            {
+                my $sofar = $self->{curInfo}->{authors};
+                if ($origtext ne '')
+                {
+                    $self->{curInfo}->{authors} = ($sofar eq '') ? $origtext : $sofar . ', ' . $origtext;
+                }
+                # Back to 1 so that the next author link is picked up as well.
+                $self->{isAuthor} = 1 ;
+            }
+            elsif ($self->{isPublisher})
+            {
+                $self->{curInfo}->{publisher} = $origtext;
+                $self->{isPublisher} = 0 ;
+            }
+            elsif ($self->{isSerie})
+            {
+                $self->{curInfo}->{serie} = $origtext;
+                $self->{isSerie} = 0 ;
+            }
+            elsif ($self->{isPublication})
+            {
+                $self->{curInfo}->{publication} = $origtext;
+                $self->{isPublication} = 0 ;
+            }
+            elsif ($self->{isTranslator} eq 2)
+            {
+                $self->{curInfo}->{translator} = $origtext;
+                $self->{isTranslator} = 0 ;
+            }
+            elsif ($self->{isDescription})
+            {
+                # The site's "no back-cover text" placeholder is not a description.
+                unless ($origtext =~ m/Pas de texte sur la quatri.me de couverture\./i)
+                {
+                    $self->{curInfo}->{description} .= $origtext ."\n";
+                }
+            }
+
+        }
+    }
+
+    # Constructor: builds the object through the parent class, declares the
+    # hasField map and puts every parsing flag used by start(), end() and
+    # text() in its initial state.
+    sub new
+    {
+        my $invocant = shift;
+        my $class = ref($invocant) || $invocant;
+        my $self = bless ($class->SUPER::new(), $class);
+
+        # NOTE(review): presumably the columns this plugin fills in the
+        # result list -- confirm against the base class.
+        $self->{hasField} = {
+            title => 1,
+            authors => 1,
+            publication => 0,
+            format => 0,
+            edition => 1,
+            serie => 1,
+        };
+
+        # Title / authors buffered while a result list is parsed.
+        $self->{$_} = '' foreach (qw(saveTitle saveAuthor));
+
+        # State flags, all lowered at start.
+        $self->{$_} = 0 foreach (qw(
+            isFound isTitle isAuthor isPublisher isPublication isSerie
+            isDescription isTranslator isAnalyseTrans isAnalyse
+        ));
+
+        return $self;
+    }
+
+    # Rewrites the downloaded page before it is parsed and returns the
+    # resulting HTML. Result lists are returned untouched. For a book sheet,
+    # everything from the first 'Id="R' onwards is replaced by the cleaned
+    # description, wrapped between two <mytpf> marker tags that start()
+    # recognises.
+    sub preProcess
+    {
+        my ($self, $html) = @_;
+
+        return $html if ($self->{parsingList});
+
+        # The description may itself contain HTML tags, so it is located
+        # and isolated here, before the parser sees the page.
+        my $anchor = 'Id="R';
+        my $found = index($html, $anchor);
+        if ( $found >= 0 )
+        {
+            my $html2 = substr($html, $found + length($anchor));
+
+            # Keep what lies between the description cell and its closing tag,
+            # when these can be found.
+            my $cellTag = '<TD class="noocell_fs15" valign="top">';
+            my $found2 = index($html2, $cellTag);
+            $html2 = substr($html2, $found2 + length($cellTag)) if ( $found2 >= 0 );
+
+            $found2 = index($html2,"</TD>");
+            $html2 = substr($html2, 0, $found2) if ( $found2 >= 0 );
+
+            # Ordered rewrite rules: formatting tags become plain text and a few
+            # typographic characters get an ASCII replacement (\x{92}, \x{85},
+            # \x{8C} and \x{9C} are presumably Windows-1252 leftovers).
+            my @rewrites = (
+                [ qr{<li>}i,    "\n* " ],
+                [ qr{<br>}i,    "\n"   ],
+                [ qr{<br />}i,  "\n"   ],
+                [ qr{<b>}i,     ''     ],
+                [ qr{</b>}i,    ''     ],
+                [ qr{<i>}i,     ''     ],
+                [ qr{</i>}i,    ''     ],
+                [ qr{<p>}i,     "\n"   ],
+                [ qr{</p>}i,    ''     ],
+                [ qr{</h4>}i,   ''     ],
+                [ qr{\x{92}},   "'"    ],
+                [ qr{’}i,       "'"    ],
+                [ qr{•}i,       '*'    ],
+                [ qr{œ}i,       'oe'   ],
+                [ qr{…}i,       '...'  ],
+                [ qr{\x{85}}i,  '...'  ],
+                [ qr{\x{8C}}i,  'OE'   ],
+                [ qr{\x{9C}}i,  'oe'   ],
+            );
+            foreach my $rule (@rewrites)
+            {
+                my ($pattern, $replacement) = @{$rule};
+                $html2 =~ s/$pattern/$replacement/g;
+            }
+
+            # The page is cut inside the tag holding the anchor, hence the '>'.
+            $html = substr($html, 0, $found) . "><mytpf id=\"TPFSTARTCOMMENTTPF\">" . $html2 ."</mytpf><mytpf id=\"TPFENDCOMMENTTPF\"></mytpf>";
+
+        }
+
+        $html =~ s|<b><p>||gmi;
+        $html =~ s|<br><br>|<br>|gmi;
+        $html =~ s|<br><|<|gmi;
+
+        return $html;
+    }
+
+    # Returns the search URL for $word. The endpoint is the same for both
+    # search fields; only the name of the last query parameter differs
+    # ('isbn' for an ISBN search, 'titre' for anything else). $word is
+    # appended as received.
+    sub getSearchUrl
+    {
+        my ($self, $word) = @_;
+
+        my $criterion = ($self->{searchField} eq 'isbn') ? 'isbn' : 'titre';
+
+        return "http://www.noosfere.org/icarus/livres/cyborg_livre.asp?mini=1000&maxi=3000&mode=Idem&EtOuParution=NS&" . $criterion . "=" . $word;
+    }
+
+    # Returns $url when it holds a true value, the site home page otherwise.
+    sub getItemUrl
+    {
+        my ($self, $url) = @_;
+
+        return $url || 'http://www.noosfere.org/';
+    }
+
+    # Name of the plugin.
+    sub getName
+    {
+        my $pluginName = "nooSFere";
+        return $pluginName;
+    }
+
+    # Character set name reported for this plugin (a constant; the object
+    # itself is not consulted).
+    sub getCharset
+    {
+        my ($self) = @_;
+        my $charset = "ISO-8859-15";
+        return $charset;
+    }
+
+    # Identifier of the plugin's author.
+    sub getAuthor
+    {
+        my $pluginAuthor = 'TPF';
+        return $pluginAuthor;
+    }
+
+    # Language code reported for this plugin.
+    sub getLang
+    {
+        my $language = 'FR';
+        return $language;
+    }
+
+    # Returns a reference to a fresh array naming the supported search
+    # fields, 'isbn' first.
+    sub getSearchFieldsArray
+    {
+        my @searchFields = qw(isbn title);
+        return \@searchFields;
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm new file mode 100644 index 0000000..54dd119 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCSaraiva.pm @@ -0,0 +1,303 @@ +package GCPlugins::GCbooks::GCSaraiva; + +################################################### +# +# Plugin for a brazilian bookstore named "Saraiva". +# Code written by Guilherme "nirev" Nogueira. +# guilherme at nirev dot org +# +################################################### + +use strict; +use utf8; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCPluginSaraiva; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if (($tagname eq 'div') && ($attr->{class} eq 'hsliceLista')) + { + $self->{isResult} = 1; + $self->{linkCount} = 0; + $self->{itemIdx}++; + } + if (($tagname eq 'span') && ($attr->{class} eq 'entry-title')) + { + $self->{isTitle} = 1; + } + if (($tagname eq 'h2') && ($attr->{class} eq 'titulo_autor')) + { + $self->{isAuthor} = 1; + } + if (($tagname eq 'a') && $self->{isResult} && $self->{linkCount} == 0 ) + { + $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href}; + $self->{linkCount}++; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'entry-content')) + { + $self->{isResult} = 0; + } + } + else + { + if (($tagname eq 'img') && ($attr->{id} eq 'imgProd')) + { + my $imgid = $attr->{src}; + $imgid =~ s/(.)*pro_id=//; + $imgid =~ s/&.*$//; + $self->{curInfo}->{cover} = 'http://www.livrariasaraiva.com.br/imagem/imagem.dll?tam=2&pro_id='.$imgid; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba1')) + { + $self->{isDescription} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'aba2')) + { + $self->{divInfo} = 1; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'produtosAbasMenus')) + { + $self->{divInfo} 
= 0; + } + elsif (($tagname eq 'div') && ($attr->{id} eq 'tituloprod')) + { + $self->{isTitle} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaAutor();')) + { + $self->{isAuthor} = 1; + } + elsif (($tagname eq 'a') && ($attr->{href} eq 'javascript:PesquisaMarca();')) + { + $self->{isPublisher} = 1; + } + elsif (($tagname eq 'font')) + { + $self->{isAnalyse} = 1; + } + elsif (($tagname eq 'b') && $self->{divInfo} == 1) + { + $self->{isAnalyse} = 1; + } + } + } + + sub end + { + my ($self, $tagname) = @_; + + $self->{inside}->{$tagname}--; + + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + my $texto = $origtext; + $self->{itemsList}[$self->{itemIdx}]->{title} = $texto; + $self->{isTitle} = 0; + } + if ($self->{isAuthor}) + { + my $texto = $origtext; + $texto =~ s/<br>//; + my @dados = split(' / ', $texto); + $self->{itemsList}[$self->{itemIdx}]->{authors} = $dados[0]; + $self->{isAuthor} = 0; + } + } + else + { + if ($self->{isAuthor}) + { + my @authors = split(';', $origtext); + my $authors = ''; + my $tam = @authors; + my $count = 0; + for($count = 0; $count < $tam; $count++) + { + $authors[$count] =~ s/^\s*//gi; + $authors[$count] =~ s/\s*$//gi; + my @names = split(', ', $authors[$count]); + $authors .= ',' if ($count); + $authors .= $names[1].' 
'.$names[0]; + + } + $self->{curInfo}->{authors} = $authors; + $self->{isAuthor} = 0; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0; + } + elsif ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} = $origtext; + $self->{curInfo}->{description} =~ s/^\s*//; + $self->{curInfo}->{description} =~ s/\s+/ /; + $self->{isDescription} = 0; + } + elsif ($self->{isAnalyse}) + { + $self->{isISBN} = 1 if ($origtext =~ m/I\.S\.B\.N/i); + $self->{isFormat} = 1 if ($origtext =~ m/Acabamento/i); + $self->{isPublication} = 1 if ($origtext =~ m/Edição/i); + $self->{isPage} = 1 if ($origtext =~ m/Número de Paginas/i); + $self->{isAnalyse} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{isISBN} = 0; + } + elsif ($self->{isFormat}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isFormat} = 0; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{isPublication} = 0; + } + elsif ($self->{isPage}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPage} = 0; + } + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 0, + edition => 0, + serie => 0, + }; + + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isAnalyse} = 0; + $self->{isPublisher} = 0; + $self->{isPublication} = 0; + $self->{isPage} = 0; + $self->{isISBN} = 0; + $self->{isFormat} = 0; + $self->{isDescription} = 0; + $self->{isResult} = 0; + $self->{linkCount} = 0; + $self->{divInfo} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if ($self->{parsingList}) + { + my $inicio_res = index($html,'<div id="esquerdaPesquisa" 
style="display:none;">esquerdaPesquisa</div>'); + if ( $inicio_res >= 0 ) + { + $html = substr($html, $inicio_res); + } + my $fim_res = index($html,'<div id="direitaPesquisa" style="display:none;">direitaPesquisa</div>'); + if ( $fim_res >= 0 ) + { + $html = substr($html, 0, $fim_res); + } + $html = '' if ($inicio_res < 0); + } + else + { + + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + $word =~ s|\s+|\+|; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + else + { + return "http://www.livrariasaraiva.com.br/pesquisaweb/pesquisaweb.dll/pesquisa?ORDEMN2=E&ESTRUTN1=0301&PALAVRASN1=".$word; + } + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return "http://www.livrariasaraiva.com.br".$url; + } + + sub getName + { + return "Saraiva"; + } + + sub getCharset + { + my $self = shift; + return "ISO-8859-1"; + } + + sub getAuthor + { + return 'nirev'; + } + + sub getLang + { + return 'PT'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm new file mode 100644 index 0000000..ee556dc --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksAdlibrisCommon.pm @@ -0,0 +1,331 @@ +package GCPlugins::GCbooks::GCbooksAdlibrisCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; + +{ + package GCPlugins::GCbooks::GCbooksAdlibrisPluginsBase; + + use base qw(GCPlugins::GCbooks::GCbooksPluginsBase); + use URI::Escape; + + sub start + { + my ($self, $tagname, $attr, $attrseq, $origtext) = @_; + + $self->{inside}->{$tagname}++; + + if ($self->{parsingList}) + { + if ( (($tagname eq 'div') && ($attr->{class} eq 'productTitleFormat')) + || (($tagname eq 'a') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_linkProductTitle')) + ) + { + $self->{isFound} = 1 ; + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = $self->{loadedUrl}; + } + elsif (($tagname eq 'a') && ($attr->{id} =~ m/_hlkTitle/i) && ($self->{isFound} eq '0')) + { + $self->{itemIdx}++; + $self->{itemsList}[$self->{itemIdx}]->{url} = "http://www.adlibris.com/" . $self->{isLang} . "/" . 
$attr->{href}; + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} =~ m/ctl00_main_frame_ctrlsearchhit_rptSearchHit_ctl/i) && ($attr->{id} =~ m/_Label2/i) && ($self->{isFound} eq '0')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} =~ m/ctl00_main_frame_ctrlsearchhit_rptSearchHit_ctl/i) && ($attr->{id} =~ m/_Label4/i) && ($self->{isFound} eq '0')) + { + $self->{isFormat} = 1 ; + } + } + else + { + if (($tagname eq 'h1')) + { + $self->{isTitle} = 1 ; + } + elsif (($tagname eq 'li') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_liISBN13')) + { + $self->{isbnLevel} = 1 ; + } + elsif ($self->{isbnLevel} > 0) + { + if ($self->{isbnLevel} < 5) + { + $self->{isbnLevel}++ ; + } + else + { + $self->{isISBN} = 1 ; + $self->{isbnLevel} = 0 ; + } + } + elsif (($tagname eq 'a') && (($attr->{id} eq 'ctl00_main_frame_ctrlproduct_rptAuthor_ctl00_linkAuthor')) || ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_rptAuthor_ctl01_linkAuthor')) + { + $self->{isAuthor} = 1 ; + } + elsif (($tagname eq 'a') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_linkPublisher')) + { + $self->{isPublisher} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblPublished')) + { + $self->{isPublication} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblPages')) + { + $self->{isPages} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblLanguage')) + { + $self->{isLanguage} = 1 ; + } + elsif (($tagname eq 'span') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_lblFormat')) + { + $self->{isReliure} = 1 ; + } + elsif (($tagname eq 'div') && ($attr->{class} eq 'productDescription')) + { + $self->{isDescription} = 1 ; + } + elsif (($tagname eq 'img') && ($attr->{id} eq 'ctl00_main_frame_ctrlproduct_imgProduct_ProductImageNotLinked') && !($attr->{src} =~ m/\/noimage./i)) + { + $self->{curInfo}->{cover} = $attr->{src} ; + } + + } + } 
+ + sub end + { + my ($self, $tagname) = @_; + + $self->{isFound} = 0 ; + $self->{inside}->{$tagname}--; + if (($self->{isDescription}) && ($tagname eq 'div')) + { + $self->{isDescription} = 0; + $self->{curInfo}->{description} =~ s/^Beskrivning://g ; + $self->{curInfo}->{description} =~ s/^Kuvaus://g ; + } + } + + sub text + { + my ($self, $origtext) = @_; + + if ($self->{parsingList}) + { + if ($self->{isTitle}) + { + $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + my @array = split(/&/,$origtext); + my $element; + foreach $element (@array) + { + my @nom_prenom = split(/,/,$element); + # Enleve les blancs en debut de chaine + $nom_prenom[0] =~ s/^\s+//; + $nom_prenom[1] =~ s/^\s+//; + # Enleve les blancs en fin de chaine + $nom_prenom[0] =~ s/\s$+//; + $nom_prenom[1] =~ s/\s$+//; + if ($self->{itemsList}[$self->{itemIdx}]->{authors} eq '') + { + $self->{itemsList}[$self->{itemIdx}]->{authors} = $nom_prenom[1] ." " . $nom_prenom[0]; + } + else + { + $self->{itemsList}[$self->{itemIdx}]->{authors} .= ", " . $nom_prenom[1] ." " . 
$nom_prenom[0]; + } + } + + $self->{isAuthor} = 0 ; + } + elsif ($self->{isFormat}) + { + $self->{itemsList}[$self->{itemIdx}]->{format} = $origtext; + $self->{isFormat} = 0 ; + } + } + else + { + # Enleve les blancs en debut de chaine + $origtext =~ s/^\s+//; + $origtext =~ s/\s+$//; + if ($self->{isTitle}) + { + $self->{curInfo}->{title} = $origtext; + $self->{isTitle} = 0 ; + } + elsif ($self->{isAuthor}) + { + $self->{curInfo}->{authors} .= $origtext; + $self->{curInfo}->{authors} .= ","; + $self->{isAuthor} = 0 ; + } + elsif ($self->{isISBN}) + { + $self->{curInfo}->{isbn} = $origtext; + $self->{curInfo}->{isbn} =~ s/\s//g; + $self->{isISBN} = 0 ; + } + elsif ($self->{isPublisher}) + { + $self->{curInfo}->{publisher} = $origtext; + $self->{isPublisher} = 0 ; + } + elsif ($self->{isPublication}) + { + $self->{curInfo}->{publication} = $origtext; + $self->{curInfo}->{publication} =~ s/(\d\d\d\d)(\d\d)/01\/$2\/$1/g; + $self->{isPublication} = 0 ; + } + elsif ($self->{isPages}) + { + $self->{curInfo}->{pages} = $origtext; + $self->{isPages} = 0 ; + } + elsif ($self->{isLanguage}) + { + $self->{curInfo}->{language} = $origtext; + $self->{isLanguage} = 0 ; + } + elsif ($self->{isReliure}) + { + $self->{curInfo}->{format} = $origtext; + $self->{isReliure} = 0 ; + } + elsif ($self->{isDescription}) + { + $self->{curInfo}->{description} .= $origtext ; + } + + } + } + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{hasField} = { + title => 1, + authors => 1, + publication => 0, + format => 1, + edition => 0, + }; + + $self->{isLang} = 'se'; + $self->{isFound} = 0; + $self->{isTitle} = 0; + $self->{isAuthor} = 0; + $self->{isFormat} = 0; + $self->{isPublisher} = 0; + $self->{isISBN} = 0; + $self->{isPublicationAndPages} = 0; + $self->{isLangAndReliure} = 0; + $self->{isDescription} = 0; + + return $self; + } + + sub preProcess + { + my ($self, $html) = @_; + + if 
($self->{parsingList}) + { + } + else + { + $html =~ s|<li>|\n* |g; + $html =~ s|<br>|\n|g; + $html =~ s|<br />|\n|g; + $html =~ s|<p>|\n|g; + $html =~ s|<b>||g; + $html =~ s|</b>||g; + $html =~ s|<i>||g; + $html =~ s|</i>||g; + } + + return $html; + } + + sub getSearchUrl + { + my ($self, $word) = @_; + + if ($self->{searchField} eq 'isbn') + { + return "http://www.adlibris.com/" . $self->{isLang} . "/searchresult.aspx?isbn=" . $word. "&%3BfromProduct=true"; + } + else + { + return "http://www.adlibris.com/" . $self->{isLang} . "/searchresult.aspx?title=" . $word. "&%3BfromProduct=true"; + } + + } + + sub getItemUrl + { + my ($self, $url) = @_; + + return $url; + } + + sub getName + { + return "Adlibris"; + } + + sub getAuthor + { + return 'TPF'; + } + + sub getLang + { + return 'SW'; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + +} + +1; diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm new file mode 100644 index 0000000..bc75766 --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksAmazonCommon.pm @@ -0,0 +1,65 @@ +package GCPlugins::GCbooks::GCbooksAmazonCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCbooks::GCbooksCommon; +use GCPlugins::GCstar::GCAmazonCommon; + +{ + package GCPlugins::GCbooks::GCbooksAmazonPluginsBase; + + use base ('GCPlugins::GCbooks::GCbooksPluginsBase', 'GCPlugins::GCstar::GCPluginAmazonCommon'); + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + + $self->{codeField} = 'isbn'; + + return $self; + } + + sub isEAN + { + my ($self, $value) = @_; + + return $value =~ /^978/; + } + + sub getSearchFieldsArray + { + return ['isbn', 'title']; + } + + sub getEanField + { + return 'isbn'; + } +} + +1;
\ No newline at end of file diff --git a/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm b/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm new file mode 100644 index 0000000..a74e35a --- /dev/null +++ b/lib/gcstar/GCPlugins/GCbooks/GCbooksCommon.pm @@ -0,0 +1,61 @@ +package GCPlugins::GCbooks::GCbooksCommon; + +################################################### +# +# Copyright 2005-2010 Christian Jodar +# +# This file is part of GCstar. +# +# GCstar is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# GCstar is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCstar; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA +# +################################################### + +use strict; + +use GCPlugins::GCPluginsBase; + +{ + package GCPlugins::GCbooks::GCbooksPluginsBase; + + use base qw(GCPluginParser); + use HTML::Entities; + + sub new + { + my $proto = shift; + my $class = ref($proto) || $proto; + my $self = $class->SUPER::new(); + bless ($self, $class); + $self->{searchType} = 'books'; + return $self; + } + + sub getSearchFieldsArray + { + return ['title']; + } + + sub getEanField + { + my $self = shift; + my $fields = $self->getSearchFieldsArray; + return 'isbn' + if $fields->[0] eq 'isbn'; + return undef; + } + +} + +1;
\ No newline at end of file |