package GCPlugins::GCbooks::GCBokkilden;

###################################################
#
#  Copyright 2005-2010 Christian Jodar
#
#  This file is part of GCstar.
#
#  GCstar is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  GCstar is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with GCstar; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCbooks::GCbooksCommon;

{
    # Book-information plugin for www.bokkilden.no.
    #
    # The object is a hash whose state is shared between the three parsing
    # callbacks (start / end / text).  Keys such as parsingList, itemsList,
    # itemIdx, curInfo and loadedUrl are read here but never initialised in
    # this file -- presumably the base class maintains them; confirm against
    # GCbooksPluginsBase.
    package GCPlugins::GCbooks::GCPluginBokkilden;

    use base qw(GCPlugins::GCbooks::GCbooksPluginsBase);

    # Callback for an opening tag.
    # NOTE(review): the signature looks like an HTML::Parser start handler --
    # confirm how the base class registers it.
    #
    #   $tagname  - name of the tag that was opened
    #   $attr     - hash ref of the tag's attributes
    #   $attrseq, $origtext - accepted but not used here
    #
    # Updates the per-tag nesting counter and raises the state flags that
    # text() later consumes.
    sub start {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        $self->{inside}->{$tagname}++;

        if ($self->{parsingEnded}) {
            # text() decided this page is not a result list.  If no item has
            # been recorded yet, register the loaded page as the only item.
            if ($self->{itemIdx} < 0) {
                $self->{itemIdx} = 0;
                $self->{itemsList}[0]->{url} = $self->{loadedUrl};
            }
            return;
        }

        if ($self->{parsingList}) {
            if (($tagname eq 'h1') && ($attr->{class} eq 'normal')) {
                # Each <h1 class="normal"> opens a new entry in the list.
                $self->{isBook} = 1;
                $self->{itemIdx}++;
            }
            elsif ($self->{isBook}) {
                if ($tagname eq 'a') {
                    if (($attr->{href} =~ /produkt\.do/)
                        && (!$self->{itemsList}[$self->{itemIdx}]->{title}))
                    {
                        # First product link of the entry: remember its URL
                        # and take the next text node as the title.
                        $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
                        $self->{isTitle} = 1;
                    }
                    elsif ($attr->{href} =~ /sok\.do\?enkeltsok/) {
                        # Search link: the next text node is an author name.
                        $self->{isAuthor} = 1;
                    }
                }
            }
        }
        else {
            if ($tagname eq 'table') {
                $self->{isBook} = 1 if ($attr->{class} eq 'bokfaktatabell');
            }
            elsif ($tagname eq 'div') {
                $self->{isCover} = 1
                    if ($attr->{class} eq 'img-ilus') && ($attr->{style} eq 'width:120px;');
                $self->{is} = 'description' if $attr->{id} eq 'omtale-hidden';
            }
            elsif ($tagname eq 'img') {
                if ($self->{isCover}) {
                    # First image inside the cover container.
                    $self->{curInfo}->{cover} = 'http://www.bokkilden.no/SamboWeb/' . $attr->{src};
                    $self->{isCover} = 0;
                }
            }
            elsif ($tagname eq 'h1') {
                # text() uses the presence of a style attribute to choose
                # where the title comes from.
                $self->{h1Style} = $attr->{style};
            }
        }
    }

    # Callback for a closing tag: decrements the nesting counter that
    # start() incremented for the same tag name.
    sub end {
        my ($self, $tagname) = @_;

        $self->{inside}->{$tagname}--;
    }

    # Callback for a text node.
    #
    #   $origtext - the text content
    #
    # In list mode it fills the current itemsList entry (title, authors,
    # publication); otherwise it fills curInfo for a single book, driven by
    # the flags that start() and earlier text() calls have set.
    sub text {
        my ($self, $origtext) = @_;

        return if ($self->{parsingEnded});

        if ($self->{parsingList}) {
            if ($self->{inside}->{title}) {
                # A page title not matching this pattern is treated as
                # "not a result list".  The '..?' wildcards presumably stand
                # in for non-ASCII letters (one character or two bytes) --
                # confirm.
                $self->{parsingEnded} = 1 if $origtext !~ /S..?k p..?/;
            }
            elsif ($self->{isTitle}) {
                $self->{itemsList}[$self->{itemIdx}]->{title} = $origtext;
                $self->{isTitle} = 0;
            }
            elsif ($self->{isAuthor}) {
                $self->{itemsList}[$self->{itemIdx}]->{authors} .= ','
                    if $self->{itemsList}[$self->{itemIdx}]->{authors};
                $self->{itemsList}[$self->{itemIdx}]->{authors} .= $origtext;
                $self->{isAuthor} = 0;
            }
            elsif ($self->{isBook}) {
                if ($origtext =~ / \| /) {
                    # Only store a year when one is really present: after a
                    # failed match $1 still holds the capture of an earlier
                    # successful match and must not be used.
                    if ($origtext =~ /(\d{4})/) {
                        $self->{itemsList}[$self->{itemIdx}]->{publication} = $1;
                    }
                    $self->{isBook} = 0;
                }
            }
        }
        else {
            if ($self->{is}) {
                # A field name is pending: this text node is its value.
                $origtext =~ s/^\s*//;
                $self->{curInfo}->{$self->{is}} = $origtext;
                if ($self->{is} eq 'genre') {
                    $self->{curInfo}->{genre} =~ s/;\s*/,/g;
                }
                elsif ($self->{is} eq 'pages') {
                    $self->{curInfo}->{pages} =~ s/[^0-9]//g;
                }
                $self->{is} = '';
            }
            elsif ($self->{inside}->{title}) {
                # Kept for the styled-<h1> fallback below.
                $self->{tmpTitle} = $origtext;
            }
            elsif ($self->{inside}->{h1}) {
                if (!$self->{curInfo}->{title}) {
                    if ($self->{h1Style}) {
                        # Styled heading: take title and author from the page
                        # title, which is matched as
                        # "<title> av <author> <U+00BB>".
                        # \x{BB} is the right-pointing double angle quote,
                        # written as an escape so the pattern survives
                        # re-encoding of this file.  Captures are used only
                        # when the match succeeds, and there is no /g so no
                        # pos() is left behind on tmpTitle.
                        if ($self->{tmpTitle} =~ /\s*(.*?) av (.*?)\s*\x{BB}/i) {
                            $self->{curInfo}->{title}   = $1;
                            $self->{curInfo}->{authors} = $2;
                        }
                    }
                    else {
                        $self->{curInfo}->{title} = $origtext;
                    }
                }
            }
            elsif ($self->{inside}->{author}) {
                $self->{curInfo}->{authors} .= ',' if $self->{curInfo}->{authors};
                $self->{curInfo}->{authors} .= $origtext;
            }

            # Independent of the chain above, so the same text node can be
            # handled by both.
            if ($self->{inside}->{translator}) {
                $self->{curInfo}->{translator} .= ', ' if $self->{curInfo}->{translator};
                $self->{curInfo}->{translator} .= $origtext;
            }
            elsif (($self->{isBook}) && $self->{inside}->{b}) {
                # Bold text inside the facts table is a field label; it
                # selects where the next text node is stored ('' = nowhere).
                $self->{is} =
                      ($origtext eq 'Utgitt: ')   ? 'publication'
                    : ($origtext eq 'Forlag: ')   ? 'publisher'
                    : ($origtext eq 'Innb.: ')    ? 'format'
                    : ($origtext =~ /Spr..?k:/)   ? 'language'
                    : ($origtext eq 'Sider: ')    ? 'pages'
                    : ($origtext eq 'ISBN: ')     ? 'isbn'
                    : ($origtext eq 'Utgave: ')   ? 'edition'
                    : ($origtext eq 'Genre:')     ? 'genre'
                    :                               '';
            }
        }
    }

    # Constructor.  Delegates to the parent constructor, re-blesses into the
    # requested class and sets the hasField flags (1 for title, authors and
    # publication; 0 for format and edition).
    sub new {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless($self, $class);

        $self->{hasField} = {
            title       => 1,
            authors     => 1,
            publication => 1,
            format      => 0,
            edition     => 0,
        };

        return $self;
    }

    # Resets the parsing flags and returns the (possibly rewritten) HTML.
    #
    #   $html - page content
    #
    # NOTE(review): as written, each of the three substitutions below
    # replaces a match with its own capture and therefore leaves $html
    # unchanged.  text() tests $self->{inside}->{author} and
    # $self->{inside}->{translator}, which start() only ever sets for tags
    # with those names, so these patterns presumably once contained markup
    # that wrapped author/translator links in such tags.  Confirm against
    # the upstream file before relying on them; they are kept verbatim here.
    sub preProcess {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;
        $self->{isBook}       = 0;

        if ($self->{parsingList}) {
            $self->{isTitle}  = 0;
            $self->{isAuthor} = 0;
        }
        else {
            $self->{is}      = '';
            $self->{isCover} = 0;
            $html =~ s|(.*?)|$1|gim;
            $html =~ s|([^<]*)|$1|gim; #"
            $html =~ s|(.*?)|$1|gim;
        }

        return $html;
    }

    # Builds the search URL for $word.  $word is interpolated as-is; any URL
    # escaping must already have been applied by the caller.
    sub getSearchUrl {
        my ($self, $word) = @_;

        return "http://www.bokkilden.no/SamboWeb/sok.do?rom=MP&enkeltsok=$word&innsnevre=ja";
    }

    # Returns $url unchanged when it already contains the site address;
    # otherwise prefixes it with the site's base path.
    sub getItemUrl {
        my ($self, $url) = @_;

        return "http://www.bokkilden.no/SamboWeb/$url" if $url !~ m|http://www.bokkilden.no/|;
        return $url;
    }

    # Character set reported for this site's pages.
    sub getCharset {
        my $self = shift;

        return 'UTF-8';
    }

    # Fields this plugin can search on.
    sub getSearchFieldsArray {
        return ['isbn', 'title'];
    }

    # Plugin name.
    sub getName {
        return 'Bokkilden';
    }

    # Language code of the site.
    sub getLang {
        return 'NO';
    }

    # Plugin author.
    sub getAuthor {
        return 'Tian';
    }
}

1;