package GCPlugins::GCcomics::GCbedetheque;
###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use utf8;
use GCPlugins::GCcomics::GCcomicsCommon;
{
package GCPlugins::GCcomics::GCPluginbedetheque;
use LWP::Simple qw($ua);
use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);
# Build the search URL matching the currently selected search field.
#   $word : search term, inserted into the query string as-is.
# Returns the URL, or an empty string when $self->{searchField} is neither
# 'series' nor 'writer'.
sub getSearchUrl
{
    my ( $self, $word ) = @_;

    # Query-string parameter the site expects for each supported field.
    my %param_for = (
        series => 'RechSerie',
        writer => 'RechAuteur',
    );
    my $field = $self->{searchField};

    # Unknown or unset field: there is nothing to search with.
    return '' unless defined($field) && exists $param_for{$field};

    return 'http://www.bedetheque.com/search/albums?' . $param_for{$field} . '=' . $word;
}
# Returns an array reference naming the fields a search can be run on.
sub getSearchFieldsArray
{
    my @fields = qw(series writer);
    return \@fields;
}
# Turn a URL taken from the search results into the URL of the item page.
#   $url : absolute URL, or a path relative to the site root.
# Side effect: the text between the first and second '#' of $url (undef when
# there is no '#') is stored in $self->{site_internal_id}.
# Returns $url unchanged when it is already absolute (http or https),
# otherwise $url prefixed with the site root.
sub getItemUrl
{
    my ( $self, $url ) = @_;

    my @parts = split( /#/, $url );
    $self->{site_internal_id} = $parts[1];

    # Accept both schemes: testing for "http:" alone would prepend the site
    # root to an https URL and produce an invalid address.
    return $url if $url =~ m{^https?:};

    return "http://www.bedetheque.com/" . $url;
}
# Reports the number of passes for this plugin: always 1.
sub getNumberPasses { return 1 }
# Name of this plugin.
sub getName { return 'Bedetheque' }
# Author of this plugin.
sub getAuthor { return "Mckmonster" }
# Language code reported by this plugin.
sub getLang { return "FR" }
# Character set reported for search terms.
# Original author's note: URLs need to be double character encoded.
sub getSearchCharset
{
    return 'utf8';
}
# Constructor: builds the object through the parent class, re-blesses it into
# the requested class and initialises the parser state used by this plugin.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = bless( $class->SUPER::new(), $class );

    # Fields flagged in hasField.
    $self->{hasField} = { map { $_ => 1 } qw(series title volume) };

    # Numeric state starts at zero, textual state starts empty.
    $self->{$_} = 0  for qw(isResultsTable isCover itemIdx);
    $self->{$_} = "" for qw(last_cover site_internal_id serie synopsis current_field);

    return $self;
}
# Normalise a downloaded page before it is handed to the HTML parser.
#   $html : raw page content.
# Whitespace runs are collapsed to one space, leading content is trimmed when
# one of the trim patterns matches, and the per-page parser state is reset.
# On a results page $self->{alternative} records which pattern matched
# (0 = "N album(s)" listing, 1 = the other layout); it is left untouched when
# neither matches.
# Returns the normalised (and possibly trimmed) HTML.
sub preProcess
{
    my ( $self, $html ) = @_;

    $self->{parsingEnded} = 0;

    # Every whitespace run, line breaks included, becomes a single space.
    $html =~ s/\s+/ /g;
    $html =~ s/\r?\n//g;

    if ( $self->{parsingList} )
    {
        if ( $html =~ m/(\d+\salbum\(s\).+)/ )
        {
            # keep only albums, no series or objects
            $html = $1;
            $self->{alternative} = 0;
        }
        # NOTE(review): this pattern starts with a literal line break, which
        # cannot remain in $html after the whitespace collapse above; it looks
        # truncated (probably a lost HTML tag) - confirm against upstream.
        elsif ( $html =~ m/(
.+)/ )
        {
            $html = $1;
            $self->{alternative} = 1;
        }
    }
    else
    {
        # Trim leading content only when the pattern really matched. Assigning
        # $1 unconditionally would replace the page with a stale or undefined
        # capture whenever the match fails.
        # NOTE(review): same apparently truncated pattern as in the list branch.
        if ( $html =~ m/(
.+)/ )
        {
            $html = $1;
        }

        # Reset the state machine for a new item page.
        $self->{$_} = 0 for qw(isResultsTable parsingEnded isCover isTabs isLabel);
        $self->{itemIdx}++;

        # No field has been captured yet on this page.
        $self->{$_} = 0 for qw(
            doneColourist doneCost doneFormat doneIllustrator doneISBN
            doneNumberboards donePublishdate donePublisher doneSerie
            doneSynopsis doneTitle doneVolume doneWriter
        );
    }
    return $html;
}
# HTML start-tag handler.
#   $tagname : name of the opening tag.
#   $attr    : hash reference holding the tag's attributes.
#   $attrseq, $origtext : received but not used here.
# On a search-results page (parsingList set) it creates entries in
# $self->{itemsList} and raises the flags telling text() which field the next
# text belongs to. On an item page it stores the cover URL in
# $self->{curInfo} and raises the is* flags for the fields not yet captured.
# Does nothing once parsingEnded is set.
sub start
{
my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_;
return if ( $self->{parsingEnded} );
if ( $self->{parsingList} )
{
# An undefined 'alternative' is treated like 0 (regular listing).
if ( !defined ($self->{alternative}) || (!$self->{alternative}) )
{
# A link whose href contains "album-" opens a new result entry.
if ( ( $tagname eq "a" ) && ( $attr->{href} =~ m/album-/ ) )
{
$self->{isCollection} = 1;
$self->{itemIdx}++;
# The href is cut at its first "." and both halves are glued back
# together, so $searchUrl holds the same characters as the href.
my $searchUrl = substr($attr->{href},0,index($attr->{href},".")).substr($attr->{href},index($attr->{href},"."));
$self->{itemsList}[$self->{itemIdx}]->{url} = $searchUrl;
$self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};
#$self->{itemsList}[ $self->{itemIdx} ]->{url} =
# "http://www.bedetheque.com/" . $attr->{href};
}
# <ul class="search-list"> marks the start of the results list.
elsif ( ( $tagname eq "ul" ) && ( $attr->{class} eq "search-list" ) ) {
$self->{inTable} = 1;
}
# Unlike the alternative layout below, <li> does not advance itemIdx here.
elsif ( ($self->{inTable}) && ( $tagname eq "li" ) ) {
$self->{isVolume} = 1;
}
# Tooltip link: 'rel' is stored as the image, 'href' as the item URL.
elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{title} eq "tooltip" ) ) {
$self->{itemsList}[$self->{itemIdx}]->{image} = $attr->{rel};
$self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
}
# The span class selects which field the following text fills.
elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "titre" ) ) {
$self->{isTitle} = 1;
}
elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "serie" ) ) {
$self->{isSerie} = 1;
}
elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "num" ) ) {
$self->{isNumber} = 1;
}
} else {
# Alternative results layout: every <li> of the list is one result.
if ( ( $tagname eq "ul" ) && ( $attr->{class} eq "search-list" ) ) {
$self->{inTable} = 1;
}
elsif ( ($self->{inTable}) && ( $tagname eq "li" ) ) {
$self->{itemIdx}++;
$self->{isVolume} = 1;
}
elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{title} eq "tooltip" ) ) {
$self->{itemsList}[$self->{itemIdx}]->{image} = $attr->{rel};
$self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
}
elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "titre" ) ) {
$self->{isTitle} = 1;
}
elsif ( ($self->{isVolume}) && ( $tagname eq "span" ) && ( $attr->{class} eq "serie" ) ) {
$self->{isSerie} = 1;
}
# NOTE(review): startSynopsis is never set to a true value in this file,
# so this branch looks unreachable - confirm against the base class.
elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "br" ) && ( $self->{startSynopsis} ) ) {
# This is a stop! for br ;-) and complementary of the p in the end section
# should be ( ( $tagname eq "p" ) || ( $tagname eq "br" ) )
$self->{isSynopsis} = 0;
$self->{startSynopsis} = 0;
$self->{parsingEnded} = 1;
}
}
}
else
{
# Item page. The order of the tests below matters: the first matching
# branch wins.
# First link to a .jpg under "Couvertures/" is taken as the cover image.
if ( ( $self->{isCover} == 0 ) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/Couvertures\/.*\.[jJ][pP][gG]/ ) ) {
$self->{curInfo}->{image} = $attr->{href};
$self->{isCover} = 1;
}
# text() inspects the label text to pick publish date, cost or format.
elsif ( $tagname eq "label" ) {
$self->{isLabel} = 1;
}
# The series name is read from inside <ul class="tabs-album">.
elsif ( ( $tagname eq "ul" ) && ( $attr->{class} eq "tabs-album" ) && ( ! $self->{doneSerie} ) ) {
$self->{isTabs} = 1;
}
# Each field is captured once: the done* flag blocks later matches.
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "name" ) && ( ! $self->{doneTitle} ) ) {
$self->{isTitle} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{class} eq "titre-rubrique" ) && ( ! $self->{doneSerie} ) && ( $self->{isTabs} ) ) {
$self->{isSerie} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "author" ) && ( ! $self->{doneWriter} ) ) {
$self->{isWriter} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "illustrator" ) && ( ! $self->{doneIllustrator} ) ) {
$self->{isIllustrator} = 1;
}
# A second "illustrator" span, seen after the illustrator was captured,
# is stored as the colourist.
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "illustrator" ) && ( ! $self->{doneColourist} ) && ( $self->{doneIllustrator} ) ) {
$self->{isColourist} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "publisher" ) && ( ! $self->{donePublisher} ) ) {
$self->{isPublisher} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "isbn" ) && ( ! $self->{doneISBN} ) ) {
$self->{isISBN} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "numberOfPages" ) && ( ! $self->{doneNumberboards} ) ) {
$self->{isNumberboards} = 1;
}
elsif ( ( $tagname eq "span" ) && ( $attr->{itemprop} eq "description" ) && ( ! $self->{doneSynopsis} ) ) {
$self->{isSynopsis} = 1;
}
# From <ul class="liste-albums"> on, no later "illustrator" span may be
# taken for the colourist.
elsif ( ( $tagname eq "ul" ) && ( $attr->{class} eq "liste-albums" ) ) {
$self->{doneColourist} = 1; # To avoid getting mess with illustrator
}
# elsif ( ( $tagname eq "a" ) && ( $attr->{class} eq "titre eo" ) ) {
# if ( $attr->{title} =~ m/.+\s-(\d+)-\s.+/ ) {
# $self->{curInfo}->{volume} = $1;
# }
# }
}
}
# Text handler: stores $origtext in whichever field the flags raised by
# start() currently select.
#   $origtext : text found between two tags.
# A text consisting of exactly one space is ignored, and nothing is done once
# parsingEnded is set. Results go to $self->{itemsList} on a search-results
# page and to $self->{curInfo} on an item page.
sub text
{
my ( $self, $origtext ) = @_;
return if ( $origtext eq " " );
return if ( $self->{parsingEnded} );
if ( $self->{parsingList} )
{
# An undefined 'alternative' is treated like 0 (regular listing).
if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) {
# Each flag is consumed by the first text that follows it.
if ( $self->{isSerie} == 1)
{
$self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext;
$self->{isSerie} = 0;
}
elsif ( $self->{isTitle} == 1)
{
$self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
$self->{isTitle} = 0;
}
elsif ( $self->{isNumber} == 1)
{
$self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
# Drop the first "#" from the volume number.
$self->{itemsList}[ $self->{itemIdx} ]->{volume} =~ s/#//;
$self->{isNumber} = 0;
}
else
{
if ($self->{isCollection} == 1)
{
#sometimes the field is "-vol-title", sometimes "--vol-title"
# Only the first run of dashes is collapsed (no /g modifier).
$origtext =~ s/-+/-/;
if ( $origtext =~ m/(.+)\s-(\d+)-\s(.+)/ ) {
# "series -N- title": the third capture (title) is not stored here.
$self->{itemsList}[ $self->{itemIdx} ]->{series} = $1;
$self->{itemsList}[ $self->{itemIdx} ]->{volume} = $2;
} elsif ( $origtext =~ /-/ ){
# Fallback: first dash-separated part is the series, second the volume.
my @fields = split( /-/, $origtext );
$self->{itemsList}[ $self->{itemIdx} ]->{series} = $fields[0];
$self->{itemsList}[ $self->{itemIdx} ]->{volume} = $fields[1];
}
$self->{isCollection} = 0;
}
}
} else {
# Alternative layout: flags are cleared by end(), not here, so the last
# text seen while a flag is up is the one that is kept.
if ( ( $self->{inTable} ) && ( $self->{isTitle} ) ) {
$self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
} elsif ( ( $self->{inTable} ) && ( $self->{isVolume} ) ) {
$self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
}
}
}
else
{
# NOTE(review): no code in this file sets isResultsTable or openlabel to a
# true value, so this label/value table branch may be unreachable - confirm
# against the base class.
if ( $self->{isResultsTable} == 1 )
{
# Remove the whitespace following the first colon.
$origtext=~s/:\s+/:/;
# Maps a label text to the curInfo key receiving the next text.
# NOTE(review): the 'printdate ' value and the "Collection : " key carry a
# trailing space; after the substitution above a text ending in ": " has
# lost that space, so the "Collection : " key appears unable to match -
# confirm whether these spaces are typos.
my %td_fields_map = (
"Identifiant :" => '',
"Scénario :" => 'writer',
"Dessin :" => 'illustrator',
"Couleurs :" => 'colourist',
"Dépot légal :" => 'publishdate',
"Achevé impr. :" => 'printdate ',
"Estimation :" => 'cost',
"Editeur :" => 'publisher',
"Collection : " => 'collection',
"Taille :" => 'format',
"ISBN :" => 'isbn',
"Planches :" => 'numberboards'
);
if ( ( $self->{openlabel} ) && ( exists $td_fields_map{$origtext} ) ) {
$self->{current_field} = $td_fields_map{$origtext};
}
# A non-empty current_field means the previous text was a known label:
# this text is its value.
elsif ( defined ( $self->{current_field} ) && ( $self->{current_field} !~ /^$/ ) )
{
# NOTE(review): as displayed this replaces a space with a space; the
# pattern may originally have targeted another character - confirm.
$origtext=~s/ / /g;
$origtext=~s/\s+$//g;
$self->{curInfo}->{$self->{current_field}} = $origtext;
$self->{current_field} = "";
}
}
# From here on: one branch per field. The is* flag is cleared and the
# matching done* flag set so start() does not capture the field again.
elsif ( $self->{isTitle} ) {
$self->{curInfo}->{title} = $origtext;
$self->{isTitle} = 0 ;
$self->{doneTitle} = 1 ;
}
elsif ( $self->{isSerie} ) {
$self->{curInfo}->{series} = $origtext;
# Strip leading whitespace from the series name.
$self->{curInfo}->{series} =~s/^\s+//;
$self->{isSerie} = 0 ;
$self->{doneSerie} = 1 ;
$self->{isTabs} = 0 ;
}
elsif ( $self->{isWriter} ) {
$self->{curInfo}->{writer} = $origtext;
$self->{isWriter} = 0 ;
$self->{doneWriter} = 1 ;
}
elsif ( $self->{isIllustrator} ) {
$self->{curInfo}->{illustrator} = $origtext;
$self->{isIllustrator} = 0 ;
$self->{doneIllustrator} = 1 ;
}
elsif ( $self->{isColourist} ) {
$self->{curInfo}->{colourist} = $origtext;
$self->{isColourist} = 0 ;
$self->{doneColourist} = 1 ;
}
elsif ( $self->{isPublisher} ) {
$self->{curInfo}->{publisher} = $origtext;
$self->{isPublisher} = 0 ;
$self->{donePublisher} = 1 ;
}
elsif ( $self->{isISBN} ) {
$self->{curInfo}->{isbn} = $origtext;
$self->{isISBN} = 0 ;
$self->{doneISBN} = 1 ;
}
elsif ( $self->{isNumberboards} ) {
$self->{curInfo}->{numberboards} = $origtext;
$self->{isNumberboards} = 0 ;
$self->{doneNumberboards} = 1 ;
}
elsif ( $self->{isVolume} ) {
$self->{curInfo}->{volume} = $origtext;
$self->{isVolume} = 0 ;
$self->{doneVolume} = 1 ;
}
# Label-driven fields work in two steps: the label text raises the flag,
# then the next text is stored as the value.
elsif ( ( $self->{isLabel} ) && ( $origtext =~ m/Dépot légal/ ) && ( ! $self->{donePublishdate} ) ) {
$self->{isPublishdate} = 1 ;
$self->{isLabel} = 0 ;
}
elsif ( $self->{isPublishdate} ) {
$self->{curInfo}->{publishdate} = $origtext;
$self->{isPublishdate} = 0 ;
$self->{donePublishdate} = 1 ;
}
elsif ( ( $self->{isLabel} ) && ( $origtext =~ m/Estimation/ ) && ( ! $self->{doneCost} ) ) {
$self->{isCost} = 1 ;
$self->{isLabel} = 0 ;
}
elsif ( $self->{isCost} ) {
$self->{curInfo}->{cost} = $origtext;
$self->{isCost} = 0 ;
$self->{doneCost} = 1 ;
}
elsif ( ( $self->{isLabel} ) && ( $origtext =~ m/Format/ ) && ( ! $self->{doneFormat} ) ) {
$self->{isFormat} = 1 ;
$self->{isLabel} = 0 ;
}
elsif ( $self->{isFormat} ) {
$self->{curInfo}->{format} = $origtext;
$self->{isFormat} = 0 ;
$self->{doneFormat} = 1 ;
}
elsif ( $self->{isSynopsis} ) {
$self->{curInfo}->{synopsis} = $origtext;
# Trim whitespace at both ends of the synopsis.
$self->{curInfo}->{synopsis} =~ s/^(\s)*//;
$self->{curInfo}->{synopsis} =~ s/(\s)*$//;
$self->{isSynopsis} = 0 ;
$self->{doneSynopsis} = 1 ;
}
}
}
# HTML end-tag handler: lowers the parser-state flags that the closing tag
# $tagname terminates. Does nothing once parsingEnded is set.
sub end
{
    my ( $self, $tagname ) = @_;
    return if ( $self->{parsingEnded} );

    if ( !$self->{parsingList} )
    {
        # Item page.
        if ( ( $tagname eq "ul" ) && $self->{isResultsTable} == 1 )
        {
            $self->{isIssue} = 0;
            $self->{isResultsTable} = 0;
        }
        elsif ( $tagname eq "label" )
        {
            $self->{$_} = 0 for qw(openlabel isLabel);
        }
        elsif ( $tagname eq "span" )
        {
            # A closing span ends whichever span-based field was being read.
            $self->{$_} = 0 for qw(
                isColourist isIllustrator isISBN isNumberboards isPublisher
                isSerie isSynopsis isTitle isWriter
            );
        }
        elsif ( ( $self->{isSynopsis} )
            && ( ( $tagname eq "p" ) || ( $tagname eq "br" ) )
            && ( $self->{startSynopsis} ) )
        {
            # End of the synopsis also ends the parsing of the page.
            $self->{isSynopsis} = 0;
            $self->{startSynopsis} = 0;
            $self->{parsingEnded} = 1;
        }
    }
    elsif ( defined( $self->{alternative} ) && $self->{alternative} )
    {
        # Alternative results layout.
        if ( ( $self->{inTable} ) && ( $tagname eq "span" ) )
        {
            $self->{isTitle} = 0;
        }
        elsif ( ( $self->{inTable} ) && ( $tagname eq "li" ) )
        {
            $self->{isVolume} = 0;
        }
    }
    elsif ( ( $tagname eq "i" ) && $self->{isCollection} == 1 )
    {
        # Regular results layout: end of collection, next field is title.
        $self->{isTitle} = 1;
        $self->{isCollection} = 0;
    }
}
}
1;