summary refs log tree commit diff
path: root/lib/gcstar/GCPlugins/GCcomics
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gcstar/GCPlugins/GCcomics')
-rw-r--r-- lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm 398
-rw-r--r-- lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm 546
-rw-r--r-- lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm 49
-rw-r--r-- lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm 503
4 files changed, 1496 insertions, 0 deletions
diff --git a/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm
new file mode 100644
index 0000000..457194a
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm
@@ -0,0 +1,398 @@
+package GCPlugins::GCcomics::GCbedetheque;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCcomics::GCcomicsCommon;
+
+{
+
+ package GCPlugins::GCcomics::GCPluginbedetheque;
+
+ use LWP::Simple qw($ua);
+
+ use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);
+ # Builds the search URL for $word according to $self->{searchField}.
+ # Returns an empty string for any field other than 'series' or 'writer'.
+ sub getSearchUrl
+ {
+     my ( $self, $word ) = @_;
+
+     # Query-string parameter used for each supported search field.
+     # (A free-text "RechTexte" search exists in the history but is disabled.)
+     my %paramFor = (
+         series => 'RechSerie',
+         writer => 'RechAuteur',
+     );
+
+     my $param = $paramFor{ $self->{searchField} };
+     return '' unless defined $param;
+
+     return "http://www.bedetheque.com/index.php?R=1&" . $param . "=" . $word;
+ }
+
+ # Returns a reference to the list of search fields this plugin accepts.
+ sub getSearchFieldsArray
+ {
+     my @fields = qw(series writer);
+     return \@fields;
+ }
+
+ # Turns a result URL into the URL to fetch for an item.
+ #
+ # $url may be absolute or relative to http://www.bedetheque.com/. The text
+ # between the first and second '#' of $url (undef when there is no '#') is
+ # stored in $self->{site_internal_id}. Returns the absolute URL.
+ sub getItemUrl
+ {
+     my ( $self, $url ) = @_;
+     my @array = split( /#/, $url );
+     $self->{site_internal_id} = $array[1];
+
+     # Absolute links, http or https, must not receive the site prefix again.
+     return $url if $url =~ /^https?:/;
+     return "http://www.bedetheque.com/" . $url;
+ }
+
+ # Reports the number of passes for this plugin: 1.
+ sub getNumberPasses
+ {
+     my $passes = 1;
+     return $passes;
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     return 'Bedetheque';
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     return "Mckmonster";
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     return "FR";
+ }
+
+ # Charset name used for the search word; always "utf8".
+ sub getSearchCharset
+ {
+     # Original author's note: URLs need to be double character encoded.
+     return 'utf8';
+ }
+
+ # Constructor: builds the parent object, re-blesses it into this class,
+ # flags series/title/volume in hasField and resets the parser state.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self  = bless( $class->SUPER::new(), $class );
+
+     $self->{hasField} = { map { $_ => 1 } qw(series title volume) };
+
+     # Numeric state starts at zero.
+     $self->{$_} = 0 for qw(isResultsTable isCover itemIdx);
+
+     # Textual state starts empty.
+     $self->{$_} = "" for qw(last_cover site_internal_id serie synopsis current_field);
+
+     return $self;
+ }
+
+ # Normalises a downloaded page and trims it to the part worth parsing.
+ #
+ # $html is the page text; the (possibly trimmed) text is returned.
+ # In list mode $self->{alternative} records which layout was recognised:
+ # 0 for the "N album(s)" listing (also the default), 1 for the
+ # "albums_table" block. In item mode the per-item flags are reset and
+ # itemIdx is advanced.
+ sub preProcess
+ {
+     my ( $self, $html ) = @_;
+
+     $self->{parsingEnded} = 0;
+
+     # Collapsing whitespace runs also removes every line break.
+     $html =~ s/\s+/ /g;
+
+     if ( $self->{parsingList} )
+     {
+         # Start from the default layout so that a value left over from a
+         # previous page cannot leak in when neither marker is found.
+         $self->{alternative} = 0;
+
+         if ( $html =~ m/(\d+\salbum\(s\).+)/ )
+         {
+             # keep only albums, no series or objects
+             $html = $1;
+         }
+         elsif ( $html =~ m/(<div id="albums_table">.+)/ )
+         {
+             $html = $1;
+             $self->{alternative} = 1;
+         }
+     }
+     else
+     {
+         # Trim only after a successful match: a failed match leaves $1
+         # holding whatever an earlier match put there (or undef).
+         if ( $html =~ m/(<div class="box main reeditions">.+)/ )
+         {
+             $html = $1;
+         }
+
+         $self->{isResultsTable} = 0;
+         $self->{isCover} = 0;
+         $self->{itemIdx}++;
+     }
+
+     return $html;
+ }
+
+ # Start-tag callback (the argument list matches HTML::Parser's "start"
+ # handler; the base class is not visible here).
+ #
+ # $tagname is the tag name and $attr a hash reference of its attributes.
+ # Does nothing once $self->{parsingEnded} is set.
+ #
+ # List mode, default layout: every <a> whose href contains "album-" opens a
+ # new entry in $self->{itemsList}; <i> announces a series name.
+ # List mode, alternative layout: entries are read from the table with id
+ # "albums_serie" (td.num opens an entry, a "serie-" link carries its URL).
+ # Item mode: flags are raised so that text() knows which field the next
+ # text chunk belongs to; the cover URL and the volume number found in
+ # attributes are stored directly in $self->{curInfo}.
+ sub start
+ {
+     my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_;
+
+     return if ( $self->{parsingEnded} );
+
+     if ( $self->{parsingList} )
+     {
+         if ( !defined ($self->{alternative}) || (!$self->{alternative}) )
+         {
+             if ( ( $tagname eq "a" ) && ( $attr->{href} =~ m/album-/ ) )
+             {
+                 $self->{isCollection} = 1;
+                 $self->{itemIdx}++;
+
+                 # The href is stored exactly as found in the page.
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                 $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};
+             }
+             elsif ( $tagname eq "i" )
+             {
+                 $self->{isSerie} = 1;
+             }
+         }
+         else
+         {
+             if ( ( $tagname eq "table" ) && ( $attr->{id} eq "albums_serie" ) )
+             {
+                 $self->{inTable} = 1;
+             }
+             elsif ( ($self->{inTable}) && ( $tagname eq "td" ) && ( $attr->{class} eq "num" ) )
+             {
+                 # A numbered cell opens the next entry of the list.
+                 $self->{itemIdx}++;
+                 $self->{isVolume} = 1;
+             }
+             elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/serie-/ ) )
+             {
+                 $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
+                 $self->{isTitle} = 1;
+             }
+             elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "br" ) && ( $self->{startSynopsis} ) )
+             {
+                 # This is a stop! for br ;-) and complementary of the p in the end section
+                 # should be ( ( $tagname eq "p" ) || ( $tagname eq "br" ) )
+                 # NOTE(review): isSynopsis is only raised in item mode below,
+                 # yet this stop sits in the list branch - confirm placement.
+                 $self->{isSynopsis} = 0;
+                 $self->{startSynopsis} = 0;
+                 $self->{parsingEnded} = 1;
+             }
+         }
+     }
+     else
+     {
+         if ( $tagname eq "title")
+         {
+             $self->{isIssue} = 1;
+             $self->{isTitle} = 1;
+         }
+
+         if ( ( $self->{isCover} == 0 ) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/Couvertures\/.*\.[jJ][pP][gG]/ ) )
+         {
+             # Only the first cover link of the page is kept.
+             $self->{curInfo}->{image} = 'http://www.bedetheque.com/' . $attr->{href};
+             $self->{isCover} = 1;
+         }
+         elsif ( ( $tagname eq "div") && ( $attr->{class} eq "titre" ) )
+         {
+             $self->{isVolume} = 1;
+         }
+         elsif ( ( $tagname eq "ul") && ( $attr->{class} eq "infos" ) )
+         {
+             $self->{isResultsTable} = 1;
+         }
+         elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) )
+         {
+             # A new label starts: forget the field being filled.
+             $self->{current_field} = '';
+             $self->{openlabel} = 1;
+         }
+         elsif ( ( $tagname eq "div" ) && ( $attr->{class} eq "title" ) && ( !defined( $self->{curInfo}->{title} ) || ( $self->{curInfo}->{title} =~ /^$/ ) ) )
+         {
+             # Only used while no title has been collected yet.
+             $self->{isTitle} = 1;
+         }
+         elsif ( ( $tagname eq "span" ) && ( $attr->{class} eq "type" ) )
+         {
+             $self->{isSerie} = 1;
+         }
+         elsif ( $tagname eq "em" )
+         {
+             $self->{isSynopsis} = 1;
+         }
+         elsif ( ( $tagname eq "a" ) && ( $attr->{class} eq "titre eo" ) )
+         {
+             # The title attribute looks like "<series> -<number>- <title>".
+             if ( $attr->{title} =~ m/.+\s-(\d+)-\s.+/ )
+             {
+                 $self->{curInfo}->{volume} = $1;
+             }
+         }
+     }
+ }
+
+ # Text callback: receives each chunk of text found between tags.
+ #
+ # A chunk consisting of a single space is ignored, and nothing is done once
+ # $self->{parsingEnded} is set. What happens to the text depends on the
+ # flags raised by start()/end():
+ # - list mode: fills series/volume/title of the current itemsList entry;
+ # - item mode: fills $self->{curInfo}, using the label preceding each value
+ #   inside the "infos" list to choose the destination field.
+ sub text
+ {
+     my ( $self, $origtext ) = @_;
+
+     return if ( $origtext eq " " );
+
+     return if ( $self->{parsingEnded} );
+
+     if ( $self->{parsingList} )
+     {
+         if ( !defined ($self->{alternative}) || (!$self->{alternative}) )
+         {
+             if ( $self->{isSerie} == 1)
+             {
+                 $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext;
+                 $self->{isSerie} = 0;
+             }
+             else
+             {
+                 if ($self->{isCollection} == 1)
+                 {
+                     #sometimes the field is "-vol-title", sometimes "--vol-title"
+                     $origtext =~ s/-+/-/;
+                     if ( $origtext =~ m/(.+)\s-(\d+)-\s(.+)/ )
+                     {
+                         $self->{itemsList}[ $self->{itemIdx} ]->{series} = $1;
+                         $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $2;
+                     }
+                     elsif ( $origtext =~ /-/ )
+                     {
+                         my @fields = split( /-/, $origtext );
+                         $self->{itemsList}[ $self->{itemIdx} ]->{series} = $fields[0];
+                         $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $fields[1];
+                     }
+                     $self->{isCollection} = 0;
+                 }
+             }
+         }
+         else
+         {
+             if ( ( $self->{inTable} ) && ( $self->{isTitle} ) )
+             {
+                 $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
+             }
+             elsif ( ( $self->{inTable} ) && ( $self->{isVolume} ) )
+             {
+                 $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
+             }
+         }
+     }
+     else
+     {
+         if ( $self->{isResultsTable} == 1 )
+         {
+             # Drop the whitespace after the first colon so that a label
+             # such as "ISBN : " compares equal to the key "ISBN :".
+             $origtext=~s/:\s+/:/;
+
+             # Label text => curInfo field. An empty field name means the
+             # value following that label is ignored. Keys must not end
+             # with whitespace: the substitution above has removed it.
+             my %td_fields_map = (
+                 "Identifiant :" => '',
+                 "Scénario :" => 'writer',
+                 "Dessin :" => 'illustrator',
+                 "Couleurs :" => 'colourist',
+                 "Dépot légal :" => 'publishdate',
+                 "Achevé impr. :" => 'printdate',
+                 "Estimation :" => 'cost',
+                 "Editeur :" => 'publisher',
+                 "Collection :" => 'collection',
+                 "Taille :" => 'format',
+                 "ISBN :" => 'isbn',
+                 "Planches :" => 'numberboards'
+             );
+
+             if ( ( $self->{openlabel} ) && ( exists $td_fields_map{$origtext} ) )
+             {
+                 $self->{current_field} = $td_fields_map{$origtext};
+             }
+             elsif ( defined ( $self->{current_field} ) && ( $self->{current_field} !~ /^$/ ) )
+             {
+                 # First text after a known label: that is the value.
+                 $origtext=~s/&nbsp;/ /g;
+                 $origtext=~s/\s+$//g;
+                 $self->{curInfo}->{$self->{current_field}} = $origtext;
+                 $self->{current_field} = "";
+             }
+         }
+         elsif ( $self->{isVolume} )
+         {
+             $self->{curInfo}->{volume} = $origtext;
+             $self->{isVolume} = 0 ;
+         }
+
+         if ( $self->{isTitle} )
+         {
+             $self->{curInfo}->{title} = $origtext;
+         }
+         elsif ( $self->{isSerie} )
+         {
+             $self->{curInfo}->{series} = $origtext;
+             $self->{curInfo}->{series} =~s/^\s+//;
+         }
+         elsif ( ( $self->{isSynopsis} ) && ( ( $origtext =~ /Résumé de l'album :/ ) || ( $origtext =~ /Résumé de la série :/ ) ) )
+         {
+             # The heading itself is not kept; the following chunks are.
+             $self->{startSynopsis} = 1;
+         }
+         elsif ( ( $self->{isSynopsis} ) && ( $self->{startSynopsis} ) )
+         {
+             $self->{curInfo}->{synopsis} .= " ".$origtext;
+             $self->{curInfo}->{synopsis} =~ s/^(\s)*//;
+             $self->{curInfo}->{synopsis} =~ s/(\s)*$//;
+         }
+     }
+ }
+
+ # End-tag callback: lowers the flags raised by start() once the matching
+ # element closes. $tagname is the name of the closing tag.
+ # Does nothing once $self->{parsingEnded} is set.
+ sub end
+ {
+ my ( $self, $tagname ) = @_;
+
+ return if ( $self->{parsingEnded} );
+
+ if ( $self->{parsingList} )
+ {
+ # Default list layout ("alternative" unset or false).
+ if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) {
+ if ( ( $tagname eq "i" ) && $self->{isCollection} == 1)
+ {
+
+ # End of collection, next field is title.
+ $self->{isTitle} = 1;
+ $self->{isCollection} = 0;
+ }
+ } else {
+ # Alternative layout: closing the link ends the title, closing the
+ # cell ends the volume number.
+ if ( ( $self->{inTable} ) && ( $tagname eq "a" ) ) {
+ $self->{isTitle} = 0;
+ } elsif ( ( $self->{inTable} ) && ( $tagname eq "td" ) ) {
+ $self->{isVolume} = 0;
+ }
+ }
+ }
+ else
+ {
+ # Item mode. Only the first matching branch runs for a given tag.
+ if ( ( $tagname eq "ul" ) && $self->{isResultsTable} == 1 )
+ {
+ $self->{isIssue} = 0;
+ $self->{isResultsTable} = 0;
+ }
+ elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) {
+ $self->{openlabel} = 0;
+ }
+ elsif ( ( $self->{isTitle} ) && ( ( $tagname eq "div" ) || ( $tagname eq "h1" ) ) ) {
+ $self->{isTitle} = 0;
+ }
+ elsif ( ( $self->{isSerie} ) && ( $tagname eq "a" ) ) {
+ $self->{isSerie} = 0;
+ }
+ # An <em> that closes before the synopsis heading was seen is unrelated.
+ elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "em" ) && ( !$self->{startSynopsis} ) ) {
+ $self->{isSynopsis} = 0;
+ $self->{startSynopsis} = 0;
+ }
+ # Once the synopsis has started, the closing p/br ends it and stops all
+ # further parsing of this page.
+ elsif ( ( $self->{isSynopsis} ) && ( ( $tagname eq "p" ) || ( $tagname eq "br" ) ) && ( $self->{startSynopsis} ) ) {
+ $self->{isSynopsis} = 0;
+ $self->{startSynopsis} = 0;
+ $self->{parsingEnded} = 1;
+ }
+ }
+ }
+}
+
+1; \ No newline at end of file
diff --git a/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm b/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm
new file mode 100644
index 0000000..80b299b
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm
@@ -0,0 +1,546 @@
+package GCPlugins::GCcomics::GCcomicbookdb;
+
+###################################################
+#
+# Copyright 2005-2012 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCcomics::GCcomicsCommon;
+
+{
+
+ package GCPlugins::GCcomics::GCPlugincomicbookdb;
+
+ use LWP::Simple qw($ua);
+ use HTTP::Cookies;
+
+ use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);
+
+ # Start-tag callback. $tagname is the tag name and $attr a hash reference
+ # of its attributes.
+ #
+ # Pass 1 collects series links. In any other pass it either builds the
+ # issue list (when $self->{parsingList} is set) or raises the flags that
+ # tell text() which item field the following text belongs to.
+ sub start
+ {
+ my ($self, $tagname, $attr, $attrseq, $origtext) = @_;
+
+ if ($self->{pass} == 1)
+ {
+ # First pass, searching for series name
+ if ($tagname eq "h2")
+ {
+ $self->{isAtResults} = 1;
+ }
+ # Every non-ebay link seen while isAtResults is set becomes a new
+ # entry; its target is kept in nextUrl.
+ if ( ($tagname eq "a")
+ && ($self->{isAtResults})
+ && !($attr->{href} =~ m/ebay\.com/))
+ {
+ $self->{isCollection} = 1;
+ $self->{itemIdx}++;
+
+ $self->{itemsList}[ $self->{itemIdx} ]->{nextUrl} =
+ "http://www.comicbookdb.com/" . $attr->{href};
+ }
+ }
+ else
+ {
+ # Second pass, or fetching item info
+ if ($self->{parsingList})
+ {
+
+ # isSpecialIssue: 0 = regular row, 1 = a javascript link was seen,
+ # 2 = a <tbody> opened after that link.
+ if ( ($tagname eq "tbody")
+ && ($self->{isResultsTable})
+ && ($self->{isSpecialIssue} == 1))
+ {
+ $self->{isSpecialIssue} = 2;
+ }
+ # Parsing issue list
+ if (($tagname eq "a") && ($self->{isResultsTable}))
+ {
+ if ($attr->{href} =~ m/javascript/)
+ {
+ # Multiple editions of the one issue, need to be
+ # handled differently
+ $self->{isSpecialIssue} = 1;
+ }
+ elsif ($attr->{href} =~ m/storyarc.php/)
+ {
+ # Prevent story arcs from populating lists
+ }
+ elsif ($self->{isSpecialIssue} == 1)
+ {
+ # resultsTableColumn counts the links seen in the current
+ # row: the first carries the issue number, the second the
+ # title.
+ $self->{resultsTableColumn}++;
+ if ($self->{resultsTableColumn} == 1)
+ {
+ $self->{isSpecialIssueNo} = 1;
+ $self->{isIssue} = 1;
+ $self->{itemIdx}++;
+ $self->{itemsList}[ $self->{itemIdx} ]->{url} =
+ "http://www.comicbookdb.com/" . $attr->{href};
+ }
+ elsif ($self->{resultsTableColumn} == 2)
+ {
+ $self->{isTitle} = 1;
+ $self->{isSpecialTitle} = 1;
+ }
+ }
+ elsif ($self->{isSpecialIssue} == 2)
+ {
+ # Each link found in this state becomes an entry of its own.
+ $self->{itemIdx}++;
+ $self->{itemsList}[ $self->{itemIdx} ]->{url} =
+ "http://www.comicbookdb.com/" . $attr->{href};
+ $self->{isTitle} = 1;
+ }
+ else
+ {
+ # Regular row: first link = issue, second link = title.
+ $self->{resultsTableColumn}++;
+ if ($self->{resultsTableColumn} == 1)
+ {
+ $self->{isIssue} = 1;
+ $self->{itemIdx}++;
+ $self->{itemsList}[ $self->{itemIdx} ]->{url} =
+ "http://www.comicbookdb.com/" . $attr->{href};
+ }
+ elsif ($self->{resultsTableColumn} == 2)
+ {
+ $self->{isTitle} = 1;
+ }
+ }
+ }
+ }
+ else
+ {
+ # Fetching item info
+ # First chain: only the first matching branch runs for a tag.
+ if ( ($tagname eq "span")
+ && ((index $attr->{class}, "page_headline") > -1))
+ {
+ $self->{insideHeadline} = 1;
+ }
+ elsif (($tagname eq "a")
+ && ($self->{insideHeadline})
+ && ($attr->{href} =~ m/title.php/))
+ {
+ $self->{insideName} = 1;
+ }
+ elsif (($tagname eq "a")
+ && ($self->{insideHeadline})
+ && ($attr->{href} =~ m/issue_number.php/))
+ {
+ $self->{insideNumber} = 1;
+ }
+ # The nextis* flags are raised by text() on the credit labels;
+ # a link that follows one belongs to that credit.
+ elsif (($tagname eq "a") && ($self->{nextisWriters}))
+ {
+ $self->{insideWriters} = 1;
+ $self->{insidePencillers} = 0;
+ $self->{insideColorists} = 0;
+ }
+ elsif (($tagname eq "a") && ($self->{nextisPencillers}))
+ {
+ $self->{insideWriters} = 0;
+ $self->{insidePencillers} = 1;
+ $self->{insideColorists} = 0;
+ }
+ elsif (($tagname eq "a") && ($self->{nextisColorists}))
+ {
+ $self->{insideWriters} = 0;
+ $self->{insidePencillers} = 0;
+ $self->{insideColorists} = 1;
+ }
+ elsif (($tagname eq "a") && ($attr->{href} =~ /imprint.php/))
+ {
+ $self->{insidePublisher} = 1;
+ }
+ # A publisher link is only used while no publisher is stored yet.
+ elsif (($tagname eq "a")
+ && ($attr->{href} =~ /publisher.php/)
+ && (!$self->{curInfo}->{publisher}))
+ {
+ $self->{insidePublisher} = 1;
+ }
+ elsif (($tagname eq "a") && ($attr->{href} =~ /coverdate.php/))
+ {
+ $self->{insideCoverDate} = 1;
+ }
+ # Second, independent chain: sub-headline and cover image.
+ # The span class must contain both "test" and "page_subheadline".
+ if ( ($tagname eq "span")
+ && ((index $attr->{class}, "test") > -1)
+ && ((index $attr->{class}, "page_subheadline") > -1))
+ {
+ $self->{insideSubHeadline} = 1;
+ }
+ elsif (($tagname eq "a")
+ && ($attr->{href} =~ /^graphics\/comic_graphics\//))
+ {
+ $self->{curInfo}->{image} =
+ "http://www.comicbookdb.com/" . $attr->{href};
+ }
+ # An <img> source is only a fallback when no image is stored yet.
+ elsif (($tagname eq "img")
+ && ($attr->{src} =~ /^graphics\/comic_graphics\//)
+ && (!$self->{curInfo}->{image}))
+ {
+ $self->{curInfo}->{image} =
+ "http://www.comicbookdb.com/" . $attr->{src};
+ }
+
+ }
+ }
+ }
+
+ # End-tag callback: decrements the per-tag counter in $self->{inside} and
+ # clears the state flags that belong to the element being closed.
+ sub end
+ {
+     my ($self, $tagname) = @_;
+     $self->{inside}->{$tagname}--;
+
+     # Inside the results table: leaving the table ends it, leaving a row
+     # restarts the link counter.
+     if ($self->{isResultsTable})
+     {
+         $self->{isResultsTable}     = 0 if $tagname eq "table";
+         $self->{resultsTableColumn} = 0 if $tagname eq "tr";
+     }
+
+     # Flags cleared by each closing tag.
+     my %flagsClosedBy = (
+         tbody => [qw(isSpecialIssue)],
+         span  => [qw(insideHeadline insideSubHeadline insideNumber)],
+         td    => [
+             qw(isAtResults
+                nextisWriters nextisPencillers nextisColorists
+                insideWriters insidePencillers insideColorists)
+         ],
+         a     => [qw(insidePublisher insideCoverDate)],
+     );
+
+     my $flags = $flagsClosedBy{$tagname} or return;
+     $self->{$_} = 0 for @$flags;
+     return;
+ }
+
+ # Text callback: receives each chunk of text found between tags and stores
+ # it according to the flags raised by start().
+ #
+ # A chunk consisting of a single space is ignored, and nothing is done once
+ # $self->{parsingEnded} is set. In list mode the text fills the current
+ # $self->{itemsList} entry; otherwise it fills $self->{curInfo}.
+ sub text
+ {
+ my ($self, $origtext) = @_;
+
+ return if ($origtext eq " ");
+
+ return if ($self->{parsingEnded});
+
+ if ($self->{parsingList})
+ {
+ # The tests below are independent "if"s: several may fire for the
+ # same chunk of text.
+ if ($self->{isCollection})
+ {
+ $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext;
+ $self->{isCollection} = 0;
+ }
+ # The "Cover Date" text marks the beginning of the issue table.
+ if ($origtext eq "Cover Date")
+ {
+ $self->{isResultsTable} = 1;
+ }
+ if ($self->{isIssue})
+ {
+ $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
+ $self->{isIssue} = 0;
+ }
+ # Remember the issue number of a multi-edition issue so that it can
+ # be copied to the entries created while isSpecialIssue is 2.
+ if ($self->{isSpecialIssueNo})
+ {
+ $self->{specialIssueNo} = $origtext;
+ $self->{isSpecialIssueNo} = 0;
+ }
+ if ($self->{isTitle})
+ {
+ if ($self->{isSpecialIssue} == 2)
+ {
+ # Entry of a multi-edition issue: reuse the remembered number
+ # and put the remembered title in front of this text.
+ $self->{itemsList}[ $self->{itemIdx} ]->{volume} =
+ $self->{specialIssueNo};
+ $self->{itemsList}[ $self->{itemIdx} ]->{title} =
+ $self->{specialTitle} . $origtext;
+ }
+ else
+ {
+ $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
+ }
+ $self->{isTitle} = 0;
+ }
+ if ($self->{isSpecialTitle})
+ {
+ $self->{specialTitle} = $origtext;
+ $self->{isSpecialTitle} = 0;
+ }
+ }
+ else
+ {
+ # Item page: a single if/elsif chain, so only the first matching
+ # branch handles a given chunk of text.
+ if ($self->{insideName})
+ {
+ $self->{curInfo}->{series} = $origtext;
+ #$self->{curInfo}->{series} =~ s/(\s\([0-9]*\))$//;
+ $self->{insideName} = 0;
+ }
+ elsif (($self->{insideNumber}) && ($origtext =~ /^\s*#(\d+)/))
+ {
+ # volume where #XX is in <A HREF... tag, '-' is not
+ $self->{curInfo}->{volume} = $1;
+ $self->{insideNumber} = 0;
+ }
+ elsif (($self->{insideHeadline}) && ($origtext =~ /-\s#(\d+)/))
+ {
+ # volume where #XX isn't in <A HREF... tag
+ $self->{curInfo}->{volume} = $1;
+ $self->{insideNumber} = 0;
+ }
+ elsif (($self->{insideHeadline}) && ($origtext =~ /-\s*TPB/))
+ {
+ # Trade paperback
+ $self->{curInfo}->{series} .= " TPB";
+
+ # Get volume number. Default to 1.
+ if ($origtext =~ /vol\. (\d+)/)
+ {
+ $self->{curInfo}->{volume} = $1;
+ }
+ else
+ {
+ $self->{curInfo}->{volume} = 1;
+ }
+ $self->{insideNumber} = 0;
+ }
+ elsif (($self->{insideHeadline}) && ($origtext =~ /vol\. (\d+)/))
+ {
+ $self->{curInfo}->{volume} = $1;
+ $self->{insideNumber} = 0;
+ }
+ elsif (($self->{insideHeadline}) && ($origtext =~ /-\s*Annual\s*(\d+)/))
+ {
+ # Annual volume where #XX isn't in <A HREF... tag
+ $self->{curInfo}->{volume} = $1;
+ $self->{curInfo}->{series} .= " Annual";
+ $self->{insideNumber} = 0;
+ }
+ # Sub-headline: the quoted part is the title.
+ elsif (($self->{insideSubHeadline}) && ($origtext =~ /\"(.*)\"/))
+ {
+ $self->{curInfo}->{title} = $1;
+
+ # Get printing or other note if present
+ if ($origtext =~ /\((.*)\)/)
+ {
+ $self->{curInfo}->{title} .= " (" . $1 . ")";
+ }
+ }
+ elsif ($self->{insidePublisher})
+ {
+ $self->{curInfo}->{publisher} = $origtext;
+ $self->{insidePublisher} = 0;
+ }
+ # Credit labels: remember which credit the following links belong to.
+ elsif ($origtext eq "Writer(s):")
+ {
+ $self->{nextisWriters} = 1;
+ $self->{nextisPencillers} = 0;
+ $self->{nextisColorists} = 0;
+ }
+ elsif ($origtext eq "Penciller(s):")
+ {
+ $self->{nextisWriters} = 0;
+ $self->{nextisPencillers} = 1;
+ $self->{nextisColorists} = 0;
+ }
+ elsif ($origtext eq "Colorist(s):")
+ {
+ $self->{nextisWriters} = 0;
+ $self->{nextisPencillers} = 0;
+ $self->{nextisColorists} = 1;
+ }
+ # Labels of sections that are not collected: stop attributing links.
+ elsif (($origtext eq "Letterer(s):")
+ || ($origtext eq "Inker(s):")
+ || ($origtext eq "Editor(s):")
+ || ($origtext eq "Cover Artist(s):")
+ || ($origtext eq "Characters:")
+ || ($origtext eq "Groups:"))
+ {
+ $self->{nextisWriters} = 0;
+ $self->{nextisPencillers} = 0;
+ $self->{nextisColorists} = 0;
+ }
+ # Names are joined with ", "; a name already contained in the stored
+ # string (substring test) is not appended again.
+ elsif ($self->{insideWriters})
+ {
+ if ($self->{curInfo}->{writer} eq "")
+ {
+ $self->{curInfo}->{writer} = $origtext;
+ }
+ elsif ((index $self->{curInfo}->{writer}, $origtext) == -1)
+ {
+ $self->{curInfo}->{writer} .= ", ";
+ $self->{curInfo}->{writer} .= $origtext;
+ }
+
+ $self->{insideWriters} = 0;
+ }
+ elsif ($self->{insidePencillers})
+ {
+ if ($self->{curInfo}->{illustrator} eq "")
+ {
+ $self->{curInfo}->{illustrator} = $origtext;
+ }
+ elsif ((index $self->{curInfo}->{illustrator}, $origtext) == -1)
+ {
+ $self->{curInfo}->{illustrator} .= ", ";
+ $self->{curInfo}->{illustrator} .= $origtext;
+ }
+
+ $self->{insidePencillers} = 0;
+ }
+ elsif ($self->{insideColorists})
+ {
+ if ($self->{curInfo}->{colourist} eq "")
+ {
+ $self->{curInfo}->{colourist} = $origtext;
+ }
+ elsif ((index $self->{curInfo}->{colourist}, $origtext) == -1)
+ {
+ $self->{curInfo}->{colourist} .= ", ";
+ $self->{curInfo}->{colourist} .= $origtext;
+ }
+
+ $self->{insideColorists} = 0;
+ }
+ # The chunk right after the "Synopsis: " label is the synopsis.
+ elsif ($origtext eq "Synopsis: ")
+ {
+ $self->{nextisSynopsis} = 1;
+ }
+ elsif ($self->{nextisSynopsis})
+ {
+ # NOTE(review): the final dot of the pattern is unescaped, so it
+ # matches any character in that position.
+ if ($origtext !~ /None entered./)
+ {
+ $self->{curInfo}->{synopsis} = $origtext;
+ $self->{curInfo}->{synopsis} =~ s/^(\s)*//;
+ $self->{curInfo}->{synopsis} =~ s/(\s)*$//;
+ }
+ $self->{nextisSynopsis} = 0;
+ }
+ elsif ($self->{insideCoverDate})
+ {
+ $self->{curInfo}->{printdate} = $origtext;
+ $self->{curInfo}->{printdate} =~ s/^(\s)*//;
+
+ # Translate date string to date
+ # (GCUtils::strToTime is defined outside this file; "%B %Y" is
+ # passed as its format argument.)
+ $self->{curInfo}->{printdate} =
+ GCUtils::strToTime($self->{curInfo}->{printdate}, "%B %Y");
+ $self->{curInfo}->{publishdate} = $self->{curInfo}->{printdate};
+ }
+ }
+ }
+
+ # Constructor: builds the parent object, gives its user agent a cookie
+ # jar, re-blesses it into this class and resets the parser state.
+ sub new
+ {
+     my $proto = shift;
+     my $class = ref($proto) || $proto;
+     my $self  = $class->SUPER::new();
+
+     # $self->{ua} is presumably created by the parent constructor - the
+     # base class is not visible here.
+     my $jar = HTTP::Cookies->new;
+     $self->{ua}->cookie_jar($jar);
+
+     bless($self, $class);
+
+     $self->{$_} = 0     for qw(isResultsTable itemIdx resultsTableColumn);
+     $self->{$_} = undef for qw(curName curUrl);
+
+     return $self;
+ }
+
+ # Sets $self->{hasField} for the current pass: only "series" during
+ # pass 1, "title" and "volume" otherwise.
+ sub getReturnedFields
+ {
+     my $self = shift;
+
+     my @columns = ($self->{pass} == 1) ? qw(series) : qw(title volume);
+     $self->{hasField} = { map { $_ => 1 } @columns };
+ }
+
+ # Clears the parsingEnded flag and hands the page back unchanged.
+ sub preProcess
+ {
+     my $self = shift;
+     my $html = shift;
+
+     $self->{parsingEnded} = 0;
+
+     return $html;
+ }
+
+ # Builds the title-search URL for $word. As a side effect the site's home
+ # page is requested first through the imported $ua.
+ sub getSearchUrl
+ {
+     my ($self, $word) = @_;
+
+     # Strip a trailing URL-encoded "+(YYYY)" year from the search word.
+     $word =~ s/\+%28\d{4}%29$//;
+
+     # Per the original author's note, the pages fetched are blank unless
+     # the home page is grabbed first. The response itself is not used.
+     $ua->get('http://www.comicbookdb.com/');
+
+     my $query = "form_search=$word&form_searchtype=Title";
+     return "http://www.comicbookdb.com/search.php?" . $query;
+ }
+
+ # Returns the absolute URL to fetch for an item.
+ #
+ # $url is returned untouched when it is already absolute (http or https);
+ # otherwise it is treated as a path on http://www.comicbookdb.com.
+ sub getItemUrl
+ {
+     my ($self, $url) = @_;
+
+     return $url if $url =~ /^https?:/;
+
+     # Relative paths may come with or without a leading slash; make sure
+     # exactly one separates the host name from the path.
+     $url = "/" . $url unless $url =~ m{^/};
+
+     return "http://www.comicbookdb.com" . $url;
+ }
+
+ # Reports the number of passes for this plugin: 2.
+ sub getNumberPasses
+ {
+     my $passes = 2;
+     return $passes;
+ }
+
+ # Name of this plugin.
+ sub getName
+ {
+     return 'Comic Book DB';
+ }
+
+ # Author of this plugin.
+ sub getAuthor
+ {
+     return "Zombiepig";
+ }
+
+ # Language code reported by this plugin.
+ sub getLang
+ {
+     return "EN";
+ }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm b/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm
new file mode 100644
index 0000000..3b1229c
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm
@@ -0,0 +1,49 @@
+package GCPlugins::GCcomics::GCcomicsCommon;
+
+###################################################
+#
+# Copyright 2005-2010 Christian Jodar
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+
+use GCPlugins::GCPluginsBase;
+
+{
+    # Common base class of the comics plugins; derives from GCPluginParser.
+    package GCPlugins::GCcomics::GCcomicsPluginsBase;
+
+    use base qw(GCPluginParser);
+
+    # Constructor: builds the parent object and re-blesses it into the
+    # class the constructor was invoked on.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+
+        return bless( $class->SUPER::new(), $class );
+    }
+
+    # Default list of search fields: series only.
+    sub getSearchFieldsArray
+    {
+        my @fields = ('series');
+        return \@fields;
+    }
+}
+
+1;
diff --git a/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm b/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm
new file mode 100644
index 0000000..d05d0c8
--- /dev/null
+++ b/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm
@@ -0,0 +1,503 @@
+package GCPlugins::GCcomics::GCmangasanctuary;
+
+###################################################
+#
+# Copyright 2005-2007 Tian
+#
+# This file is part of GCstar.
+#
+# GCstar is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# GCstar is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCstar; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+#
+###################################################
+
+use strict;
+use utf8;
+
+use GCPlugins::GCcomics::GCcomicsCommon;
+
+{
+
+ package GCPlugins::GCcomics::GCPluginmangasanctuary;
+
+ use LWP::Simple qw($ua);
+
+ use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);
+
+    # Start-tag callback (presumably invoked by the HTML parser base class - TODO confirm).
+    # Results pages: records each link as an item. Item pages: raises the flags read by text().
+    sub start
+    {
+        my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_;
+        if ( $self->{parsingList} )# search results page
+        {
+            #The interesting part to parse looks like this :
+            #<li class="row1"><a href="/manhwa-rebirth-vol-2-simple-s1397-p682.html">Rebirth #2</a> <span>Manhwa</span></li>
+            if ( $tagname eq "a" )
+            {
+                $self->{isDebut} = 1;
+                $self->{itemIdx}++;
+                $self->{itemsList}[ $self->{itemIdx} ]->{url} = "http://www.manga-sanctuary.com" . $attr->{href};
+                my ( $type, $format ) = $attr->{href} =~ m/\/(.*?)-.*-vol-\d+-(.*?)-s\d+-p\d+\.html/;#list context: undef, not a stale $1/$2, when the href has another shape
+                $self->{itemsList}[ $self->{itemIdx} ]->{type} = $type;
+                $self->{itemsList}[ $self->{itemIdx} ]->{format} = $format;
+            }
+        }
+        else# item page
+        {
+            #Start by fetching the cover image
+            #<a target="_blank" href="/couvertures/big/rebirth1gd.jpg"><img src="/couvertures/rebirth1gd.jpg"></a>
+            if ( ( $tagname eq "a" ) && ( $attr->{href} =~ m/couvertures.*\.[jJ][pP][gG]/ ) )
+            {
+                my $response = $ua->get("http://www.manga-sanctuary.com" . $attr->{href});
+                if ($response->content_type =~ m/text\/html/) #the large image does not exist
+                {
+                    $self->{downloadThumbnail} = 1;
+                }
+                else#the large image exists
+                {
+                    $self->{curInfo}->{image} = "http://www.manga-sanctuary.com" . $attr->{href};
+                }
+            }
+            if ( ( $tagname eq "img" ) && ( $attr->{src} =~ m/couvertures.*\.[jJ][pP][gG]/ ) && ($self->{downloadThumbnail} == 1) )
+            {
+                $self->{curInfo}->{image} = "http://www.manga-sanctuary.com" . $attr->{src};
+                $self->{downloadThumbnail} =0;
+            }
+            #Generic tracking of the dt, dd, h3, p and a tags
+            if ( $tagname eq "dt")
+            {
+                $self->{tagDTdetected} =1;
+            }elsif ( $tagname eq "dd")
+            {
+                $self->{tagDDdetected} =1;
+            }elsif ( $tagname eq "h3")
+            {
+                $self->{tagH3detected} =1;
+            }elsif ( $tagname eq "p")
+            {
+                $self->{tagPdetected} =1;
+            }elsif ( $tagname eq "a")
+            {
+                $self->{tagAdetected} =1;
+            }
+            #Tell the kinds of title apart (original / French) from the flag image: flag 77 means French
+            if ( ( $tagname eq "img") && ( $attr->{src} =~ m/\/design\/img\/flags/ ) && ($self->{titleDetected} == 1) )
+            {
+                my ($flagId) = $attr->{src} =~ m/\/(\d*)\.png$/;#undef when the src is not <digits>.png
+                if ( ( $flagId || 0 ) == 77 )
+                {
+                    $self->{titreFrancais} = 1;
+                }
+                else
+                {
+                    $self->{titreFrancais} = 0;
+                }
+            }
+            #Fetch the rating
+            #<ul id="notation">\nStaff MS:<img src="/design/img/9.gif" title="8.5/10"/></ul>
+            if ( ( $tagname eq "ul") && ( $attr->{id} =~ m/notation/ ) )
+            {
+                $self->{notationDetected} = 1;
+            }elsif ( ( $tagname eq "img") && ( $self->{notationDetected} == 1 ) )
+            {
+                my ($rating) = $attr->{title} =~ m/^(\d*\.?\d*)\/10/;
+                $self->{curInfo}->{rating} = $rating if defined $rating;
+                $self->{notationDetected} = 0;
+
+                #Take the format from the address of the page.
+                #http://www.manga-sanctuary.com/manga-duds-hunt-vol-1-simple-s1169-p1477.html
+                #Could be done as soon as webPage is set; placed here so that it runs only once.
+                my ($format) = $self->{curInfo}->{webPage} =~ m/vol-\d+-(.*?)-s\d+-p\d+\.html/;#own capture: a failed match used to store the rating as format
+                $self->{curInfo}->{format} = $format if defined $format;
+            }
+        }
+    }
+
+    # End-tag callback: clears the tag flags raised by start() and any field flag that is
+    # still pending, so that an empty element cannot leak its flag into the next field.
+    sub end
+    {
+        my ( $self, $tagname ) = @_;
+        if ( $self->{parsingList} )# search results page
+        {
+            if ( ( $tagname eq "a" ) && $self->{isFin} == 1 )
+            {
+                #end of collection, next field is title
+                $self->{isFin} = 0;
+            }
+        }
+        else# item page
+        {
+            #Generic tracking of the dt and dd tags
+            if ( $tagname eq "dt")
+            {
+                $self->{tagDTdetected} =0;
+            }elsif ( $tagname eq "dd")
+            {
+                $self->{tagDDdetected} =0;
+                #Reset every field flag in case the field was empty. volumeDetected is included:
+                #left set, it would make text() store the value of the next field as the volume.
+                $self->{$_} = 0 foreach qw(
+                    titleDetected volumeDetected publisherDetected collectionDetected
+                    publishdateDetected costDetected typeDetected categoryDetected
+                    genresDetected scenaristeDetected dessinateurDetected
+                );
+                $self->{titreFrancais} = 1;#back to the default (French)
+            }elsif ( $tagname eq "div")
+            {
+                #The content to collect for a given h3 heading comes after the </h3> tag,
+                #so </h3> cannot be used to clear the flag; </div> is used instead.
+                $self->{tagH3detected} =0;
+            }elsif ( $tagname eq "p")
+            {
+                $self->{tagPdetected} =0;
+                #Reset in case the field was empty
+                $self->{synopsisDetected} =0;
+                $self->{critiquesDetected} =0;
+                $self->{reactionsDetected} =0;
+            }elsif ( $tagname eq "a")
+            {
+                $self->{tagAdetected} =0;
+            }elsif ( $tagname eq "ul" )
+            {
+                $self->{notationDetected} = 0;
+            }
+        }
+    }
+
+    # Text callback. Results pages: stores the link text as the title of the current item.
+    # Item pages: uses the flags raised by start() to route the text into the curInfo fields.
+    sub text
+    {
+        my ( $self, $origtext ) = @_;
+
+        return if ( $origtext eq " " );
+        return if ( $self->{parsingEnded} );
+
+        if ( $self->{parsingList} )# search results page
+        {
+            if ( $self->{isDebut} )
+            {
+                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
+                $self->{isDebut} = 0;
+                $self->{isFin} = 1;
+            }
+        }
+        else# item page
+        {
+            if ( $self->{tagDTdetected} == 1 )
+            {
+                #Title
+                #<dt><label>Titre <img src="/design/img/flags/112.png"></label></dt><dd>&#37507;&#22818; Last Order </dd><dt><label>Titre <img src="/design/img/flags/77.png"></label></dt><dd>Gunnm Last Order</dd>
+                if ($origtext =~ m/^Titre/)
+                {
+                    $self->{titleDetected} =1;
+                }
+                #Volume
+                #<dt><label>Volume:</label></dt>\n<dd>1/23</dd>
+                elsif ($origtext =~ m/^Volume/)
+                {
+                    $self->{volumeDetected} =1;
+                }
+                #Publisher
+                #<dt><label>Editeur:</label></dt>\n<dd><a href="http://www.manga-sanctuary.com/bdd/editeurs/6-glenat.html" title="Glénat">Glénat</a></dd>
+                elsif ($origtext =~ m/^Editeur/)
+                {
+                    $self->{publisherDetected} =1;
+                }
+                #collection
+                #<dt><label>Label:</label></dt>\n<dd>Kana Shonen</dd>
+                elsif ($origtext =~ m/^Label/)
+                {
+                    $self->{collectionDetected} =1;
+                }
+                #PublishDate
+                #<dt><label>Date de sortie:</label></dt>\n<dd>31/10/2002</dd>
+                elsif ($origtext =~ m/^Date de sortie/)
+                {
+                    $self->{publishdateDetected} =1;
+                }
+                #cost
+                #<dt><label>Prix:</label></dt>\n<dd>6.5 EUR</dd>
+                elsif ($origtext =~ m/^Prix/)
+                {
+                    $self->{costDetected} =1;
+                }
+                #type
+                #<dt><label>Type:</label></dt>\n<dd>Manga</dd>
+                elsif ($origtext =~ m/^Type/)
+                {
+                    $self->{typeDetected} =1;
+                }
+                #category
+                #<dt><label>Catégorie:</label></dt>\n<dd>Seinen</dd>
+                elsif ($origtext =~ m/^Catégorie/)
+                {
+                    $self->{categoryDetected} =1;
+                }
+                #Genres [NOTE: no access to the tags field, so it goes into the synopsis]
+                #<dt><label>Genres:</label></dt>\n<dd>Action, SF</dd>
+                elsif ($origtext =~ m/^Genres/)
+                {
+                    $self->{genresDetected} =1;
+                }
+                #writer [from the series page]
+                #<dt><label>Scénariste</label></dt>
+                elsif ($origtext =~ m/^Scénariste/)
+                {
+                    $self->{scenaristeDetected} =1;
+                }
+                #illustrator [from the series page]
+                #<dt><label>Dessinateur</label></dt>
+                elsif ($origtext =~ m/^Dessinateur/)
+                {
+                    $self->{dessinateurDetected} =1;
+                }
+            }
+
+            if ( $self->{tagDDdetected} == 1 )
+            {
+                if ($self->{titleDetected} == 1)
+                {
+                    my ($title) = $origtext =~ m/^\s*(.*?)\s*$/s;#/s: the trim also matches multi-line text; list context avoids a stale $1
+                    if ($self->{titreFrancais} == 1)
+                    {
+                        #$self->{curInfo}->{title} = $title; #Title disabled because it is the same as the series
+                        $self->{curInfo}->{series} = $title;
+                    }
+                    else
+                    {
+                        $self->{curInfo}->{synopsis} .= "Titre original :".$title."\n";
+                    }
+                    $self->{titleDetected} = 0;
+                }
+                elsif ($self->{volumeDetected} == 1)
+                {
+                    my ($volume) = $origtext =~ m/^(\d*)\//;
+                    $self->{curInfo}->{volume} = $volume if defined $volume;
+                    $self->{volumeDetected} =0;
+                }
+                elsif ($self->{publisherDetected} == 1)
+                {
+                    $self->{curInfo}->{publisher} = $origtext;
+                    $self->{publisherDetected} =0;
+                }
+                elsif ($self->{collectionDetected} == 1)
+                {
+                    $self->{curInfo}->{collection} = $origtext;
+                    $self->{collectionDetected} =0;
+                }
+                elsif ($self->{publishdateDetected} == 1)
+                {
+                    $self->{curInfo}->{publishdate} = $origtext;
+                    $self->{publishdateDetected} =0;
+                }
+                elsif ($self->{costDetected} == 1)
+                {
+                    my ($cost) = $origtext =~ m/^\s*(\d+\.?\d*|\.\d+)/;#the decimal part is optional, so "7 EUR" is accepted too
+                    $self->{curInfo}->{cost} = $cost if defined $cost;
+                    $self->{costDetected} =0;
+                }
+                elsif ($self->{typeDetected} == 1)
+                {
+                    $self->{curInfo}->{type} = $origtext;
+                    $self->{typeDetected} =0;
+                }
+                elsif ($self->{categoryDetected} == 1)
+                {
+                    $self->{curInfo}->{category} = $origtext;
+                    $self->{categoryDetected} =0;
+                }
+                elsif ($self->{genresDetected} == 1)
+                {
+                    my ($genres) = $origtext =~ m/^\s*(.*?)\s*$/s;
+                    $self->{curInfo}->{synopsis} .= "Genres : ".$genres."\n\n";
+                    $self->{genresDetected} =0;
+                }
+            }
+            if ( $self->{tagH3detected} == 1 )
+            {
+                #Synopsis detection
+                # <h3><span>Synopsis</span></h3>
+                if ($origtext =~ m/^Synopsis/)
+                {
+                    $self->{synopsisDetected} =1;
+                    $self->{curInfo}->{synopsis} .= "Synopsis :\n";
+                }
+                #Staff reviews detection
+                #<h3>Critiques du staff</h3>
+                elsif ($origtext =~ m/^Critiques du staff/)
+                {
+                    $self->{critiquesDetected} =1;
+                    $self->{curInfo}->{synopsis} .= "\n\nCritiques du staff :\n";
+                }
+                #Reader reactions disabled because not very interesting
+                # #Reactions detection
+                # #<h3>Réactions</h3>
+                # elsif ($origtext =~ m/^Réactions/)
+                # {
+                #     $self->{reactionsDetected} =1;
+                #     $self->{curInfo}->{synopsis} .= "\n\nRéactions :\n";
+                # }
+            }
+            if ( $self->{tagPdetected} == 1 )
+            {
+                if ($self->{synopsisDetected} == 1)
+                {
+                    my ($paragraph) = $origtext =~ m/^\s*(.*?)\s*$/s;
+                    $self->{curInfo}->{synopsis} .= $paragraph."\n";
+                    $self->{genresDetected} =0;
+                }elsif ($self->{critiquesDetected} == 1)
+                {
+                    my ($paragraph) = $origtext =~ m/^\s*(.*?)\s*$/s;
+                    $self->{curInfo}->{synopsis} .= $paragraph."\n";
+                    $self->{genresDetected} =0;
+                }
+                #Reader reactions disabled because not very interesting
+                # elsif ($self->{reactionsDetected} == 1)
+                # {
+                #     $origtext =~ m/^\s*(.*?)\s*$/;
+                #     $self->{curInfo}->{synopsis} .= $1."\n";
+                #     $self->{genresDetected} =0;
+                # }
+            }
+            if ( $self->{tagAdetected} == 1 )
+            {
+                if ($self->{scenaristeDetected} == 1)
+                {
+                    $self->{curInfo}->{writer} = $origtext;
+                    $self->{scenaristeDetected} =0;
+                }
+                elsif ($self->{dessinateurDetected} == 1)
+                {
+                    $self->{curInfo}->{illustrator} = $origtext;
+                    $self->{dessinateurDetected} =0;
+                }
+            }
+        }
+    }
+
+    # Constructor: builds the parent object, declares hasField and zeroes every parser state flag.
+    sub new
+    {
+        my $proto = shift;
+        my $class = ref($proto) || $proto;
+        my $self  = $class->SUPER::new();
+        bless( $self, $class );
+#for the search (alternative field set, kept disabled):
+#        $self->{hasField} = {
+#            series => 1,
+#            title => 1,
+#            volume => 1,
+#        };
+        $self->{hasField} = {
+            title => 1,
+            type => 1,
+            format => 1,
+        };
+        $self->{itemIdx} = 0;
+        $self->{downloadThumbnail} = 0;
+        $self->{tagDTdetected} =0;
+        $self->{tagDDdetected} =0;
+        $self->{tagH3detected} =0;
+        $self->{tagPdetected} =0;
+        $self->{tagAdetected} =0;#set by start(), read by text(): initialised like its sibling tag flags
+        $self->{titleDetected} =0;
+        $self->{titreFrancais} = 1;#defaults to French
+        $self->{volumeDetected} =0;#set and read by text(): initialised like the other field flags
+        $self->{publisherDetected} =0;
+        $self->{collectionDetected} =0;
+        $self->{publishdateDetected} =0;
+        $self->{costDetected} =0;
+        $self->{typeDetected} =0;
+        $self->{categoryDetected} =0;
+        $self->{genresDetected} =0;
+        $self->{synopsisDetected} =0;
+        $self->{critiquesDetected} =0;
+        $self->{reactionsDetected} =0;
+        $self->{scenaristeDetected} =0;
+        $self->{dessinateurDetected} =0;
+        $self->{notationDetected} = 0;
+
+        return $self;
+    }
+
+    # Trims a downloaded page down to the fragment the parser callbacks work on.
+    # Results page: only the "Volumes (n)" list. Item page: the record itself, followed by
+    # the "Staff" block fetched from the linked series page. Returns '' when the page
+    # does not have the expected layout.
+    sub preProcess
+    {
+        my ( $self, $html ) = @_;
+
+        if ( $self->{parsingList} )# search results page
+        {
+            #keep only Volumes (list-context match: undef instead of a stale $1 on failure)
+            my ($volumes) = $html =~ m/<h3>Volumes\s\(\d+\)<\/h3>\s*(.*?)\s*<h3>Critiques/s;
+            return defined $volumes ? $volumes : '';
+        }
+
+        #item page: the record, and the address of the series page taken from its menu
+        my ( $record, $seriesUrl ) = $html =~ m/<div id="contenu">\s*(<ul id="menu_fiche">\s*<li><a href="(http:\/\/www.manga-sanctuary.com.*?)">.*?)\s*<h3><span>Mes actions<\/span><\/h3>/s;
+        return '' unless defined $record;#nothing to parse, and no series address to fetch
+
+        #fetch the staff information from the series page
+        my $response = $ua->get($seriesUrl);
+        my ($staff) = $response->content =~ m/<h3><span>Staff<\/span><\/h3>\s*(.*?<\/dl>)/s;
+        $staff = '' unless defined $staff;#a failed match used to append the record to itself
+        return $record . "\n\n <fiche série>\n\n" . $staff;
+    }
+
+    # Returns a two-element list: the search address, then an array ref holding the
+    # 'keywords' => query pair, where every '+' of the query has been turned into a space.
+    sub getSearchUrl
+    {
+        my ( $self, $query ) = @_;
+        $query =~ s{\+}{ }g;
+        return ( 'http://www.manga-sanctuary.com/recherche/tout/', [ 'keywords' => $query ] );
+    }
+
+    # Returns the absolute address of an item: http/https URLs pass through, site-relative paths get the site prefix.
+    sub getItemUrl
+    {
+        my ( $self, $url ) = @_;
+        #Disabled on the bet that this part is not used:
+        #    my @array = split( /#/, $url );
+        #    $self->{site_internal_id} = $array[1];
+        return $url if $url =~ /^https?:/i;#https counts as absolute too, otherwise it would get prefixed
+        return "http://www.manga-sanctuary.com" . $url;
+    }
+
+    sub getNumberPasses    # Pass count reported by this plugin: always 1 (presumably the number of search passes - TODO confirm).
+    {
+        return 1;
+    }
+
+    sub getName    # Name of this plugin: the constant string "Manga-Sanctuary".
+    {
+        return "Manga-Sanctuary";
+    }
+
+    sub getAuthor    # Author credit for this plugin: the constant string 'Biggriffon'.
+    {
+        return 'Biggriffon';
+    }
+
+    sub getLang    # Language code of this plugin: the constant string 'FR'.
+    {
+        return 'FR';
+    }
+}
+
+1;