diff options
Diffstat (limited to 'lib/gcstar/GCPlugins/GCcomics')
-rw-r--r-- | lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm | 398 | ||||
-rw-r--r-- | lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm | 546 | ||||
-rw-r--r-- | lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm | 49 | ||||
-rw-r--r-- | lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm | 503 |
4 files changed, 1496 insertions, 0 deletions
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm
# new file mode 100644
# index 0000000..457194a
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCbedetheque.pm
# @@ -0,0 +1,398 @@
package GCPlugins::GCcomics::GCbedetheque;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCcomics::GCcomicsCommon;

{

    # Scraper plugin for www.bedetheque.com.  The start/text/end methods are
    # tag/text callbacks that communicate through flags stored on $self.
    package GCPlugins::GCcomics::GCPluginbedetheque;

    use LWP::Simple qw($ua);

    use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);

    # Build the search URL for $word according to $self->{searchField}
    # ('series' or 'writer').  Returns '' for any other search field.
    sub getSearchUrl
    {
        my ( $self, $word ) = @_;
        if ($self->{searchField} eq 'series')
        {
            return "http://www.bedetheque.com/index.php?R=1&RechSerie=$word";
        }
        elsif ($self->{searchField} eq 'writer')
        {
            return "http://www.bedetheque.com/index.php?R=1&RechAuteur=$word";
        }
        else
        {
            return '';
        }

        #return "http://www.bedetheque.com/index.php?R=1&RechTexte=$word";
    }

    # Search fields supported by this plugin.
    sub getSearchFieldsArray
    {
        return ['series', 'writer'];
    }

    # Turn a (possibly site-relative) item URL into an absolute one and
    # remember the fragment after '#' as the site's internal id.
    sub getItemUrl
    {
        my ( $self, $url ) = @_;
        my @array = split( /#/, $url );
        $self->{site_internal_id} = $array[1];

        # Accept https as well as http: an https URL used to be treated as
        # relative and got the site root glued in front of it.
        return $url if $url =~ /^https?:/;
        return "http://www.bedetheque.com/" . $url;
    }

    sub getNumberPasses
    {
        return 1;
    }

    sub getName
    {
        return "Bedetheque";
    }

    sub getAuthor
    {
        return 'Mckmonster';
    }

    sub getLang
    {
        return 'FR';
    }

    sub getSearchCharset
    {
        my $self = shift;

        # Need urls to be double character encoded
        return "utf8";
    }

    # Constructor: builds the base object, declares the fields shown in the
    # results list and initialises the parser state flags.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();
        bless( $self, $class );

        $self->{hasField} = {
            series => 1,
            title  => 1,
            volume => 1,
        };

        $self->{isResultsTable}   = 0;
        $self->{isCover}          = 0;
        $self->{itemIdx}          = 0;
        $self->{last_cover}       = "";
        $self->{site_internal_id} = "";
        $self->{serie}            = "";
        $self->{synopsis}         = "";
        $self->{current_field}    = "";

        return $self;
    }

    # Trim the downloaded page down to the part worth parsing.
    #
    # List pages: keep everything from "<n> album(s)" (alternative = 0) or,
    # failing that, from the albums table (alternative = 1); otherwise the
    # page is left whole.
    # Item pages: keep everything from the "box main reeditions" div and
    # reset the per-item flags.
    # Returns the HTML to hand to the parser.
    sub preProcess
    {
        my ( $self, $html ) = @_;

        $self->{parsingEnded} = 0;

        # Collapse every whitespace run (newlines included) to one space.
        # The former follow-up s/\r?\n//g could never match after this.
        $html =~ s/\s+/ /g;

        if ( $self->{parsingList} )
        {
            if ( $html =~ m/(\d+\salbum\(s\).+)/ ) {

                #keep only albums, no series or objects
                $html = $1;
                $self->{alternative} = 0;
            } elsif ( $html =~ m/(<div id="albums_table">.+)/ ) {
                $html = $1;
                $self->{alternative} = 1;
            }
        }
        else
        {
            #$html =~ m/(<div class="album.+)/;

            # Only read $1 when the match succeeded.  When the expected div
            # is missing, hand the parser an explicit empty document instead
            # of whatever $1 happened to contain.
            if ( $html =~ m/(<div class="box main reeditions">.+)/ ) {
                $html = $1;
            } else {
                $html = '';
            }
            $self->{isResultsTable} = 0;
            $self->{parsingEnded}   = 0;
            $self->{isCover}        = 0;
            $self->{itemIdx}++;
        }

        return $html;
    }

    # Opening-tag callback.  $attr is the hash of tag attributes.
    sub start
    {
        my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_;

        return if ( $self->{parsingEnded} );

        if ( $self->{parsingList} )
        {
            if ( !defined ($self->{alternative}) || (!$self->{alternative}) )
            {
                if ( ( $tagname eq "a" ) && ( $attr->{href} =~ m/album-/ ) )
                {
                    # Each album link opens a new result entry.
                    $self->{isCollection} = 1;
                    $self->{itemIdx}++;

                    # The href is stored unchanged; the previous
                    # substr(...) . substr(...) expression split it at the
                    # first "." and joined the halves back together.
                    $self->{itemsList}[$self->{itemIdx}]->{url}   = $attr->{href};
                    $self->{itemsList}[$self->{itemIdx}]->{title} = $attr->{title};

                    #$self->{itemsList}[ $self->{itemIdx} ]->{url} =
                    #  "http://www.bedetheque.com/" . $attr->{href};
                }
                elsif ( $tagname eq "i" )
                {
                    $self->{isSerie} = 1;
                }
            } else {
                if ( ( $tagname eq "table" ) && ( $attr->{id} eq "albums_serie" ) ) {
                    $self->{inTable} = 1;
                }
                elsif ( ($self->{inTable}) && ( $tagname eq "td" ) && ( $attr->{class} eq "num" ) ) {
                    # The "num" cell starts a new row, i.e. a new entry.
                    $self->{itemIdx}++;
                    $self->{isVolume} = 1;
                }
                elsif ( ($self->{inTable}) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/serie-/ ) ) {
                    $self->{itemsList}[$self->{itemIdx}]->{url} = $attr->{href};
                    $self->{isTitle} = 1;
                }
                elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "br" ) && ( $self->{startSynopsis} ) ) {

                    # This is a stop! for br ;-) and complementary of the p in the end section
                    # should be ( ( $tagname eq "p" ) || ( $tagname eq "br" ) )
                    $self->{isSynopsis}    = 0;
                    $self->{startSynopsis} = 0;
                    $self->{parsingEnded}  = 1;
                }
            }
        }
        else
        {
            if ( $tagname eq "title")
            {
                $self->{isIssue} = 1;
                $self->{isTitle} = 1;
            }

            # Only the first cover link of the page is kept (isCover latch).
            if ( ( $self->{isCover} == 0 ) && ( $tagname eq "a" ) && ( $attr->{href} =~ m/Couvertures\/.*\.[jJ][pP][gG]/ ) )
            {
                $self->{curInfo}->{image} = 'http://www.bedetheque.com/' . $attr->{href};
                $self->{isCover} = 1;
            }
            elsif ( ( $tagname eq "div") && ( $attr->{class} eq "titre" ) ) {
                $self->{isVolume} = 1;
            }
            elsif ( ( $tagname eq "ul") && ( $attr->{class} eq "infos" ) ) {
                $self->{isResultsTable} = 1;
            }
            elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) {
                $self->{current_field} = '';
                $self->{openlabel} = 1;
            }
            elsif ( ( $tagname eq "div" ) && ( $attr->{class} eq "title" ) && ( !defined( $self->{curInfo}->{title} ) || ( $self->{curInfo}->{title} =~ /^$/ ) ) ) {
                $self->{isTitle} = 1;
            }
            elsif ( ( $tagname eq "span" ) && ( $attr->{class} eq "type" ) ) {
                $self->{isSerie} = 1;
            }
            elsif ( $tagname eq "em" ) {
                $self->{isSynopsis} = 1;
            }
            elsif ( ( $tagname eq "a" ) && ( $attr->{class} eq "titre eo" ) ) {
                if ( $attr->{title} =~ m/.+\s-(\d+)-\s.+/ ) {
                    $self->{curInfo}->{volume} = $1;
                }
            }
        }
    }

    # Text callback: stores $origtext in whichever field the flags raised by
    # start() designate.
    sub text
    {
        my ( $self, $origtext ) = @_;

        return if ( $origtext eq " " );

        return if ( $self->{parsingEnded} );

        if ( $self->{parsingList} )
        {
            if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) {
                if ( $self->{isSerie} == 1)
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext;
                    $self->{isSerie} = 0;
                }
                else
                {
                    if ($self->{isCollection} == 1)
                    {

                        #sometimes the field is "-vol-title", sometimes "--vol-title"
                        $origtext =~ s/-+/-/;
                        if ( $origtext =~ m/(.+)\s-(\d+)-\s(.+)/ ) {
                            $self->{itemsList}[ $self->{itemIdx} ]->{series} = $1;
                            $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $2;
                        } elsif ( $origtext =~ /-/ ){
                            my @fields = split( /-/, $origtext );
                            $self->{itemsList}[ $self->{itemIdx} ]->{series} = $fields[0];
                            $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $fields[1];
                        }
                        $self->{isCollection} = 0;
                    }
                }
            } else {
                if ( ( $self->{inTable} ) && ( $self->{isTitle} ) ) {
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                } elsif ( ( $self->{inTable} ) && ( $self->{isVolume} ) ) {
                    $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
                }
            }
        }
        else
        {
            if ( $self->{isResultsTable} == 1 )
            {
                # Drop the whitespace following the first colon so that the
                # label text can be looked up in the map below.
                $origtext=~s/:\s+/:/;

                # Page label => curInfo field ('' = label known but ignored).
                my %td_fields_map = (
                    "Identifiant :"  => '',
                    "Scénario :"     => 'writer',
                    "Dessin :"       => 'illustrator',
                    "Couleurs :"     => 'colourist',
                    "Dépot légal :"  => 'publishdate',
                    # Was 'printdate ' (trailing space), which filed the
                    # value under a key different from 'printdate'.
                    "Achevé impr. :" => 'printdate',
                    "Estimation :"   => 'cost',
                    "Editeur :"      => 'publisher',
                    # The substitution above removes the space after the
                    # colon, so the historical key with a trailing space is
                    # unreachable for such text; both spellings are listed.
                    "Collection :"   => 'collection',
                    "Collection : "  => 'collection',
                    "Taille :"       => 'format',
                    "ISBN :"         => 'isbn',
                    "Planches :"     => 'numberboards'
                );

                if ( ( $self->{openlabel} ) && ( exists $td_fields_map{$origtext} ) ) {
                    $self->{current_field} = $td_fields_map{$origtext};
                }
                elsif ( defined ( $self->{current_field} ) && ( $self->{current_field} !~ /^$/ ) )
                {
                    # NOTE(review): as visible here this replaces a space by
                    # a space; the left-hand side was presumably a
                    # non-breaking space lost in transit -- confirm against
                    # the upstream file.
                    $origtext=~s/ / /g;
                    $origtext=~s/\s+$//g;
                    $self->{curInfo}->{$self->{current_field}} = $origtext;
                    $self->{current_field} = "";
                }
            }
            elsif ( $self->{isVolume} )
            {
                $self->{curInfo}->{volume} = $origtext;
                $self->{isVolume} = 0 ;
            }

            if ( $self->{isTitle} )
            {
                $self->{curInfo}->{title} = $origtext;
            }
            elsif ( $self->{isSerie} ) {
                $self->{curInfo}->{series} = $origtext;
                $self->{curInfo}->{series} =~s/^\s+//;
            }
            elsif ( ( $self->{isSynopsis} ) && ( ( $origtext =~ /Résumé de l'album :/ ) || ( $origtext =~ /Résumé de la série :/ ) ) ) {
                # The heading itself is skipped; text after it is the synopsis.
                $self->{startSynopsis} = 1;
            }
            elsif ( ( $self->{isSynopsis} ) && ( $self->{startSynopsis} ) ) {
                $self->{curInfo}->{synopsis} .= " ".$origtext;
                $self->{curInfo}->{synopsis} =~ s/^(\s)*//;
                $self->{curInfo}->{synopsis} =~ s/(\s)*$//;
            }
        }
    }

    # Closing-tag callback: lowers the flags raised by start().
    sub end
    {
        my ( $self, $tagname ) = @_;

        return if ( $self->{parsingEnded} );

        if ( $self->{parsingList} )
        {
            if ( !defined ($self->{alternative}) || (!$self->{alternative}) ) {
                if ( ( $tagname eq "i" ) && $self->{isCollection} == 1)
                {

                    #end of collection, next field is title
                    $self->{isTitle} = 1;
                    $self->{isCollection} = 0;
                }
            } else {
                if ( ( $self->{inTable} ) && ( $tagname eq "a" ) ) {
                    $self->{isTitle} = 0;
                } elsif ( ( $self->{inTable} ) && ( $tagname eq "td" ) ) {
                    $self->{isVolume} = 0;
                }
            }
        }
        else
        {
            if ( ( $tagname eq "ul" ) && $self->{isResultsTable} == 1 )
            {
                $self->{isIssue} = 0;
                $self->{isResultsTable} = 0;
            }
            elsif ( ( $self->{isResultsTable} ) && ( $tagname eq "label" ) ) {
                $self->{openlabel} = 0;
            }
            elsif ( ( $self->{isTitle} ) && ( ( $tagname eq "div" ) || ( $tagname eq "h1" ) ) ) {
                $self->{isTitle} = 0;
            }
            elsif ( ( $self->{isSerie} ) && ( $tagname eq "a" ) ) {
                $self->{isSerie} = 0;
            }
            elsif ( ( $self->{isSynopsis} ) && ( $tagname eq "em" ) && ( !$self->{startSynopsis} ) ) {
                $self->{isSynopsis} = 0;
                $self->{startSynopsis} = 0;
            }
            elsif ( ( $self->{isSynopsis} ) && ( ( $tagname eq "p" ) || ( $tagname eq "br" ) ) && ( $self->{startSynopsis} ) ) {
                # End of the synopsis paragraph: nothing else is wanted.
                $self->{isSynopsis} = 0;
                $self->{startSynopsis} = 0;
                $self->{parsingEnded} = 1;
            }
        }
    }
}

1;
# \ No newline at end of file
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm b/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm
# new file mode 100644
# index 0000000..80b299b
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCcomicbookdb.pm
# @@ -0,0 +1,546 @@
package GCPlugins::GCcomics::GCcomicbookdb;

###################################################
#
# Copyright 2005-2012 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCcomics::GCcomicsCommon;

{

    # Scraper plugin for www.comicbookdb.com.  Two passes: pass 1 lists the
    # series matching the search, pass 2 lists the issues of one series.
    # start/text/end are tag/text callbacks sharing flags stored on $self.
    package GCPlugins::GCcomics::GCPlugincomicbookdb;

    use LWP::Simple qw($ua);
    use HTTP::Cookies;

    use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);

    # Opening-tag callback.  $attr is the hash of tag attributes.
    sub start
    {
        my ($self, $tagname, $attr, $attrseq, $origtext) = @_;

        if ($self->{pass} == 1)
        {
            # First pass, searching for series name
            if ($tagname eq "h2")
            {
                $self->{isAtResults} = 1;
            }
            if (   ($tagname eq "a")
                && ($self->{isAtResults})
                && !($attr->{href} =~ m/ebay\.com/))
            {
                $self->{isCollection} = 1;
                $self->{itemIdx}++;

                $self->{itemsList}[ $self->{itemIdx} ]->{nextUrl} =
                  "http://www.comicbookdb.com/" . $attr->{href};
            }
        }
        else
        {
            # Second pass, or fetching item info
            if ($self->{parsingList})
            {

                # isSpecialIssue: 0 = ordinary row, 1 = header row of an
                # issue with several editions, 2 = inside the <tbody> that
                # lists those editions.
                if (   ($tagname eq "tbody")
                    && ($self->{isResultsTable})
                    && ($self->{isSpecialIssue} == 1))
                {
                    $self->{isSpecialIssue} = 2;
                }
                # Parsing issue list
                if (($tagname eq "a") && ($self->{isResultsTable}))
                {
                    if ($attr->{href} =~ m/javascript/)
                    {
                        # Multiple editions of the one issue, need to be
                        # handled differently
                        $self->{isSpecialIssue} = 1;
                    }
                    elsif ($attr->{href} =~ m/storyarc.php/)
                    {
                        # Prevent story arcs from populating lists
                    }
                    elsif ($self->{isSpecialIssue} == 1)
                    {
                        # Links are counted per row: 1st = issue number,
                        # 2nd = title.
                        $self->{resultsTableColumn}++;
                        if ($self->{resultsTableColumn} == 1)
                        {
                            $self->{isSpecialIssueNo} = 1;
                            $self->{isIssue} = 1;
                            $self->{itemIdx}++;
                            $self->{itemsList}[ $self->{itemIdx} ]->{url} =
                              "http://www.comicbookdb.com/" . $attr->{href};
                        }
                        elsif ($self->{resultsTableColumn} == 2)
                        {
                            $self->{isTitle} = 1;
                            $self->{isSpecialTitle} = 1;
                        }
                    }
                    elsif ($self->{isSpecialIssue} == 2)
                    {
                        # Every edition link becomes its own entry.
                        $self->{itemIdx}++;
                        $self->{itemsList}[ $self->{itemIdx} ]->{url} =
                          "http://www.comicbookdb.com/" . $attr->{href};
                        $self->{isTitle} = 1;
                    }
                    else
                    {
                        $self->{resultsTableColumn}++;
                        if ($self->{resultsTableColumn} == 1)
                        {
                            $self->{isIssue} = 1;
                            $self->{itemIdx}++;
                            $self->{itemsList}[ $self->{itemIdx} ]->{url} =
                              "http://www.comicbookdb.com/" . $attr->{href};
                        }
                        elsif ($self->{resultsTableColumn} == 2)
                        {
                            $self->{isTitle} = 1;
                        }
                    }
                }
            }
            else
            {
                # Fetching item info
                if (   ($tagname eq "span")
                    && ((index $attr->{class}, "page_headline") > -1))
                {
                    $self->{insideHeadline} = 1;
                }
                elsif (($tagname eq "a")
                    && ($self->{insideHeadline})
                    && ($attr->{href} =~ m/title.php/))
                {
                    $self->{insideName} = 1;
                }
                elsif (($tagname eq "a")
                    && ($self->{insideHeadline})
                    && ($attr->{href} =~ m/issue_number.php/))
                {
                    $self->{insideNumber} = 1;
                }
                elsif (($tagname eq "a") && ($self->{nextisWriters}))
                {
                    $self->{insideWriters}    = 1;
                    $self->{insidePencillers} = 0;
                    $self->{insideColorists}  = 0;
                }
                elsif (($tagname eq "a") && ($self->{nextisPencillers}))
                {
                    $self->{insideWriters}    = 0;
                    $self->{insidePencillers} = 1;
                    $self->{insideColorists}  = 0;
                }
                elsif (($tagname eq "a") && ($self->{nextisColorists}))
                {
                    $self->{insideWriters}    = 0;
                    $self->{insidePencillers} = 0;
                    $self->{insideColorists}  = 1;
                }
                elsif (($tagname eq "a") && ($attr->{href} =~ /imprint.php/))
                {
                    $self->{insidePublisher} = 1;
                }
                elsif (($tagname eq "a")
                    && ($attr->{href} =~ /publisher.php/)
                    && (!$self->{curInfo}->{publisher}))
                {
                    # The publisher link is used only while no publisher
                    # has been stored yet.
                    $self->{insidePublisher} = 1;
                }
                elsif (($tagname eq "a") && ($attr->{href} =~ /coverdate.php/))
                {
                    $self->{insideCoverDate} = 1;
                }
                # Independent chain: sub-headline and cover image detection.
                if (   ($tagname eq "span")
                    && ((index $attr->{class}, "test") > -1)
                    && ((index $attr->{class}, "page_subheadline") > -1))
                {
                    $self->{insideSubHeadline} = 1;
                }
                elsif (($tagname eq "a")
                    && ($attr->{href} =~ /^graphics\/comic_graphics\//))
                {
                    $self->{curInfo}->{image} =
                      "http://www.comicbookdb.com/" . $attr->{href};
                }
                elsif (($tagname eq "img")
                    && ($attr->{src} =~ /^graphics\/comic_graphics\//)
                    && (!$self->{curInfo}->{image}))
                {
                    # The <img> is used only when no image was stored yet.
                    $self->{curInfo}->{image} =
                      "http://www.comicbookdb.com/" . $attr->{src};
                }

            }
        }
    }

    # Closing-tag callback: lowers the flags raised by start() and text().
    sub end
    {
        my ($self, $tagname) = @_;
        $self->{inside}->{$tagname}--;

        if ($self->{isResultsTable})
        {
            if ($tagname eq "table")
            {
                $self->{isResultsTable} = 0;
            }
            elsif ($tagname eq "tr")
            {
                # New row: restart the per-row link counter.
                $self->{resultsTableColumn} = 0;
            }
        }

        if ($tagname eq "tbody")
        {
            $self->{isSpecialIssue} = 0;
        }
        elsif ($tagname eq "span")
        {
            $self->{insideHeadline}    = 0;
            $self->{insideSubHeadline} = 0;
            $self->{insideNumber}      = 0;
        }
        elsif ($tagname eq "td")
        {
            $self->{isAtResults}      = 0;
            $self->{nextisWriters}    = 0;
            $self->{nextisPencillers} = 0;
            $self->{nextisColorists}  = 0;
            $self->{insideWriters}    = 0;
            $self->{insidePencillers} = 0;
            $self->{insideColorists}  = 0;
        }
        elsif ($tagname eq "a")
        {
            $self->{insidePublisher} = 0;
            $self->{insideCoverDate} = 0;
        }
    }

    # Text callback: stores $origtext in whichever field the current flags
    # designate, and recognises the labels that announce the next field.
    sub text
    {
        my ($self, $origtext) = @_;

        return if ($origtext eq " ");

        return if ($self->{parsingEnded});

        if ($self->{parsingList})
        {
            if ($self->{isCollection})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{series} = $origtext;
                $self->{isCollection} = 0;
            }
            if ($origtext eq "Cover Date")
            {
                # This column heading marks the start of the issue table.
                $self->{isResultsTable} = 1;
            }
            if ($self->{isIssue})
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{volume} = $origtext;
                $self->{isIssue} = 0;
            }
            if ($self->{isSpecialIssueNo})
            {
                $self->{specialIssueNo} = $origtext;
                $self->{isSpecialIssueNo} = 0;
            }
            if ($self->{isTitle})
            {
                if ($self->{isSpecialIssue} == 2)
                {
                    # Edition rows reuse the number and title remembered
                    # from the multi-edition header row.
                    $self->{itemsList}[ $self->{itemIdx} ]->{volume} =
                      $self->{specialIssueNo};
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} =
                      $self->{specialTitle} . $origtext;
                }
                else
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                }
                $self->{isTitle} = 0;
            }
            if ($self->{isSpecialTitle})
            {
                $self->{specialTitle} = $origtext;
                $self->{isSpecialTitle} = 0;
            }
        }
        else
        {
            if ($self->{insideName})
            {
                $self->{curInfo}->{series} = $origtext;
                #$self->{curInfo}->{series} =~ s/(\s\([0-9]*\))$//;
                $self->{insideName} = 0;
            }
            elsif (($self->{insideNumber}) && ($origtext =~ /^\s*#(\d+)/))
            {
                # volume where #XX is in <A HREF... tag, '-' is not
                $self->{curInfo}->{volume} = $1;
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /-\s#(\d+)/))
            {
                # volume where #XX isn't in <A HREF... tag
                $self->{curInfo}->{volume} = $1;
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /-\s*TPB/))
            {
                # Trade paperback
                $self->{curInfo}->{series} .= " TPB";

                # Get volume number. Default to 1.
                if ($origtext =~ /vol\. (\d+)/)
                {
                    $self->{curInfo}->{volume} = $1;
                }
                else
                {
                    $self->{curInfo}->{volume} = 1;
                }
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /vol\. (\d+)/))
            {
                $self->{curInfo}->{volume} = $1;
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideHeadline}) && ($origtext =~ /-\s*Annual\s*(\d+)/))
            {
                # Annual volume where #XX isn't in <A HREF... tag
                $self->{curInfo}->{volume} = $1;
                $self->{curInfo}->{series} .= " Annual";
                $self->{insideNumber} = 0;
            }
            elsif (($self->{insideSubHeadline}) && ($origtext =~ /\"(.*)\"/))
            {
                $self->{curInfo}->{title} = $1;

                # Get printing or other note if present
                if ($origtext =~ /\((.*)\)/)
                {
                    $self->{curInfo}->{title} .= " (" . $1 . ")";
                }
            }
            elsif ($self->{insidePublisher})
            {
                $self->{curInfo}->{publisher} = $origtext;
                $self->{insidePublisher} = 0;
            }
            elsif ($origtext eq "Writer(s):")
            {
                $self->{nextisWriters}    = 1;
                $self->{nextisPencillers} = 0;
                $self->{nextisColorists}  = 0;
            }
            elsif ($origtext eq "Penciller(s):")
            {
                $self->{nextisWriters}    = 0;
                $self->{nextisPencillers} = 1;
                $self->{nextisColorists}  = 0;
            }
            elsif ($origtext eq "Colorist(s):")
            {
                $self->{nextisWriters}    = 0;
                $self->{nextisPencillers} = 0;
                $self->{nextisColorists}  = 1;
            }
            elsif (($origtext eq "Letterer(s):")
                || ($origtext eq "Inker(s):")
                || ($origtext eq "Editor(s):")
                || ($origtext eq "Cover Artist(s):")
                || ($origtext eq "Characters:")
                || ($origtext eq "Groups:"))
            {
                # Credits that are not collected: stop collecting names.
                $self->{nextisWriters}    = 0;
                $self->{nextisPencillers} = 0;
                $self->{nextisColorists}  = 0;
            }
            elsif ($self->{insideWriters})
            {
                # Names are joined with ", "; a name already contained in
                # the field is not appended again.
                if ($self->{curInfo}->{writer} eq "")
                {
                    $self->{curInfo}->{writer} = $origtext;
                }
                elsif ((index $self->{curInfo}->{writer}, $origtext) == -1)
                {
                    $self->{curInfo}->{writer} .= ", ";
                    $self->{curInfo}->{writer} .= $origtext;
                }

                $self->{insideWriters} = 0;
            }
            elsif ($self->{insidePencillers})
            {
                if ($self->{curInfo}->{illustrator} eq "")
                {
                    $self->{curInfo}->{illustrator} = $origtext;
                }
                elsif ((index $self->{curInfo}->{illustrator}, $origtext) == -1)
                {
                    $self->{curInfo}->{illustrator} .= ", ";
                    $self->{curInfo}->{illustrator} .= $origtext;
                }

                $self->{insidePencillers} = 0;
            }
            elsif ($self->{insideColorists})
            {
                if ($self->{curInfo}->{colourist} eq "")
                {
                    $self->{curInfo}->{colourist} = $origtext;
                }
                elsif ((index $self->{curInfo}->{colourist}, $origtext) == -1)
                {
                    $self->{curInfo}->{colourist} .= ", ";
                    $self->{curInfo}->{colourist} .= $origtext;
                }

                $self->{insideColorists} = 0;
            }
            elsif ($origtext eq "Synopsis: ")
            {
                $self->{nextisSynopsis} = 1;
            }
            elsif ($self->{nextisSynopsis})
            {
                if ($origtext !~ /None entered./)
                {
                    $self->{curInfo}->{synopsis} = $origtext;
                    $self->{curInfo}->{synopsis} =~ s/^(\s)*//;
                    $self->{curInfo}->{synopsis} =~ s/(\s)*$//;
                }
                $self->{nextisSynopsis} = 0;
            }
            elsif ($self->{insideCoverDate})
            {
                $self->{curInfo}->{printdate} = $origtext;
                $self->{curInfo}->{printdate} =~ s/^(\s)*//;

                # Translate date string to date
                $self->{curInfo}->{printdate} =
                  GCUtils::strToTime($self->{curInfo}->{printdate}, "%B %Y");
                $self->{curInfo}->{publishdate} = $self->{curInfo}->{printdate};
            }
        }
    }

    # Constructor: builds the base object, gives its user agent a cookie
    # jar and initialises the parser state.
    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self  = $class->SUPER::new();

        $self->{ua}->cookie_jar(HTTP::Cookies->new);

        bless($self, $class);

        $self->{isResultsTable}     = 0;
        $self->{itemIdx}            = 0;
        $self->{resultsTableColumn} = 0;
        $self->{curName}            = undef;
        $self->{curUrl}             = undef;

        return $self;
    }

    # Declare which fields the results list shows: the series name on
    # pass 1, title and volume afterwards.
    sub getReturnedFields
    {
        my $self = shift;

        if ($self->{pass} == 1)
        {
            $self->{hasField} = {series => 1,};
        }
        else
        {
            $self->{hasField} = {
                title  => 1,
                volume => 1,
            };
        }
    }

    # No trimming is done for this site; only the end-of-parsing flag is
    # reset.  Returns $html unchanged.
    sub preProcess
    {
        my ($self, $html) = @_;

        $self->{parsingEnded} = 0;

        return $html;
    }

    # Build the title-search URL for $word.
    sub getSearchUrl
    {
        my ($self, $word) = @_;

        $word =~ s/\+%28\d{4}%29$//;    # strip year from end of $word (title)

        # Grab the home page first, or the pages fetched are blank
        # (who knows why... must be something funky with the website)
        # The response is deliberately discarded.
        # NOTE(review): this uses LWP::Simple's $ua while the cookie jar is
        # installed on $self->{ua} in new(); whether the two are the same
        # object is not visible from this file -- confirm.
        $ua->get('http://www.comicbookdb.com/');

        return
          "http://www.comicbookdb.com/search.php?form_search=$word&form_searchtype=Title";
    }

    # Turn a (possibly site-relative) item URL into an absolute one.
    sub getItemUrl
    {
        my ($self, $url) = @_;

        # Accept https as well as http.
        return $url if $url =~ /^https?:/;

        # Relative URLs were glued straight onto the host name, giving
        # "...comicbookdb.comissue.php" when the leading "/" was missing.
        # URLs that already start with "/" resolve exactly as before.
        $url = "/" . $url if $url !~ m{^/};

        return "http://www.comicbookdb.com" . $url;
    }

    sub getNumberPasses
    {
        return 2;
    }

    sub getName
    {
        return "Comic Book DB";
    }

    sub getAuthor
    {
        return 'Zombiepig';
    }

    sub getLang
    {
        return 'EN';
    }
}

1;
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm b/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm
# new file mode 100644
# index 0000000..3b1229c
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCcomicsCommon.pm
# @@ -0,0 +1,49 @@
package GCPlugins::GCcomics::GCcomicsCommon;

###################################################
#
# Copyright 2005-2010 Christian Jodar
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;

use GCPlugins::GCPluginsBase;

{
    # Common parent of the comics plugins; itself derives from
    # GCPluginParser.
    package GCPlugins::GCcomics::GCcomicsPluginsBase;

    use base qw(GCPluginParser);

    # Constructor.  Works as a class or instance method: builds the parent
    # object with no arguments and blesses it into the caller's class.
    sub new
    {
        my ($invocant) = @_;
        my $target = ref($invocant) ? ref($invocant) : $invocant;

        return bless( $target->SUPER::new(), $target );
    }

    # Default list of searchable fields: a reference to a fresh
    # one-element array.
    sub getSearchFieldsArray
    {
        my @fields = ('series');
        return \@fields;
    }
}

1;
# diff --git a/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm b/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm
# new file mode 100644
# index 0000000..d05d0c8
# --- /dev/null
# +++ b/lib/gcstar/GCPlugins/GCcomics/GCmangasanctuary.pm
# @@ -0,0 +1,503 @@
package GCPlugins::GCcomics::GCmangasanctuary;

###################################################
#
# Copyright 2005-2007 Tian
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################

use strict;
use utf8;

use GCPlugins::GCcomics::GCcomicsCommon;

{

    # Scraper plugin for www.manga-sanctuary.com.  start/text/end are
    # tag/text callbacks sharing "...Detected" flags stored on $self.
    package GCPlugins::GCcomics::GCPluginmangasanctuary;

    use LWP::Simple qw($ua);

    use base qw(GCPlugins::GCcomics::GCcomicsPluginsBase);

    # Opening-tag callback.  $attr is the hash of tag attributes.
    sub start
    {
        my ( $self, $tagname, $attr, $attrseq, $origtext ) = @_;
        if ( $self->{parsingList} )    # search results page
        {

            #The interesting part to parse looks like this :
            #<li class="row1"><a href="/manhwa-rebirth-vol-2-simple-s1397-p682.html">Rebirth #2</a> <span>Manhwa</span></li>
            if ( $tagname eq "a" )
            {
                $self->{isDebut} = 1;
                $self->{itemIdx}++;
                $self->{itemsList}[ $self->{itemIdx} ]->{url} = "http://www.manga-sanctuary.com" . $attr->{href};

                # Type and format are encoded in the link.  They are only
                # stored when the pattern matched: reading $1/$2 after a
                # failed match yields leftovers of an earlier match.
                if ( $attr->{href} =~ m/\/(.*?)-.*-vol-\d+-(.*?)-s\d+-p\d+\.html/ )
                {
                    $self->{itemsList}[ $self->{itemIdx} ]->{type}   = $1;
                    $self->{itemsList}[ $self->{itemIdx} ]->{format} = $2;
                }
            }
        }
        else    # item page
        {

            #Start by fetching the cover image
            #<a target="_blank" href="/couvertures/big/rebirth1gd.jpg"><img src="/couvertures/rebirth1gd.jpg"></a>
            if ( ( $tagname eq "a" ) && ( $attr->{href} =~ m/couvertures.*\.[jJ][pP][gG]/ ) )
            {
                # Probe the large image: the site answers with an HTML page
                # when it does not exist.
                my $response = $ua->get("http://www.manga-sanctuary.com" . $attr->{href});
                if ($response->content_type =~ m/text\/html/)    #the large image does not exist
                {
                    $self->{downloadThumbnail} = 1;
                }
                else    #the large image exists
                {
                    $self->{curInfo}->{image} = "http://www.manga-sanctuary.com" . $attr->{href};
                }
            }
            if ( ( $tagname eq "img" ) && ( $attr->{src} =~ m/couvertures.*\.[jJ][pP][gG]/ ) && ($self->{downloadThumbnail} == 1) )
            {
                $self->{curInfo}->{image} = "http://www.manga-sanctuary.com" . $attr->{src};
                $self->{downloadThumbnail} =0;
            }
            #Generic detection of the dt / dd / h3 / p / a tags
            if ( $tagname eq "dt")
            {
                $self->{tagDTdetected} =1;
            }elsif ( $tagname eq "dd")
            {
                $self->{tagDDdetected} =1;
            }elsif ( $tagname eq "h3")
            {
                $self->{tagH3detected} =1;
            }elsif ( $tagname eq "p")
            {
                $self->{tagPdetected} =1;
            }elsif ( $tagname eq "a")
            {
                $self->{tagAdetected} =1;
            }
            #Tell apart the two kinds of title (original / French)
            if ( ( $tagname eq "img") && ( $attr->{src} =~ m/\/design\/img\/flags/ ) && ($self->{titleDetected} == 1) )
            {
                # Flag image number 77 marks the French title; $1 is only
                # consulted when the number was actually found.
                if ( ( $attr->{src} =~ m/\/(\d*)\.png$/ ) && ( $1 == 77 ) )
                {
                    $self->{titreFrancais} = 1;
                }
                else
                {
                    $self->{titreFrancais} = 0;
                }
            }
            #Fetch the rating
            #<ul id="notation">\nStaff MS:<img src="/design/img/9.gif" title="8.5/10"/></ul>
            if ( ( $tagname eq "ul") && ( $attr->{id} =~ m/notation/ ) )
            {
                $self->{notationDetected} = 1;
            }elsif ( ( $tagname eq "img") && ( $self->{notationDetected} == 1 ) )
            {
                if ( $attr->{title} =~ m/^(\d*\.?\d*)\/10/ )
                {
                    $self->{curInfo}->{rating} = $1;
                }
                $self->{notationDetected} = 0;

                #Extract the format from the page address.
                #http://www.manga-sanctuary.com/manga-duds-hunt-vol-1-simple-s1169-p1477.html
                #Could be done as soon as webPage is set; placed here to be sure it only runs once.
                # Guarded: when this match failed, $1 still held the rating
                # captured just above and was stored as the format.
                if ( $self->{curInfo}->{webPage} =~ m/vol-\d+-(.*?)-s\d+-p\d+\.html/ )
                {
                    $self->{curInfo}->{format} = $1;
                }
            }
        }
    }

    # Closing-tag callback: lowers the flags raised by start() and text().
    sub end
    {
        my ( $self, $tagname ) = @_;
        if ( $self->{parsingList} )    # search results page
        {
            if ( ( $tagname eq "a" ) && $self->{isFin} == 1 )
            {
                #end of collection, next field is title
                $self->{isFin} = 0;
            }
        }
        else    # item page
        {
            #Generic detection of the dt / dd / h3 / p / a tags
            if ( $tagname eq "dt")
            {
                $self->{tagDTdetected} =0;
            }elsif ( $tagname eq "dd")
            {
                $self->{tagDDdetected} =0;
                #Reset everything in case the field was empty
                $self->{titleDetected} =0;
                $self->{titreFrancais} = 1;
                $self->{publisherDetected} =0;
                $self->{collectionDetected} =0;
                $self->{publishdateDetected} =0;
                $self->{costDetected} =0;
                $self->{typeDetected} =0;
                $self->{categoryDetected} =0;
                $self->{genresDetected} =0;
                $self->{scenaristeDetected} =0;
                $self->{dessinateurDetected} =0;
            }elsif ( $tagname eq "div")    #The content that belongs to an h3 heading comes after its closing </h3> tag, so that tag cannot be used to reset the flag.
            {
                $self->{tagH3detected} =0;
            }elsif ( $tagname eq "p")
            {
                $self->{tagPdetected} =0;
                #Reset everything in case the field was empty
                $self->{synopsisDetected} =0;
                $self->{critiquesDetected} =0;
                $self->{reactionsDetected} =0;
            }elsif ( $tagname eq "a")
            {
                $self->{tagAdetected} =0;
            }elsif ( $tagname eq "ul" )
            {
                $self->{notationDetected} = 0;
            }
        }
    }

    # Text callback.  On item pages a <dt> label raises a "...Detected"
    # flag and the following <dd> / <p> / <a> text fills the matching field.
    sub text
    {
        my ( $self, $origtext ) = @_;

        return if ( $origtext eq " " );

        return if ( $self->{parsingEnded} );

        # $origtext without leading/trailing whitespace.  /s lets the
        # pattern span embedded newlines, so it matches any defined string;
        # the unflagged version failed on multi-line text and the code then
        # read a stale $1.
        my ($trimmed) = $origtext =~ m/^\s*(.*?)\s*$/s;

        if ( $self->{parsingList} )    # search results page
        {
            if ( $self->{isDebut} )
            {
                $self->{itemsList}[ $self->{itemIdx} ]->{title} = $origtext;
                $self->{isDebut} = 0;
                $self->{isFin} = 1;
            }
        }
        else    # item page
        {

            if ( $self->{tagDTdetected} == 1 )
            {
                #Title
                #<dt><label>Titre <img src="/design/img/flags/112.png"></label></dt><dd>銃夢 Last Order </dd><dt><label>Titre <img src="/design/img/flags/77.png"></label></dt><dd>Gunnm Last Order</dd>
                if ($origtext =~ m/^Titre/)
                {
                    $self->{titleDetected} =1;
                }
                #Volume
                #<dt><label>Volume:</label></dt>\n<dd>1/23</dd>
                elsif ($origtext =~ m/^Volume/)
                {
                    $self->{volumeDetected} =1;
                }
                #Publisher
                #<dt><label>Editeur:</label></dt>\n<dd><a href="http://www.manga-sanctuary.com/bdd/editeurs/6-glenat.html" title="Glénat">Glénat</a></dd>
                elsif ($origtext =~ m/^Editeur/)
                {
                    $self->{publisherDetected} =1;
                }
                #collection
                #<dt><label>Label:</label></dt>\n<dd>Kana Shonen</dd>
                elsif ($origtext =~ m/^Label/)
                {
                    $self->{collectionDetected} =1;
                }
                #PublishDate
                #<dt><label>Date de sortie:</label></dt>\n<dd>31/10/2002</dd>
                elsif ($origtext =~ m/^Date de sortie/)
                {
                    $self->{publishdateDetected} =1;
                }
                #cost
                #<dt><label>Prix:</label></dt>\n<dd>6.5 EUR</dd>
                elsif ($origtext =~ m/^Prix/)
                {
                    $self->{costDetected} =1;
                }
                #type
                #<dt><label>Type:</label></dt>\n<dd>Manga</dd>
                elsif ($origtext =~ m/^Type/)
                {
                    $self->{typeDetected} =1;
                }
                #category
                #<dt><label>Catégorie:</label></dt>\n<dd>Seinen</dd>
                elsif ($origtext =~ m/^Catégorie/)
                {
                    $self->{categoryDetected} =1;
                }
                #Genres [NOTE: no access to the tags field, so they go into the synopsis]
                #<dt><label>Genres:</label></dt>\n<dd>Action, SF</dd>
                elsif ($origtext =~ m/^Genres/)
                {
                    $self->{genresDetected} =1;
                }
                #writer [from the series page]
                #<dt><label>Scénariste</label></dt>
                elsif ($origtext =~ m/^Scénariste/)
                {
                    $self->{scenaristeDetected} =1;
                }
                #illustrator [from the series page]
                #<dt><label>Dessinateur</label></dt>
                elsif ($origtext =~ m/^Dessinateur/)
                {
                    $self->{dessinateurDetected} =1;
                }
            }

            if ( $self->{tagDDdetected} == 1 )
            {
                if ($self->{titleDetected} == 1)
                {
                    if ($self->{titreFrancais} == 1)
                    {
                        #$self->{curInfo}->{title} = $1; #title disabled because it is the same as the series
                        $self->{curInfo}->{series} = $trimmed;
                    }
                    else
                    {
                        $self->{curInfo}->{synopsis} .= "Titre original :".$trimmed."\n";
                    }
                    $self->{titleDetected} = 0;
                }
                elsif ($self->{volumeDetected} == 1)
                {
                    # "1/23": keep the number before the slash.
                    if ( $origtext =~ m/^(\d*)\// )
                    {
                        $self->{curInfo}->{volume} = $1;
                    }
                    $self->{volumeDetected} =0;
                }
                elsif ($self->{publisherDetected} == 1)
                {
                    $self->{curInfo}->{publisher} = $origtext;
                    $self->{publisherDetected} =0;
                }
                elsif ($self->{collectionDetected} == 1)
                {
                    $self->{curInfo}->{collection} = $origtext;
                    $self->{collectionDetected} =0;
                }
                elsif ($self->{publishdateDetected} == 1)
                {
                    $self->{curInfo}->{publishdate} = $origtext;
                    $self->{publishdateDetected} =0;
                }
                elsif ($self->{costDetected} == 1)
                {
                    # "6.5 EUR": keep the leading decimal number.
                    if ( $origtext =~ m/^\s*(\d*\.\d*)/ )
                    {
                        $self->{curInfo}->{cost} = $1;
                    }
                    $self->{costDetected} =0;
                }
                elsif ($self->{typeDetected} == 1)
                {
                    $self->{curInfo}->{type} = $origtext;
                    $self->{typeDetected} =0;
                }
                elsif ($self->{categoryDetected} == 1)
                {
                    $self->{curInfo}->{category} = $origtext;
                    $self->{categoryDetected} =0;
                }
                elsif ($self->{genresDetected} == 1)
                {
                    $self->{curInfo}->{synopsis} .= "Genres : ".$trimmed."\n\n";
                    $self->{genresDetected} =0;
                }
            }
            if ( $self->{tagH3detected} == 1 )
            {
                #Synopsis heading detection
                # <h3><span>Synopsis</span></h3>
                if ($origtext =~ m/^Synopsis/)
                {
                    $self->{synopsisDetected} =1;
                    $self->{curInfo}->{synopsis} .= "Synopsis :\n"
                }
                #Staff reviews heading detection
                #<h3>Critiques du staff</h3>
                elsif ($origtext =~ m/^Critiques du staff/)
                {
                    $self->{critiquesDetected} =1;
                    $self->{curInfo}->{synopsis} .= "\n\nCritiques du staff :\n";
                }
                #Reactions disabled: not very interesting
                # #Reactions heading detection
                # #<h3>Réactions</h3>
                # elsif ($origtext =~ m/^Réactions/)
                # {
                # $self->{reactionsDetected} =1;
                # $self->{curInfo}->{synopsis} .= "\n\nRéactions :\n";
                # }
            }
            if ( $self->{tagPdetected} == 1 )
            {
                if ($self->{synopsisDetected} == 1)
                {
                    $self->{curInfo}->{synopsis} .= $trimmed."\n";
                    $self->{genresDetected} =0;
                }elsif ($self->{critiquesDetected} == 1)
                {
                    $self->{curInfo}->{synopsis} .= $trimmed."\n";
                    $self->{genresDetected} =0;
                }
                #Reactions disabled: not very interesting
                # elsif ($self->{reactionsDetected} == 1)
                # {
                # $origtext =~ m/^\s*(.*?)\s*$/;
                # $self->{curInfo}->{synopsis} .= $1."\n";
                # $self->{genresDetected} =0;
                # }
            }
            if ( $self->{tagAdetected} == 1 )
            {
                if ($self->{scenaristeDetected} == 1)
                {
                    $self->{curInfo}->{writer} = $origtext;
                    $self->{scenaristeDetected} =0;
                }
                elsif ($self->{dessinateurDetected} == 1)
                {
                    $self->{curInfo}->{illustrator} = $origtext;
                    $self->{dessinateurDetected} =0;
                }
            }
        }
    }

    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = $class->SUPER::new();
        bless( $self, $class );
#for the search:
#        $self->{hasField} = {
#            series => 1,
#            title => 1,
#            volume => 1,
#        };
        $self->{hasField} = {
            title => 1,
            type => 1,
            format => 1,
        };

+ + $self->{itemIdx} = 0; + $self->{downloadThumbnail} = 0; + $self->{tagDTdetected} =0; + $self->{tagDDdetected} =0; + $self->{tagH3detected} =0; + $self->{tagPdetected} =0; + $self->{titleDetected} =0; + $self->{titreFrancais} = 1;#défaut francais + $self->{publisherDetected} =0; + $self->{collectionDetected} =0; + $self->{publishdateDetected} =0; + $self->{costDetected} =0; + $self->{typeDetected} =0; + $self->{categoryDetected} =0; + $self->{genresDetected} =0; + $self->{synopsisDetected} =0; + $self->{critiquesDetected} =0; + $self->{reactionsDetected} =0; + $self->{scenaristeDetected} =0; + $self->{dessinateurDetected} =0; + $self->{notationDetected} = 0; + + return $self; + } + + sub preProcess + { + my ( $self, $html ) = @_; + + if ( $self->{parsingList} ) # partie en rapport à la page de résultats + { + #keep only Volumes + $html =~ m/<h3>Volumes\s\(\d+\)<\/h3>\s*(.*?)\s*<h3>Critiques/s; + $html = $1; + } + else # partie en rapport à la page de l'élément + { + $html =~ m/<div id="contenu">\s*(<ul id="menu_fiche">\s*<li><a href="(http:\/\/www.manga-sanctuary.com.*?)">.*?)\s*<h3><span>Mes actions<\/span><\/h3>/s; + $html = $1; + + #récupération des infos de la fiche série + my $response = $ua->get($2); + $response->content =~ m/<h3><span>Staff<\/span><\/h3>\s*(.*?<\/dl>)/s; + + $html .= "\n\n <fiche série>\n\n".$1; + + } + + return $html; + } + + sub getSearchUrl + { + my ( $self, $word ) = @_; + $word =~ s/\+/ /g; + return ('http://www.manga-sanctuary.com/recherche/tout/', ['keywords' => $word]); + + } + + sub getItemUrl + { + my ( $self, $url ) = @_; + #Je fais le pari que cette partie n'est pas utilisée + # my @array = split( /#/, $url ); + # $self->{site_internal_id} = $array[1]; + + return $url if $url =~ /^http:/; + return "http://www.manga-sanctuary.com" . 
$url; + } + + sub getNumberPasses + { + return 1; + } + + sub getName + { + return "Manga-Sanctuary"; + } + + sub getAuthor + { + return 'Biggriffon'; + } + + sub getLang + { + return 'FR'; + } +} + +1; |